1use super::*;
2
pick! {
  if #[cfg(target_feature="avx")] {
    // Native layout: a single 256-bit AVX register.
    #[derive(Default, Clone, Copy, PartialEq)]
    #[repr(C, align(32))]
    pub struct f32x8 { avx: m256 }
  } else {
    // Fallback layout: two 128-bit halves. `align(32)` keeps size/alignment
    // identical to the AVX form so `Pod`-style casts behave the same on
    // every target.
    #[derive(Default, Clone, Copy, PartialEq)]
    #[repr(C, align(32))]
    pub struct f32x8 { a : f32x4, b : f32x4 }
  }
}
14
/// Declares a `pub const` `f32x8` named `$i` with all eight lanes set to `$f`.
macro_rules! const_f32_as_f32x8 {
  ($i:ident, $f:expr) => {
    // Lowercase names (e.g. polynomial coefficients) are deliberate, so
    // silence the const-naming lint.
    #[allow(non_upper_case_globals)]
    pub const $i: f32x8 = f32x8::new([$f; 8]);
  };
}
21
// Common `f32` constants, splatted across all eight lanes (mirrors
// `core::f32::consts` plus a few convenience values).
impl f32x8 {
  const_f32_as_f32x8!(ONE, 1.0);
  const_f32_as_f32x8!(HALF, 0.5);
  const_f32_as_f32x8!(ZERO, 0.0);
  const_f32_as_f32x8!(E, core::f32::consts::E);
  const_f32_as_f32x8!(FRAC_1_PI, core::f32::consts::FRAC_1_PI);
  const_f32_as_f32x8!(FRAC_2_PI, core::f32::consts::FRAC_2_PI);
  const_f32_as_f32x8!(FRAC_2_SQRT_PI, core::f32::consts::FRAC_2_SQRT_PI);
  const_f32_as_f32x8!(FRAC_1_SQRT_2, core::f32::consts::FRAC_1_SQRT_2);
  const_f32_as_f32x8!(FRAC_PI_2, core::f32::consts::FRAC_PI_2);
  const_f32_as_f32x8!(FRAC_PI_3, core::f32::consts::FRAC_PI_3);
  const_f32_as_f32x8!(FRAC_PI_4, core::f32::consts::FRAC_PI_4);
  const_f32_as_f32x8!(FRAC_PI_6, core::f32::consts::FRAC_PI_6);
  const_f32_as_f32x8!(FRAC_PI_8, core::f32::consts::FRAC_PI_8);
  const_f32_as_f32x8!(LN_2, core::f32::consts::LN_2);
  const_f32_as_f32x8!(LN_10, core::f32::consts::LN_10);
  const_f32_as_f32x8!(LOG2_E, core::f32::consts::LOG2_E);
  const_f32_as_f32x8!(LOG10_E, core::f32::consts::LOG10_E);
  const_f32_as_f32x8!(LOG10_2, core::f32::consts::LOG10_2);
  const_f32_as_f32x8!(LOG2_10, core::f32::consts::LOG2_10);
  const_f32_as_f32x8!(PI, core::f32::consts::PI);
  const_f32_as_f32x8!(SQRT_2, core::f32::consts::SQRT_2);
  const_f32_as_f32x8!(TAU, core::f32::consts::TAU);
}
46
// SAFETY: both cfg variants of `f32x8` are `repr(C)` wrappers around 256 bits
// of `f32` data with no padding, so all-zeroes is a valid value and any bit
// pattern is valid.
unsafe impl Zeroable for f32x8 {}
unsafe impl Pod for f32x8 {}
49
impl Add for f32x8 {
  type Output = Self;
  /// Lanewise `self + rhs`: one AVX add when available, else two half adds.
  #[inline]
  #[must_use]
  fn add(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: add_m256(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.add(rhs.a),
          b : self.b.add(rhs.b),
        }
      }
    }
  }
}
67
impl Sub for f32x8 {
  type Output = Self;
  /// Lanewise `self - rhs`.
  #[inline]
  #[must_use]
  fn sub(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: sub_m256(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.sub(rhs.a),
          b : self.b.sub(rhs.b),
        }
      }
    }
  }
}
85
impl Mul for f32x8 {
  type Output = Self;
  /// Lanewise `self * rhs`.
  #[inline]
  #[must_use]
  fn mul(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: mul_m256(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.mul(rhs.a),
          b : self.b.mul(rhs.b),
        }
      }
    }
  }
}
103
impl Div for f32x8 {
  type Output = Self;
  /// Lanewise `self / rhs` (IEEE division: yields inf/NaN rather than
  /// panicking on zero divisors).
  #[inline]
  #[must_use]
  fn div(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: div_m256(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.div(rhs.a),
          b : self.b.div(rhs.b),
        }
      }
    }
  }
}
121
122impl Add<f32> for f32x8 {
123 type Output = Self;
124 #[inline]
125 #[must_use]
126 fn add(self, rhs: f32) -> Self::Output {
127 self.add(Self::splat(rhs))
128 }
129}
130
131impl Sub<f32> for f32x8 {
132 type Output = Self;
133 #[inline]
134 #[must_use]
135 fn sub(self, rhs: f32) -> Self::Output {
136 self.sub(Self::splat(rhs))
137 }
138}
139
140impl Mul<f32> for f32x8 {
141 type Output = Self;
142 #[inline]
143 #[must_use]
144 fn mul(self, rhs: f32) -> Self::Output {
145 self.mul(Self::splat(rhs))
146 }
147}
148
149impl Div<f32> for f32x8 {
150 type Output = Self;
151 #[inline]
152 #[must_use]
153 fn div(self, rhs: f32) -> Self::Output {
154 self.div(Self::splat(rhs))
155 }
156}
157
158impl Add<f32x8> for f32 {
159 type Output = f32x8;
160 #[inline]
161 #[must_use]
162 fn add(self, rhs: f32x8) -> Self::Output {
163 f32x8::splat(self).add(rhs)
164 }
165}
166
167impl Sub<f32x8> for f32 {
168 type Output = f32x8;
169 #[inline]
170 #[must_use]
171 fn sub(self, rhs: f32x8) -> Self::Output {
172 f32x8::splat(self).sub(rhs)
173 }
174}
175
176impl Mul<f32x8> for f32 {
177 type Output = f32x8;
178 #[inline]
179 #[must_use]
180 fn mul(self, rhs: f32x8) -> Self::Output {
181 f32x8::splat(self).mul(rhs)
182 }
183}
184
185impl Div<f32x8> for f32 {
186 type Output = f32x8;
187 #[inline]
188 #[must_use]
189 fn div(self, rhs: f32x8) -> Self::Output {
190 f32x8::splat(self).div(rhs)
191 }
192}
193
impl BitAnd for f32x8 {
  type Output = Self;
  /// Bitwise AND of the lane bits — mainly used to combine/apply the
  /// all-ones/all-zeroes masks produced by the comparison ops.
  #[inline]
  #[must_use]
  fn bitand(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: bitand_m256(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.bitand(rhs.a),
          b : self.b.bitand(rhs.b),
        }
      }
    }
  }
}
211
impl BitOr for f32x8 {
  type Output = Self;
  /// Bitwise OR of the lane bits (mask combination).
  #[inline]
  #[must_use]
  fn bitor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: bitor_m256(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.bitor(rhs.a),
          b : self.b.bitor(rhs.b),
        }
      }
    }
  }
}
229
impl BitXor for f32x8 {
  type Output = Self;
  /// Bitwise XOR of the lane bits (used e.g. for sign flips).
  #[inline]
  #[must_use]
  fn bitxor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: bitxor_m256(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.bitxor(rhs.a),
          b : self.b.bitxor(rhs.b),
        }
      }
    }
  }
}
247
impl CmpEq for f32x8 {
  type Output = Self;
  /// Lanewise `self == rhs`, returning an all-ones mask where true and
  /// all-zeroes where false. Ordered predicate: NaN lanes compare as false.
  #[inline]
  #[must_use]
  fn cmp_eq(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: cmp_op_mask_m256::<{cmp_op!(EqualOrdered)}>(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.cmp_eq(rhs.a),
          b : self.b.cmp_eq(rhs.b),
        }
      }
    }
  }
}
265
impl CmpGe for f32x8 {
  type Output = Self;
  /// Lanewise `self >= rhs` mask (ordered: NaN lanes give 0).
  #[inline]
  #[must_use]
  fn cmp_ge(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: cmp_op_mask_m256::<{cmp_op!(GreaterEqualOrdered)}>(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.cmp_ge(rhs.a),
          b : self.b.cmp_ge(rhs.b),
        }
      }
    }
  }
}
283
impl CmpGt for f32x8 {
  type Output = Self;
  /// Lanewise `self > rhs` mask (ordered: NaN lanes give 0).
  #[inline]
  #[must_use]
  fn cmp_gt(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: cmp_op_mask_m256::<{cmp_op!(GreaterThanOrdered)}>(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.cmp_gt(rhs.a),
          b : self.b.cmp_gt(rhs.b),
        }
      }
    }
  }
}
301
impl CmpNe for f32x8 {
  type Output = Self;
  /// Lanewise `self != rhs` mask. The AVX path uses the *ordered* not-equal
  /// predicate (NaN lanes give 0, unlike scalar `!=`); NOTE(review): confirm
  /// the f32x4 fallback uses the same predicate so targets agree.
  #[inline]
  #[must_use]
  fn cmp_ne(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: cmp_op_mask_m256::<{cmp_op!(NotEqualOrdered)}>(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.cmp_ne(rhs.a),
          b : self.b.cmp_ne(rhs.b),
        }
      }
    }
  }
}
319
impl CmpLe for f32x8 {
  type Output = Self;
  /// Lanewise `self <= rhs` mask (ordered: NaN lanes give 0).
  #[inline]
  #[must_use]
  fn cmp_le(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: cmp_op_mask_m256::<{cmp_op!(LessEqualOrdered)}>(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.cmp_le(rhs.a),
          b : self.b.cmp_le(rhs.b),
        }
      }
    }
  }
}
337
impl CmpLt for f32x8 {
  type Output = Self;
  /// Lanewise `self < rhs` mask (ordered: NaN lanes give 0).
  #[inline]
  #[must_use]
  fn cmp_lt(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: cmp_op_mask_m256::<{cmp_op!(LessThanOrdered)}>(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.cmp_lt(rhs.a),
          b : self.b.cmp_lt(rhs.b),
        }
      }
    }
  }
}
355
356impl f32x8 {
357 #[inline]
358 #[must_use]
359 pub const fn new(array: [f32; 8]) -> Self {
360 unsafe { core::intrinsics::transmute(array) }
361 }
  /// Per-lane select: where the mask lane of `self` is set, take the lane
  /// from `t`, otherwise from `f`. `self` is expected to be a comparison
  /// mask (on AVX only the sign bit of each lane is consulted).
  #[inline]
  #[must_use]
  pub fn blend(self, t: Self, f: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: blend_varying_m256(f.avx, t.avx, self.avx) }
      } else {
        Self {
          a : self.a.blend(t.a, f.a),
          b : self.b.blend(t.b, f.b),
        }
      }
    }
  }
  /// Lanewise absolute value, implemented by clearing each lane's sign bit.
  #[inline]
  #[must_use]
  pub fn abs(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        // i32::MAX as u32 = 0x7FFF_FFFF: all bits except the sign bit.
        let non_sign_bits = f32x8::from(f32::from_bits(i32::MAX as u32));
        self & non_sign_bits
      } else {
        Self {
          a : self.a.abs(),
          b : self.b.abs(),
        }
      }
    }
  }
  /// Lanewise round toward negative infinity.
  #[inline]
  #[must_use]
  pub fn floor(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: floor_m256(self.avx) }
      } else {
        Self {
          a : self.a.floor(),
          b : self.b.floor(),
        }
      }
    }
  }
  /// Lanewise round toward positive infinity.
  #[inline]
  #[must_use]
  pub fn ceil(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: ceil_m256(self.avx) }
      } else {
        Self {
          a : self.a.ceil(),
          b : self.b.ceil(),
        }
      }
    }
  }
419
  /// Lanewise maximum with no extra NaN handling: the result for a lane where
  /// either input is NaN follows the underlying `max` op (on x86 that is the
  /// second operand). Faster than [`max`](Self::max) when NaNs don't matter.
  #[inline]
  #[must_use]
  pub fn fast_max(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: max_m256(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.fast_max(rhs.a),
          b : self.b.fast_max(rhs.b),
        }
      }
    }
  }
437
  /// Lanewise maximum. Where `rhs` is NaN, `self` is kept (so a single NaN
  /// operand yields the other operand, like scalar `f32::max`).
  #[inline]
  #[must_use]
  pub fn max(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        // `max_m256` alone would return its second operand for NaN lanes;
        // the blend patches those lanes back to `self`.
        rhs.is_nan().blend(self, Self { avx: max_m256(self.avx, rhs.avx) })
      } else {
        Self {
          a : self.a.max(rhs.a),
          b : self.b.max(rhs.b),
        }
      }

    }
  }
458
  /// Lanewise minimum with no extra NaN handling (see [`fast_max`](Self::fast_max)
  /// for the caveat); faster than [`min`](Self::min).
  #[inline]
  #[must_use]
  pub fn fast_min(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: min_m256(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.fast_min(rhs.a),
          b : self.b.fast_min(rhs.b),
        }
      }
    }
  }
476
  /// Lanewise minimum. Where `rhs` is NaN, `self` is kept (single-NaN
  /// behavior matches scalar `f32::min`).
  #[inline]
  #[must_use]
  pub fn min(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        // Patch NaN-in-rhs lanes back to `self`; see `max` for rationale.
        rhs.is_nan().blend(self, Self { avx: min_m256(self.avx, rhs.avx) })
      } else {
        Self {
          a : self.a.min(rhs.a),
          b : self.b.min(rhs.b),
        }
      }
    }
  }
  /// Mask of lanes that are NaN, using the classic `x != x` trick: the
  /// unordered self-comparison is true only for NaN lanes.
  #[inline]
  #[must_use]
  pub fn is_nan(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: cmp_op_mask_m256::<{cmp_op!(Unordered)}>(self.avx, self.avx) }
      } else {
        Self {
          a : self.a.is_nan(),
          b : self.b.is_nan(),
        }
      }
    }
  }
511 #[inline]
512 #[must_use]
513 pub fn is_finite(self) -> Self {
514 let shifted_exp_mask = u32x8::from(0xFF000000);
515 let u: u32x8 = cast(self);
516 let shift_u = u << 1_u64;
517 let out = !(shift_u & shifted_exp_mask).cmp_eq(shifted_exp_mask);
518 cast(out)
519 }
520 #[inline]
521 #[must_use]
522 pub fn is_inf(self) -> Self {
523 let shifted_inf = u32x8::from(0xFF000000);
524 let u: u32x8 = cast(self);
525 let shift_u = u << 1_u64;
526 let out = (shift_u).cmp_eq(shifted_inf);
527 cast(out)
528 }
529
  /// Lanewise round to nearest integer value (AVX uses the `Nearest`
  /// rounding mode, i.e. ties to even).
  #[inline]
  #[must_use]
  pub fn round(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: round_m256::<{round_op!(Nearest)}>(self.avx) }
      } else {
        Self {
          a : self.a.round(),
          b : self.b.round(),
        }
      }
    }
  }
545
  /// Lanewise round-to-nearest into `i32` with no special handling of NaN or
  /// out-of-range lanes (the raw hardware conversion result applies there);
  /// see [`round_int`](Self::round_int) for the checked version.
  #[inline]
  #[must_use]
  pub fn fast_round_int(self) -> i32x8 {
    pick! {
      if #[cfg(target_feature="avx")] {
        cast(convert_to_i32_m256i_from_m256(self.avx))
      } else {
        cast([
          self.a.fast_round_int(),
          self.b.fast_round_int()])
      }
    }
  }
562
  /// Lanewise round-to-nearest into `i32`, with NaN lanes mapped to 0 and
  /// lanes >= 2^31 saturated to `i32::MAX`.
  #[inline]
  #[must_use]
  pub fn round_int(self) -> i32x8 {
    pick! {
      if #[cfg(target_feature="avx")] {
        // `x == x` is false only for NaN, so ANDing with this mask zeroes
        // NaN lanes before conversion.
        let non_nan_mask = self.cmp_eq(self);
        let non_nan = self & non_nan_mask;
        // Values >= 2^31 convert to the i32::MIN sentinel; XORing that with
        // the all-ones `flip_to_max` mask turns it into i32::MAX.
        let flip_to_max: i32x8 = cast(self.cmp_ge(Self::splat(2147483648.0)));
        let cast: i32x8 = cast(convert_to_i32_m256i_from_m256(non_nan.avx));
        flip_to_max ^ cast
      } else {
        cast([
          self.a.round_int(),
          self.b.round_int(),
        ])
      }
    }
  }
585
586 #[inline]
590 #[must_use]
591 pub fn fast_trunc_int(self) -> i32x8 {
592 pick! {
593 if #[cfg(all(target_feature="avx"))] {
594 cast(convert_truncate_to_i32_m256i_from_m256(self.avx))
595 } else {
596 cast([
597 self.a.fast_trunc_int(),
598 self.b.fast_trunc_int(),
599 ])
600 }
601 }
602 }
603
  /// Lanewise truncation toward zero into `i32`, with NaN lanes mapped to 0
  /// and lanes >= 2^31 saturated to `i32::MAX` (same scheme as
  /// [`round_int`](Self::round_int)).
  #[inline]
  #[must_use]
  pub fn trunc_int(self) -> i32x8 {
    pick! {
      if #[cfg(target_feature="avx")] {
        // Zero out NaN lanes (`x == x` is false only for NaN).
        let non_nan_mask = self.cmp_eq(self);
        let non_nan = self & non_nan_mask;
        // >= 2^31 converts to the i32::MIN sentinel; XOR with the all-ones
        // mask flips it to i32::MAX.
        let flip_to_max: i32x8 = cast(self.cmp_ge(Self::splat(2147483648.0)));
        let cast: i32x8 = cast(convert_truncate_to_i32_m256i_from_m256(non_nan.avx));
        flip_to_max ^ cast
      } else {
        cast([
          self.a.trunc_int(),
          self.b.trunc_int(),
        ])
      }
    }
  }
  /// `(self * m) + a` per lane. With AVX+FMA this is a single fused op (one
  /// rounding); otherwise it falls back to a separate multiply and add.
  #[inline]
  #[must_use]
  pub fn mul_add(self, m: Self, a: Self) -> Self {
    pick! {
      if #[cfg(all(target_feature="avx",target_feature="fma"))] {
        Self { avx: fused_mul_add_m256(self.avx, m.avx, a.avx) }
      } else if #[cfg(target_feature="avx")] {
        (self * m) + a
      } else {
        Self {
          a : self.a.mul_add(m.a, a.a),
          b : self.b.mul_add(m.b, a.b),
        }
      }
    }
  }
643
  /// `(self * m) - a` per lane, fused when AVX+FMA is available.
  #[inline]
  #[must_use]
  pub fn mul_sub(self, m: Self, a: Self) -> Self {
    pick! {
      if #[cfg(all(target_feature="avx",target_feature="fma"))] {
        Self { avx: fused_mul_sub_m256(self.avx, m.avx, a.avx) }
      } else if #[cfg(target_feature="avx")] {
        (self * m) - a
      } else {
        Self {
          a : self.a.mul_sub(m.a, a.a),
          b : self.b.mul_sub(m.b, a.b),
        }
      }
    }
  }
661
  /// `a - (self * m)` per lane, fused when AVX+FMA is available.
  #[inline]
  #[must_use]
  pub fn mul_neg_add(self, m: Self, a: Self) -> Self {
    pick! {
      if #[cfg(all(target_feature="avx",target_feature="fma"))] {
        Self { avx: fused_mul_neg_add_m256(self.avx, m.avx, a.avx) }
      } else if #[cfg(target_feature="avx")] {
        a - (self * m)
      } else {
        Self {
          a : self.a.mul_neg_add(m.a, a.a),
          b : self.b.mul_neg_add(m.b, a.b),
        }
      }
    }
  }
679
  /// `-(self * m) - a` per lane, fused when AVX+FMA is available.
  #[inline]
  #[must_use]
  pub fn mul_neg_sub(self, m: Self, a: Self) -> Self {
    pick! {
      if #[cfg(all(target_feature="avx",target_feature="fma"))] {
        Self { avx: fused_mul_neg_sub_m256(self.avx, m.avx, a.avx) }
      } else if #[cfg(target_feature="avx")] {
        -(self * m) - a
      } else {
        Self {
          a : self.a.mul_neg_sub(m.a, a.a),
          b : self.b.mul_neg_sub(m.b, a.b),
        }
      }
    }
  }
697
698 #[inline]
699 #[must_use]
700 pub fn flip_signs(self, signs: Self) -> Self {
701 self ^ (signs & Self::from(-0.0))
702 }
703
704 #[inline]
705 #[must_use]
706 pub fn copysign(self, sign: Self) -> Self {
707 let magnitude_mask = Self::from(f32::from_bits(u32::MAX >> 1));
708 (self & magnitude_mask) | (sign & Self::from(-0.0))
709 }
710
  /// Computes `(asin(x), acos(x))` per lane, sharing one range reduction and
  /// polynomial evaluation (cheaper than calling both separately).
  ///
  /// Inputs are expected in `[-1, 1]`; out-of-range behavior is whatever the
  /// approximation produces (not checked).
  #[inline]
  pub fn asin_acos(self) -> (Self, Self) {
    // Cephes-style single-precision minimax coefficients for asin on [0, 0.5].
    const_f32_as_f32x8!(P4asinf, 4.2163199048E-2);
    const_f32_as_f32x8!(P3asinf, 2.4181311049E-2);
    const_f32_as_f32x8!(P2asinf, 4.5470025998E-2);
    const_f32_as_f32x8!(P1asinf, 7.4953002686E-2);
    const_f32_as_f32x8!(P0asinf, 1.6666752422E-1);

    let xa = self.abs();
    // |x| >= 0.5 takes the "big" path using
    // asin(x) = pi/2 - 2*asin(sqrt((1-x)/2)).
    let big = xa.cmp_ge(f32x8::splat(0.5));

    let x1 = f32x8::splat(0.5) * (f32x8::ONE - xa);
    let x2 = xa * xa;
    let x3 = big.blend(x1, x2);

    let xb = x1.sqrt();

    let x4 = big.blend(xb, xa);

    // Polynomial approximation of (asin(t) - t) / t^3 evaluated at x3.
    let z = polynomial_4!(x3, P0asinf, P1asinf, P2asinf, P3asinf, P4asinf);
    let z = z.mul_add(x3 * x4, x4);

    let z1 = z + z;

    // acos: pick the branch per lane, restoring sign for negative inputs.
    let z3 = self.cmp_lt(f32x8::ZERO).blend(f32x8::PI - z1, z1);
    let z4 = f32x8::FRAC_PI_2 - z.flip_signs(self);
    let acos = big.blend(z3, z4);

    // asin: complement on the big path, then restore the input's sign.
    let z3 = f32x8::FRAC_PI_2 - z1;
    let asin = big.blend(z3, z);
    let asin = asin.flip_signs(self);

    (asin, acos)
  }
749
  /// Lanewise arcsine for inputs in `[-1, 1]` (same approximation as
  /// [`asin_acos`](Self::asin_acos), computing only the asin result).
  #[inline]
  #[must_use]
  pub fn asin(self) -> Self {
    // Cephes-style minimax coefficients for asin on [0, 0.5].
    const_f32_as_f32x8!(P4asinf, 4.2163199048E-2);
    const_f32_as_f32x8!(P3asinf, 2.4181311049E-2);
    const_f32_as_f32x8!(P2asinf, 4.5470025998E-2);
    const_f32_as_f32x8!(P1asinf, 7.4953002686E-2);
    const_f32_as_f32x8!(P0asinf, 1.6666752422E-1);

    let xa = self.abs();
    // |x| >= 0.5: use asin(x) = pi/2 - 2*asin(sqrt((1-x)/2)).
    let big = xa.cmp_ge(f32x8::splat(0.5));

    let x1 = f32x8::splat(0.5) * (f32x8::ONE - xa);
    let x2 = xa * xa;
    let x3 = big.blend(x1, x2);

    let xb = x1.sqrt();

    let x4 = big.blend(xb, xa);

    let z = polynomial_4!(x3, P0asinf, P1asinf, P2asinf, P3asinf, P4asinf);
    let z = z.mul_add(x3 * x4, x4);

    let z1 = z + z;

    // Complement on the big path, then restore the input's sign.
    let z3 = f32x8::FRAC_PI_2 - z1;
    let asin = big.blend(z3, z);
    let asin = asin.flip_signs(self);

    asin
  }
784
  /// Lanewise arccosine for inputs in `[-1, 1]` (same approximation as
  /// [`asin_acos`](Self::asin_acos), computing only the acos result).
  #[inline]
  #[must_use]
  pub fn acos(self) -> Self {
    // Cephes-style minimax coefficients for asin on [0, 0.5].
    const_f32_as_f32x8!(P4asinf, 4.2163199048E-2);
    const_f32_as_f32x8!(P3asinf, 2.4181311049E-2);
    const_f32_as_f32x8!(P2asinf, 4.5470025998E-2);
    const_f32_as_f32x8!(P1asinf, 7.4953002686E-2);
    const_f32_as_f32x8!(P0asinf, 1.6666752422E-1);

    let xa = self.abs();
    // |x| >= 0.5: reduce via acos(x) = 2*asin(sqrt((1-x)/2)).
    let big = xa.cmp_ge(f32x8::splat(0.5));

    let x1 = f32x8::splat(0.5) * (f32x8::ONE - xa);
    let x2 = xa * xa;
    let x3 = big.blend(x1, x2);

    let xb = x1.sqrt();

    let x4 = big.blend(xb, xa);

    let z = polynomial_4!(x3, P0asinf, P1asinf, P2asinf, P3asinf, P4asinf);
    let z = z.mul_add(x3 * x4, x4);

    let z1 = z + z;

    // Pick the branch per lane, reflecting for negative inputs.
    let z3 = self.cmp_lt(f32x8::ZERO).blend(f32x8::PI - z1, z1);
    let z4 = f32x8::FRAC_PI_2 - z.flip_signs(self);
    let acos = big.blend(z3, z4);

    acos
  }
819
  /// Lanewise arctangent.
  ///
  /// Reduces |x| into [0, sqrt(2)-1] using the identities
  /// atan(x) = pi/4 + atan((x-1)/(x+1)) and atan(x) = pi/2 - atan(1/x),
  /// then evaluates a degree-3 polynomial in z^2.
  #[inline]
  pub fn atan(self) -> Self {
    // Cephes-style minimax coefficients for atan.
    const_f32_as_f32x8!(P3atanf, 8.05374449538E-2);
    const_f32_as_f32x8!(P2atanf, -1.38776856032E-1);
    const_f32_as_f32x8!(P1atanf, 1.99777106478E-1);
    const_f32_as_f32x8!(P0atanf, -3.33329491539E-1);

    let t = self.abs();

    // Split into three ranges around the reduction thresholds tan(pi/8)
    // and tan(3*pi/8) (= sqrt(2) -/+ 1).
    let notsmal = t.cmp_ge(Self::SQRT_2 - Self::ONE);
    let notbig = t.cmp_le(Self::SQRT_2 + Self::ONE);

    // Offset added back after the polynomial: 0, pi/4, or pi/2.
    let mut s = notbig.blend(Self::FRAC_PI_4, Self::FRAC_PI_2);
    s = notsmal & s;

    // Build the reduced argument z = a/b per range.
    let mut a = notbig & t;
    a = notsmal.blend(a - Self::ONE, a);
    let mut b = notbig & Self::ONE;
    b = notsmal.blend(b + t, b);
    let z = a / b;

    let zz = z * z;

    let mut re = polynomial_3!(zz, P0atanf, P1atanf, P2atanf, P3atanf);
    re = re.mul_add(zz * z, z) + s;

    // atan is odd: restore the input's sign.
    re = (self.sign_bit()).blend(-re, re);

    re
  }
857
  /// Lanewise four-quadrant arctangent of `self` (y) over `x`.
  ///
  /// NOTE(review): zero/zero lanes return 0 and the ±inf/±inf cases are
  /// special-cased below — edge-case signs may differ from scalar
  /// `f32::atan2`; confirm against upstream tests.
  #[inline]
  pub fn atan2(self, x: Self) -> Self {
    // Cephes-style minimax coefficients for atan (shared with `atan`).
    const_f32_as_f32x8!(P3atanf, 8.05374449538E-2);
    const_f32_as_f32x8!(P2atanf, -1.38776856032E-1);
    const_f32_as_f32x8!(P1atanf, 1.99777106478E-1);
    const_f32_as_f32x8!(P0atanf, -3.33329491539E-1);

    let y = self;

    // Work in the first octant: order |x|, |y| so the ratio is <= 1, and
    // remember which lanes were swapped.
    let x1 = x.abs();
    let y1 = y.abs();
    let swapxy = y1.cmp_gt(x1);
    let mut x2 = swapxy.blend(y1, x1);
    let mut y2 = swapxy.blend(x1, y1);

    // inf/inf would give NaN; force those lanes to a finite ratio of 1.
    let both_infinite = x.is_inf() & y.is_inf();
    if both_infinite.any() {
      let minus_one = -Self::ONE;
      x2 = both_infinite.blend(x2 & minus_one, x2);
      y2 = both_infinite.blend(y2 & minus_one, y2);
    }

    let t = y2 / x2;

    // Same range reduction as `atan`: fold [sqrt(2)-1, 1] via (t-1)/(t+1).
    let notsmal = t.cmp_ge(Self::SQRT_2 - Self::ONE);

    let a = notsmal.blend(t - Self::ONE, t);
    let b = notsmal.blend(t + Self::ONE, Self::ONE);
    let s = notsmal & Self::FRAC_PI_4;
    let z = a / b;

    let zz = z * z;

    let mut re = polynomial_3!(zz, P0atanf, P1atanf, P2atanf, P3atanf);
    re = re.mul_add(zz * z, z) + s;

    // Undo the octant folding: swap, zero/zero, and quadrant corrections.
    re = swapxy.blend(Self::FRAC_PI_2 - re, re);
    re = ((x | y).cmp_eq(Self::ZERO)).blend(Self::ZERO, re);
    re = (x.sign_bit()).blend(Self::PI - re, re);

    // Result is odd in y.
    re = (y.sign_bit()).blend(-re, re);

    re
  }
913
  /// Computes `(sin(x), cos(x))` per lane with one shared range reduction.
  ///
  /// Cephes-style: reduce by multiples of pi/2 (extended-precision constants
  /// DP1F..DP3F), evaluate separate sin/cos polynomials, then use the
  /// quadrant index to swap and fix signs.
  #[inline]
  #[must_use]
  pub fn sin_cos(self) -> (Self, Self) {
    // pi/2 split into three parts for extended-precision argument reduction.
    const_f32_as_f32x8!(DP1F, 0.78515625_f32 * 2.0);
    const_f32_as_f32x8!(DP2F, 2.4187564849853515625E-4_f32 * 2.0);
    const_f32_as_f32x8!(DP3F, 3.77489497744594108E-8_f32 * 2.0);

    // Minimax coefficients for sin on the reduced interval.
    const_f32_as_f32x8!(P0sinf, -1.6666654611E-1);
    const_f32_as_f32x8!(P1sinf, 8.3321608736E-3);
    const_f32_as_f32x8!(P2sinf, -1.9515295891E-4);

    // Minimax coefficients for cos on the reduced interval.
    const_f32_as_f32x8!(P0cosf, 4.166664568298827E-2);
    const_f32_as_f32x8!(P1cosf, -1.388731625493765E-3);
    const_f32_as_f32x8!(P2cosf, 2.443315711809948E-5);

    const_f32_as_f32x8!(TWO_OVER_PI, 2.0 / core::f32::consts::PI);

    let xa = self.abs();

    // Quadrant count: y = round(|x| * 2/pi), kept as both float and int.
    let y = (xa * TWO_OVER_PI).round();
    let q: i32x8 = y.round_int();

    // x = |x| - y*(pi/2), subtracted in three pieces for precision.
    let x = y.mul_neg_add(DP3F, y.mul_neg_add(DP2F, y.mul_neg_add(DP1F, xa)));

    let x2 = x * x;
    let mut s = polynomial_2!(x2, P0sinf, P1sinf, P2sinf) * (x * x2) + x;
    let mut c = polynomial_2!(x2, P0cosf, P1cosf, P2cosf) * (x2 * x2)
      + f32x8::from(0.5).mul_neg_add(x2, f32x8::from(1.0));

    // Odd quadrants swap sin and cos.
    let swap = !(q & i32x8::from(1)).cmp_eq(i32x8::from(0));

    // Huge (or non-finite) inputs: reduction is meaningless, return (0, 1).
    let mut overflow: f32x8 = cast(q.cmp_gt(i32x8::from(0x2000000)));
    overflow &= xa.is_finite();
    s = overflow.blend(f32x8::from(0.0), s);
    c = overflow.blend(f32x8::from(1.0), c);

    // Sign of sin: quadrant bit 1 (shifted to the sign position) XOR the
    // sign of the original input.
    let mut sin1 = cast::<_, f32x8>(swap).blend(c, s);
    let sign_sin: i32x8 = (q << 30) ^ cast::<_, i32x8>(self);
    sin1 = sin1.flip_signs(cast(sign_sin));

    // Sign of cos: bit 1 of (q + 1), moved to the sign position.
    let mut cos1 = cast::<_, f32x8>(swap).blend(s, c);
    let sign_cos: i32x8 = ((q + i32x8::from(1)) & i32x8::from(2)) << 30;
    cos1 ^= cast::<_, f32x8>(sign_cos);

    (sin1, cos1)
  }
966 #[inline]
967 #[must_use]
968 pub fn sin(self) -> Self {
969 let (s, _) = self.sin_cos();
970 s
971 }
972 #[inline]
973 #[must_use]
974 pub fn cos(self) -> Self {
975 let (_, c) = self.sin_cos();
976 c
977 }
978 #[inline]
979 #[must_use]
980 pub fn tan(self) -> Self {
981 let (s, c) = self.sin_cos();
982 s / c
983 }
  /// Converts each lane from radians to degrees.
  #[inline]
  #[must_use]
  pub fn to_degrees(self) -> Self {
    const_f32_as_f32x8!(RAD_TO_DEG_RATIO, 180.0_f32 / core::f32::consts::PI);
    self * RAD_TO_DEG_RATIO
  }
  /// Converts each lane from degrees to radians.
  #[inline]
  #[must_use]
  pub fn to_radians(self) -> Self {
    const_f32_as_f32x8!(DEG_TO_RAD_RATIO, core::f32::consts::PI / 180.0_f32);
    self * DEG_TO_RAD_RATIO
  }
  /// Lanewise reciprocal. On AVX this is the fast *approximate* reciprocal
  /// instruction (limited precision), not an exact `1.0 / x`.
  #[inline]
  #[must_use]
  pub fn recip(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: reciprocal_m256(self.avx) }
      } else {
        Self {
          a : self.a.recip(),
          b : self.b.recip(),
        }
      }
    }
  }
  /// Lanewise reciprocal square root. On AVX this is the fast *approximate*
  /// rsqrt instruction (limited precision).
  #[inline]
  #[must_use]
  pub fn recip_sqrt(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: reciprocal_sqrt_m256(self.avx) }
      } else {
        Self {
          a : self.a.recip_sqrt(),
          b : self.b.recip_sqrt(),
        }
      }
    }
  }
  /// Lanewise square root (full precision, unlike `recip_sqrt`).
  #[inline]
  #[must_use]
  pub fn sqrt(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: sqrt_m256(self.avx) }
      } else {
        Self {
          a : self.a.sqrt(),
          b : self.b.sqrt(),
        }
      }
    }
  }
  /// Packs each lane's sign bit into the low 8 bits of an `i32` (bit i =
  /// lane i). On comparison masks this yields one bit per true lane.
  #[inline]
  #[must_use]
  pub fn move_mask(self) -> i32 {
    pick! {
      if #[cfg(target_feature="avx")] {
        move_mask_m256(self.avx)
      } else {
        // High half occupies bits 4..8, low half bits 0..4.
        (self.b.move_mask() << 4) | self.a.move_mask()
      }
    }
  }
  /// `true` if any lane has its sign/mask bit set (any comparison hit).
  #[inline]
  #[must_use]
  pub fn any(self) -> bool {
    pick! {
      if #[cfg(target_feature="avx")] {
        move_mask_m256(self.avx) != 0
      } else {
        self.a.any() || self.b.any()
      }
    }
  }
  /// `true` if every lane has its sign/mask bit set (all comparisons hit).
  #[inline]
  #[must_use]
  pub fn all(self) -> bool {
    pick! {
      if #[cfg(target_feature="avx")] {
        // All eight mask bits set.
        move_mask_m256(self.avx) == 0b11111111
      } else {
        self.a.all() && self.b.all()
      }
    }
  }
1071 #[inline]
1072 #[must_use]
1073 pub fn none(self) -> bool {
1074 !self.any()
1075 }
1076
  /// Computes `2^n` per lane for small integral-valued `n` (internal helper
  /// for `exp`): adding `bias + 2^23` places `n` in the mantissa's low bits,
  /// and shifting left by 23 moves it into the exponent field.
  #[inline]
  fn vm_pow2n(self) -> Self {
    const_f32_as_f32x8!(pow2_23, 8388608.0);
    const_f32_as_f32x8!(bias, 127.0);
    let a = self + (bias + pow2_23);
    let c = cast::<_, i32x8>(a) << 23;
    cast::<_, f32x8>(c)
  }
1085
  /// Lanewise `e^x`.
  ///
  /// Reduces x by multiples of ln(2) (in hi/lo parts), evaluates a Taylor
  /// polynomial for the fractional part, and scales by 2^r via `vm_pow2n`.
  ///
  /// NOTE(review): lanes with |x| >= 87.3 or non-finite input all return 0 —
  /// including +inf/overflow lanes where scalar `exp` would return +inf;
  /// confirm this clamping is intended.
  #[inline]
  #[must_use]
  pub fn exp(self) -> Self {
    // Taylor coefficients 1/2! .. 1/7!.
    const_f32_as_f32x8!(P0, 1.0 / 2.0);
    const_f32_as_f32x8!(P1, 1.0 / 6.0);
    const_f32_as_f32x8!(P2, 1. / 24.);
    const_f32_as_f32x8!(P3, 1. / 120.);
    const_f32_as_f32x8!(P4, 1. / 720.);
    const_f32_as_f32x8!(P5, 1. / 5040.);
    // ln(2) split hi/lo for extended-precision reduction.
    const_f32_as_f32x8!(LN2D_HI, 0.693359375);
    const_f32_as_f32x8!(LN2D_LO, -2.12194440e-4);
    let max_x = f32x8::from(87.3);
    // r = round(x / ln 2); x <- x - r*ln2 (two-part subtraction).
    let r = (self * Self::LOG2_E).round();
    let x = r.mul_neg_add(LN2D_HI, self);
    let x = r.mul_neg_add(LN2D_LO, x);
    let z = polynomial_5!(x, P0, P1, P2, P3, P4, P5);
    let x2 = x * x;
    let z = z.mul_add(x2, x);
    let n2 = Self::vm_pow2n(r);
    let z = (z + Self::ONE) * n2;
    // Zero out lanes where the reduction is invalid.
    let in_range = self.abs().cmp_lt(max_x);
    let in_range = in_range & self.is_finite();
    in_range.blend(z, Self::ZERO)
  }
1112
  /// Extracts each lane's unbiased binary exponent as a float (internal
  /// helper for `ln`/`pow`). Shifts the exponent bits down, splices them
  /// into the mantissa of 2^23, and subtracts (2^23 + bias).
  ///
  /// NOTE(review): the logical shift keeps the sign bit at bit 8, so this is
  /// only meaningful for non-negative inputs — callers pass |x|; confirm.
  #[inline]
  fn exponent(self) -> f32x8 {
    const_f32_as_f32x8!(pow2_23, 8388608.0);
    const_f32_as_f32x8!(bias, 127.0);
    let a = cast::<_, u32x8>(self);
    let b = a >> 23;
    let c = b | cast::<_, u32x8>(pow2_23);
    let d = cast::<_, f32x8>(c);
    let e = d - (pow2_23 + bias);
    e
  }
1124
1125 #[inline]
1126 fn fraction_2(self) -> Self {
1127 let t1 = cast::<_, u32x8>(self);
1128 let t2 = cast::<_, u32x8>(
1129 (t1 & u32x8::from(0x007FFFFF)) | u32x8::from(0x3F000000),
1130 );
1131 cast::<_, f32x8>(t2)
1132 }
  /// Mask-ish value marking lanes whose exponent field is zero (i.e. ±0 or
  /// subnormal). NOTE(review): `round_float` turns the integer mask into
  /// -1.0/0.0 lanes rather than all-ones bit masks — fine where only the
  /// sign bit is consulted (AVX blend), but not a full bit mask; confirm
  /// fallback `blend` also only reads the sign bit.
  #[inline]
  fn is_zero_or_subnormal(self) -> Self {
    let t = cast::<_, i32x8>(self);
    let t = t & i32x8::splat(0x7F800000);
    i32x8::round_float(t.cmp_eq(i32x8::splat(0)))
  }
  /// All lanes set to +infinity (exponent all ones, zero mantissa).
  #[inline]
  fn infinity() -> Self {
    cast::<_, f32x8>(i32x8::splat(0x7F800000))
  }
  /// Quiet NaN with payload 0x101, returned by `ln` for invalid inputs.
  /// (`&` binds tighter than `|`, so this is 0x7FC00000 | 0x101.)
  #[inline]
  fn nan_log() -> Self {
    cast::<_, f32x8>(i32x8::splat(0x7FC00000 | 0x101 & 0x003FFFFF))
  }
  /// Quiet NaN with payload 0x101, returned by `pow` for invalid inputs.
  #[inline]
  fn nan_pow() -> Self {
    cast::<_, f32x8>(i32x8::splat(0x7FC00000 | 0x101 & 0x003FFFFF))
  }
  /// Mask of lanes whose sign bit is set (includes -0.0 and negative NaN).
  #[inline]
  pub fn sign_bit(self) -> Self {
    let t1 = cast::<_, i32x8>(self);
    // Arithmetic shift broadcasts the sign bit: lanes become 0 or -1 (all
    // ones). Reinterpreted as f32, all-ones is NaN, so `cmp_eq(0.0)` is
    // false there and the negation yields an all-ones mask.
    let t2 = t1 >> 31;
    !cast::<_, f32x8>(t2).cmp_eq(f32x8::ZERO)
  }
1157
  /// Horizontal sum of all eight lanes.
  #[inline]
  #[must_use]
  pub fn reduce_add(self) -> f32 {
    pick! {
      if #[cfg(target_feature="avx")]{
        // Tree reduction: 8 -> 4 -> 2 -> 1 lanes.
        let hi_quad = extract_m128_from_m256::<1>(self.avx);
        let lo_quad = cast_to_m128_from_m256(self.avx);
        let sum_quad = add_m128(lo_quad,hi_quad);
        // Fold the upper pair onto the lower pair.
        let lo_dual = sum_quad;
        let hi_dual = move_high_low_m128(sum_quad,sum_quad);
        let sum_dual = add_m128(lo_dual,hi_dual);
        // Fold the remaining two lanes.
        let lo = sum_dual;
        let hi = shuffle_abi_f32_all_m128::<0b_01>(sum_dual, sum_dual);
        let sum = add_m128_s(lo, hi);
        get_f32_from_m128_s(sum)
      } else {
        self.a.reduce_add() + self.b.reduce_add()
      }
    }
  }
1180
  /// Lanewise natural logarithm (Cephes-style).
  ///
  /// Splits x into mantissa and exponent, evaluates a degree-8 polynomial on
  /// the reduced mantissa, then adds back `e * ln(2)` in hi/lo parts.
  ///
  /// NOTE(review): zero/subnormal lanes blend in `Self::infinity()` (+inf),
  /// where scalar `ln(0)` is -inf — confirm against upstream; negative and
  /// subnormal inputs yield the `nan_log` quiet NaN via the underflow mask.
  #[inline]
  #[must_use]
  pub fn ln(self) -> Self {
    const_f32_as_f32x8!(HALF, 0.5);
    // Minimax coefficients for log on the reduced interval.
    const_f32_as_f32x8!(P0, 3.3333331174E-1);
    const_f32_as_f32x8!(P1, -2.4999993993E-1);
    const_f32_as_f32x8!(P2, 2.0000714765E-1);
    const_f32_as_f32x8!(P3, -1.6668057665E-1);
    const_f32_as_f32x8!(P4, 1.4249322787E-1);
    const_f32_as_f32x8!(P5, -1.2420140846E-1);
    const_f32_as_f32x8!(P6, 1.1676998740E-1);
    const_f32_as_f32x8!(P7, -1.1514610310E-1);
    const_f32_as_f32x8!(P8, 7.0376836292E-2);
    // ln(2) split hi/lo for precision when adding the exponent back.
    const_f32_as_f32x8!(LN2F_HI, 0.693359375);
    const_f32_as_f32x8!(LN2F_LO, -2.12194440e-4);
    const_f32_as_f32x8!(VM_SMALLEST_NORMAL, 1.17549435E-38);

    let x1 = self;
    // Range-reduce the mantissa into [sqrt(0.5), sqrt(2)), adjusting the
    // exponent to match.
    let x = Self::fraction_2(x1);
    let e = Self::exponent(x1);
    let mask = x.cmp_gt(Self::SQRT_2 * HALF);
    let x = (!mask).blend(x + x, x);
    let fe = mask.blend(e + Self::ONE, e);
    // Polynomial in (x - 1).
    let x = x - Self::ONE;
    let res = polynomial_8!(x, P0, P1, P2, P3, P4, P5, P6, P7, P8);
    let x2 = x * x;
    let res = x2 * x * res;
    let res = fe.mul_add(LN2F_LO, res);
    let res = res + x2.mul_neg_add(HALF, x);
    let res = fe.mul_add(LN2F_HI, res);
    // Special-case lanes only when any exist (common fast path skips this).
    let overflow = !self.is_finite();
    let underflow = x1.cmp_lt(VM_SMALLEST_NORMAL);
    let mask = overflow | underflow;
    if !mask.any() {
      res
    } else {
      let is_zero = self.is_zero_or_subnormal();
      let res = underflow.blend(Self::nan_log(), res);
      let res = is_zero.blend(Self::infinity(), res);
      let res = overflow.blend(self, res);
      res
    }
  }
1225
1226 #[inline]
1227 #[must_use]
1228 pub fn log2(self) -> Self {
1229 Self::ln(self) * Self::LOG2_E
1230 }
1231 #[inline]
1232 #[must_use]
1233 pub fn log10(self) -> Self {
1234 Self::ln(self) * Self::LOG10_E
1235 }
1236
  /// Lanewise `self ^ y` via `exp2(y * log2(|self|))` with extended-precision
  /// bookkeeping, plus IEEE-style special cases for zero bases, negative
  /// bases (integral exponents flip sign; non-integral yield NaN), and
  /// overflow/underflow.
  #[inline]
  #[must_use]
  pub fn pow_f32x8(self, y: Self) -> Self {
    // ln(2) split hi/lo and the log polynomial coefficients (same set as `ln`).
    const_f32_as_f32x8!(ln2f_hi, 0.693359375);
    const_f32_as_f32x8!(ln2f_lo, -2.12194440e-4);
    const_f32_as_f32x8!(P0logf, 3.3333331174E-1);
    const_f32_as_f32x8!(P1logf, -2.4999993993E-1);
    const_f32_as_f32x8!(P2logf, 2.0000714765E-1);
    const_f32_as_f32x8!(P3logf, -1.6668057665E-1);
    const_f32_as_f32x8!(P4logf, 1.4249322787E-1);
    const_f32_as_f32x8!(P5logf, -1.2420140846E-1);
    const_f32_as_f32x8!(P6logf, 1.1676998740E-1);
    const_f32_as_f32x8!(P7logf, -1.1514610310E-1);
    const_f32_as_f32x8!(P8logf, 7.0376836292E-2);

    // Taylor coefficients for the final exp step (1/2! .. 1/7!).
    const_f32_as_f32x8!(p2expf, 1.0 / 2.0); const_f32_as_f32x8!(p3expf, 1.0 / 6.0);
    const_f32_as_f32x8!(p4expf, 1.0 / 24.0);
    const_f32_as_f32x8!(p5expf, 1.0 / 120.0);
    const_f32_as_f32x8!(p6expf, 1.0 / 720.0);
    const_f32_as_f32x8!(p7expf, 1.0 / 5040.0);

    // --- log(|self|) with mantissa/exponent split (as in `ln`) ---
    let x1 = self.abs();
    let x = x1.fraction_2();
    let mask = x.cmp_gt(f32x8::SQRT_2 * f32x8::HALF);
    let x = (!mask).blend(x + x, x);

    let x = x - f32x8::ONE;
    let x2 = x * x;
    let lg1 = polynomial_8!(
      x, P0logf, P1logf, P2logf, P3logf, P4logf, P5logf, P6logf, P7logf, P8logf
    );
    let lg1 = lg1 * x2 * x;

    let ef = x1.exponent();
    let ef = mask.blend(ef + f32x8::ONE, ef);
    // e1: integral part of exponent*y; yr: its residual, re-added later.
    let e1 = (ef * y).round();
    let yr = ef.mul_sub(y, e1);

    // lg = log2-style mantissa log with explicit error terms to keep
    // extra precision through the multiply by y.
    let lg = f32x8::HALF.mul_neg_add(x2, x) + lg1;
    let x2_err = (f32x8::HALF * x).mul_sub(x, f32x8::HALF * x2);
    let lg_err = f32x8::HALF.mul_add(x2, lg - x) - lg1;

    // --- exp2 of the accumulated (value, error) pair ---
    let e2 = (lg * y * f32x8::LOG2_E).round();
    let v = lg.mul_sub(y, e2 * ln2f_hi);
    let v = e2.mul_neg_add(ln2f_lo, v);
    let v = v - (lg_err + x2_err).mul_sub(y, yr * f32x8::LN_2);

    let x = v;
    let e3 = (x * f32x8::LOG2_E).round();
    let x = e3.mul_neg_add(f32x8::LN_2, x);
    let x2 = x * x;
    let z = x2.mul_add(
      polynomial_5!(x, p2expf, p3expf, p4expf, p5expf, p6expf, p7expf),
      x + f32x8::ONE,
    );

    // Combine the three integral exponent parts and scale z by 2^ee.
    let ee = e1 + e2 + e3;
    let ei = cast::<_, i32x8>(ee.round_int());
    let ej = cast::<_, i32x8>(ei + (cast::<_, i32x8>(z) >> 23));

    let overflow = cast::<_, f32x8>(ej.cmp_gt(i32x8::splat(0x0FF)))
      | (ee.cmp_gt(f32x8::splat(300.0)));
    let underflow = cast::<_, f32x8>(ej.cmp_lt(i32x8::splat(0x000)))
      | (ee.cmp_lt(f32x8::splat(-300.0)));

    // Add ee to z's exponent field directly.
    let z = cast::<_, f32x8>(cast::<_, i32x8>(z) + (ei << 23));
    let z = underflow.blend(f32x8::ZERO, z);
    let z = overflow.blend(Self::infinity(), z);

    // 0^y: +inf for y < 0, 1 for y == 0, 0 for y > 0.
    let x_zero = self.is_zero_or_subnormal();
    let z = x_zero.blend(
      y.cmp_lt(f32x8::ZERO).blend(
        Self::infinity(),
        y.cmp_eq(f32x8::ZERO).blend(f32x8::ONE, f32x8::ZERO),
      ),
      z,
    );

    // Negative bases: integral y keeps |z| with the sign of (-1)^y; anything
    // else is NaN (except 0 bases, already handled).
    let x_sign = self.sign_bit();
    let z = if x_sign.any() {
      let yi = y.cmp_eq(y.round());
      // Sign bit of the result: 1 iff round(y) is odd.
      let y_odd = cast::<_, i32x8>(y.round_int() << 31).round_float();

      let z1 =
        yi.blend(z | y_odd, self.cmp_eq(Self::ZERO).blend(z, Self::nan_pow()));

      x_sign.blend(z1, z)
    } else {
      z
    };

    // NaN propagation fast path: skip when everything is finite.
    let x_finite = self.is_finite();
    let y_finite = y.is_finite();
    let e_finite = ee.is_finite();
    if (x_finite & y_finite & (e_finite | x_zero)).all() {
      return z;
    }

    (self.is_nan() | y.is_nan()).blend(self + y, z)
  }
1344 #[inline]
1345 pub fn powf(self, y: f32) -> Self {
1346 Self::pow_f32x8(self, f32x8::splat(y))
1347 }
1348
  /// Transposes `data` viewed as an 8x8 row-major matrix: output row i holds
  /// lane i of every input vector.
  #[must_use]
  #[inline]
  pub fn transpose(data: [f32x8; 8]) -> [f32x8; 8] {
    pick! {
      if #[cfg(target_feature="avx")] {
        // Stage 1: interleave adjacent row pairs (2x2 blocks).
        let a0 = unpack_lo_m256(data[0].avx, data[1].avx);
        let a1 = unpack_hi_m256(data[0].avx, data[1].avx);
        let a2 = unpack_lo_m256(data[2].avx, data[3].avx);
        let a3 = unpack_hi_m256(data[2].avx, data[3].avx);
        let a4 = unpack_lo_m256(data[4].avx, data[5].avx);
        let a5 = unpack_hi_m256(data[4].avx, data[5].avx);
        let a6 = unpack_lo_m256(data[6].avx, data[7].avx);
        let a7 = unpack_hi_m256(data[6].avx, data[7].avx);

        // Builds the _MM_SHUFFLE(z, y, x, w) immediate.
        pub const fn mm_shuffle(z: i32, y: i32, x: i32, w: i32) -> i32 {
          (z << 6) | (y << 4) | (x << 2) | w
        }

        const SHUFF_LO : i32 = mm_shuffle(1,0,1,0);
        const SHUFF_HI : i32 = mm_shuffle(3,2,3,2);

        // Stage 2: shuffle the interleaved pairs into 4x4 blocks.
        let b0 = shuffle_m256::<SHUFF_LO>(a0,a2);
        let b1 = shuffle_m256::<SHUFF_HI>(a0,a2);
        let b2 = shuffle_m256::<SHUFF_LO>(a1,a3);
        let b3 = shuffle_m256::<SHUFF_HI>(a1,a3);
        let b4 = shuffle_m256::<SHUFF_LO>(a4,a6);
        let b5 = shuffle_m256::<SHUFF_HI>(a4,a6);
        let b6 = shuffle_m256::<SHUFF_LO>(a5,a7);
        let b7 = shuffle_m256::<SHUFF_HI>(a5,a7);

        // Stage 3: swap the 128-bit halves between block pairs.
        [
          f32x8 { avx: permute2z_m256::<0x20>(b0, b4) },
          f32x8 { avx: permute2z_m256::<0x20>(b1, b5) },
          f32x8 { avx: permute2z_m256::<0x20>(b2, b6) },
          f32x8 { avx: permute2z_m256::<0x20>(b3, b7) },
          f32x8 { avx: permute2z_m256::<0x31>(b0, b4) },
          f32x8 { avx: permute2z_m256::<0x31>(b1, b5) },
          f32x8 { avx: permute2z_m256::<0x31>(b2, b6) },
          f32x8 { avx: permute2z_m256::<0x31>(b3, b7) }
        ]
      } else {
        // Scalar fallback: gather column `index` across all eight rows.
        #[inline(always)]
        fn transpose_column(data: &[f32x8; 8], index: usize) -> f32x8 {
          f32x8::new([
            data[0].as_array_ref()[index],
            data[1].as_array_ref()[index],
            data[2].as_array_ref()[index],
            data[3].as_array_ref()[index],
            data[4].as_array_ref()[index],
            data[5].as_array_ref()[index],
            data[6].as_array_ref()[index],
            data[7].as_array_ref()[index],
          ])
        }

        [
          transpose_column(&data, 0),
          transpose_column(&data, 1),
          transpose_column(&data, 2),
          transpose_column(&data, 3),
          transpose_column(&data, 4),
          transpose_column(&data, 5),
          transpose_column(&data, 6),
          transpose_column(&data, 7),
        ]
      }
    }
  }
1422
  /// Copies the lanes out into a plain array (lane order preserved).
  #[inline]
  pub fn to_array(self) -> [f32; 8] {
    cast(self)
  }
1427
  /// Views the lanes as a borrowed `[f32; 8]` (no copy).
  #[inline]
  pub fn as_array_ref(&self) -> &[f32; 8] {
    cast_ref(self)
  }
1432
  /// Views the lanes as a mutable `[f32; 8]` (no copy).
  #[inline]
  pub fn as_array_mut(&mut self) -> &mut [f32; 8] {
    cast_mut(self)
  }
1437
  /// Lanewise `i32 -> f32` conversion (a single AVX2 instruction when
  /// available, otherwise element-by-element `as` casts).
  #[inline]
  pub fn from_i32x8(v: i32x8) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx: convert_to_m256_from_i32_m256i(v.avx2) }
      } else {
        Self::new([
          v.as_array_ref()[0] as f32,
          v.as_array_ref()[1] as f32,
          v.as_array_ref()[2] as f32,
          v.as_array_ref()[3] as f32,
          v.as_array_ref()[4] as f32,
          v.as_array_ref()[5] as f32,
          v.as_array_ref()[6] as f32,
          v.as_array_ref()[7] as f32,
        ])
      }
    }
  }
1457}
1458
impl Not for f32x8 {
  type Output = Self;
  /// Bitwise complement of every lane's bits — used to invert comparison
  /// masks (not a logical negation of the float values).
  #[inline]
  fn not(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: self.avx.not() }
      } else {
        Self {
          a : self.a.not(),
          b : self.b.not(),
        }
      }
    }
  }
}