use super::*;

pick! {
  if #[cfg(target_feature="avx")] {
    #[derive(Default, Clone, Copy, PartialEq)]
    #[repr(C, align(32))]
    pub struct f64x4 { pub(crate) avx: m256d }
  } else {
    #[derive(Default, Clone, Copy, PartialEq)]
    #[repr(C, align(32))]
    pub struct f64x4 { pub(crate) a: f64x2, pub(crate) b: f64x2 }
  }
}
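
// Layout note: with AVX enabled, `f64x4` is a single 256-bit register;
// otherwise it is two `f64x2` halves that every operation below handles
// pairwise. Either way the type is 32 bytes and 32-byte aligned, which the
// `Zeroable`/`Pod` impls and the `cast` calls in this file rely on. A
// compile-time check of that layout assumption (a sketch; requires a Rust
// version with `assert!` in const contexts):
const _: () = assert!(core::mem::size_of::<f64x4>() == 32);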

macro_rules! const_f64_as_f64x4 {
  ($i:ident, $f:expr) => {
    #[allow(non_upper_case_globals)]
    pub const $i: f64x4 = f64x4::new([$f; 4]);
  };
}
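
// Usage sketch: `const_f64_as_f64x4!(PI, core::f64::consts::PI)` expands to
// `pub const PI: f64x4` with the same value in all four lanes, as the block
// of constants just below demonstrates.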

impl f64x4 {
  const_f64_as_f64x4!(ONE, 1.0);
  const_f64_as_f64x4!(ZERO, 0.0);
  const_f64_as_f64x4!(HALF, 0.5);
  const_f64_as_f64x4!(E, core::f64::consts::E);
  const_f64_as_f64x4!(FRAC_1_PI, core::f64::consts::FRAC_1_PI);
  const_f64_as_f64x4!(FRAC_2_PI, core::f64::consts::FRAC_2_PI);
  const_f64_as_f64x4!(FRAC_2_SQRT_PI, core::f64::consts::FRAC_2_SQRT_PI);
  const_f64_as_f64x4!(FRAC_1_SQRT_2, core::f64::consts::FRAC_1_SQRT_2);
  const_f64_as_f64x4!(FRAC_PI_2, core::f64::consts::FRAC_PI_2);
  const_f64_as_f64x4!(FRAC_PI_3, core::f64::consts::FRAC_PI_3);
  const_f64_as_f64x4!(FRAC_PI_4, core::f64::consts::FRAC_PI_4);
  const_f64_as_f64x4!(FRAC_PI_6, core::f64::consts::FRAC_PI_6);
  const_f64_as_f64x4!(FRAC_PI_8, core::f64::consts::FRAC_PI_8);
  const_f64_as_f64x4!(LN_2, core::f64::consts::LN_2);
  const_f64_as_f64x4!(LN_10, core::f64::consts::LN_10);
  const_f64_as_f64x4!(LOG2_E, core::f64::consts::LOG2_E);
  const_f64_as_f64x4!(LOG10_E, core::f64::consts::LOG10_E);
  const_f64_as_f64x4!(LOG10_2, core::f64::consts::LOG10_2);
  const_f64_as_f64x4!(LOG2_10, core::f64::consts::LOG2_10);
  const_f64_as_f64x4!(PI, core::f64::consts::PI);
  const_f64_as_f64x4!(SQRT_2, core::f64::consts::SQRT_2);
  const_f64_as_f64x4!(TAU, core::f64::consts::TAU);
}

unsafe impl Zeroable for f64x4 {}
unsafe impl Pod for f64x4 {}

impl Add for f64x4 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn add(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: add_m256d(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.add(rhs.a),
          b : self.b.add(rhs.b),
        }
      }
    }
  }
}

impl Sub for f64x4 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn sub(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: sub_m256d(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.sub(rhs.a),
          b : self.b.sub(rhs.b),
        }
      }
    }
  }
}

impl Mul for f64x4 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn mul(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: mul_m256d(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.mul(rhs.a),
          b : self.b.mul(rhs.b),
        }
      }
    }
  }
}

impl Div for f64x4 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn div(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: div_m256d(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.div(rhs.a),
          b : self.b.div(rhs.b),
        }
      }
    }
  }
}

impl Add<f64> for f64x4 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn add(self, rhs: f64) -> Self::Output {
    self.add(Self::splat(rhs))
  }
}

impl Sub<f64> for f64x4 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn sub(self, rhs: f64) -> Self::Output {
    self.sub(Self::splat(rhs))
  }
}

impl Mul<f64> for f64x4 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn mul(self, rhs: f64) -> Self::Output {
    self.mul(Self::splat(rhs))
  }
}

impl Div<f64> for f64x4 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn div(self, rhs: f64) -> Self::Output {
    self.div(Self::splat(rhs))
  }
}

impl Add<f64x4> for f64 {
  type Output = f64x4;
  #[inline]
  #[must_use]
  fn add(self, rhs: f64x4) -> Self::Output {
    f64x4::splat(self).add(rhs)
  }
}

impl Sub<f64x4> for f64 {
  type Output = f64x4;
  #[inline]
  #[must_use]
  fn sub(self, rhs: f64x4) -> Self::Output {
    f64x4::splat(self).sub(rhs)
  }
}

impl Mul<f64x4> for f64 {
  type Output = f64x4;
  #[inline]
  #[must_use]
  fn mul(self, rhs: f64x4) -> Self::Output {
    f64x4::splat(self).mul(rhs)
  }
}

impl Div<f64x4> for f64 {
  type Output = f64x4;
  #[inline]
  #[must_use]
  fn div(self, rhs: f64x4) -> Self::Output {
    f64x4::splat(self).div(rhs)
  }
}

impl BitAnd for f64x4 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn bitand(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: bitand_m256d(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.bitand(rhs.a),
          b : self.b.bitand(rhs.b),
        }
      }
    }
  }
}

impl BitOr for f64x4 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn bitor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: bitor_m256d(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.bitor(rhs.a),
          b : self.b.bitor(rhs.b),
        }
      }
    }
  }
}

impl BitXor for f64x4 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn bitxor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: bitxor_m256d(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.bitxor(rhs.a),
          b : self.b.bitxor(rhs.b),
        }
      }
    }
  }
}
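
// The comparison impls below return lane masks, not bools: each lane is
// all-ones where the predicate holds and all-zeros where it does not, so the
// result feeds directly into `blend`. A minimal sketch of that idiom:
//
//   let m = a.cmp_lt(b);   // mask: lanes where a < b
//   let r = m.blend(a, b); // lanewise min via compare + blend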
impl CmpEq for f64x4 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn cmp_eq(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: cmp_op_mask_m256d::<{cmp_op!(EqualOrdered)}>(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.cmp_eq(rhs.a),
          b : self.b.cmp_eq(rhs.b),
        }
      }
    }
  }
}

impl CmpGe for f64x4 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn cmp_ge(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: cmp_op_mask_m256d::<{cmp_op!(GreaterEqualOrdered)}>(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.cmp_ge(rhs.a),
          b : self.b.cmp_ge(rhs.b),
        }
      }
    }
  }
}

impl CmpGt for f64x4 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn cmp_gt(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: cmp_op_mask_m256d::<{cmp_op!(GreaterThanOrdered)}>(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.cmp_gt(rhs.a),
          b : self.b.cmp_gt(rhs.b),
        }
      }
    }
  }
}

impl CmpNe for f64x4 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn cmp_ne(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: cmp_op_mask_m256d::<{cmp_op!(NotEqualOrdered)}>(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.cmp_ne(rhs.a),
          b : self.b.cmp_ne(rhs.b),
        }
      }
    }
  }
}

impl CmpLe for f64x4 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn cmp_le(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: cmp_op_mask_m256d::<{cmp_op!(LessEqualOrdered)}>(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.cmp_le(rhs.a),
          b : self.b.cmp_le(rhs.b),
        }
      }
    }
  }
}

impl CmpLt for f64x4 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn cmp_lt(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: cmp_op_mask_m256d::<{cmp_op!(LessThanOrdered)}>(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.cmp_lt(rhs.a),
          b : self.b.cmp_lt(rhs.b),
        }
      }
    }
  }
}

impl f64x4 {
  #[inline]
  #[must_use]
  pub const fn new(array: [f64; 4]) -> Self {
    unsafe { core::mem::transmute(array) }
  }
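
  /// Lanewise select: for each lane, picks the lane from `t` where `self`
  /// has its mask bits set and from `f` otherwise. `self` is expected to be
  /// a comparison-style mask (all-ones or all-zeros per lane); other bit
  /// patterns give a bitwise mix that is unlikely to be what you want.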
  #[inline]
  #[must_use]
  pub fn blend(self, t: Self, f: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: blend_varying_m256d(f.avx, t.avx, self.avx) }
      } else {
        Self {
          a : self.a.blend(t.a, f.a),
          b : self.b.blend(t.b, f.b),
        }
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn abs(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        let non_sign_bits = f64x4::from(f64::from_bits(i64::MAX as u64));
        self & non_sign_bits
      } else {
        Self {
          a : self.a.abs(),
          b : self.b.abs(),
        }
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn floor(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: floor_m256d(self.avx) }
      } else {
        Self {
          a : self.a.floor(),
          b : self.b.floor(),
        }
      }
    }
  }
  #[inline]
  #[must_use]
  pub fn ceil(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: ceil_m256d(self.avx) }
      } else {
        Self {
          a : self.a.ceil(),
          b : self.b.ceil(),
        }
      }
    }
  }
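
  /// Lanewise maximum that maps straight to the hardware `max` where
  /// available. Faster than `max`, but if a lane holds NaN the result is
  /// whatever the underlying instruction produces; no NaN behavior is
  /// specified.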
  #[inline]
  #[must_use]
  pub fn fast_max(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: max_m256d(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.fast_max(rhs.a),
          b : self.b.fast_max(rhs.b),
        }
      }
    }
  }
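
  /// Lanewise maximum with defined NaN handling: wherever a lane of `rhs` is
  /// NaN, the corresponding lane of `self` is returned instead.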
  #[inline]
  #[must_use]
  pub fn max(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        rhs.is_nan().blend(self, Self { avx: max_m256d(self.avx, rhs.avx) })
      } else {
        Self {
          a : self.a.max(rhs.a),
          b : self.b.max(rhs.b),
        }
      }
    }
  }
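
  /// Lanewise minimum that maps straight to the hardware `min` where
  /// available. Faster than `min`, but if a lane holds NaN the result is
  /// whatever the underlying instruction produces; no NaN behavior is
  /// specified.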
  #[inline]
  #[must_use]
  pub fn fast_min(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: min_m256d(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.fast_min(rhs.a),
          b : self.b.fast_min(rhs.b),
        }
      }
    }
  }
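
  /// Lanewise minimum with defined NaN handling: wherever a lane of `rhs` is
  /// NaN, the corresponding lane of `self` is returned instead.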
  #[inline]
  #[must_use]
  pub fn min(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        rhs.is_nan().blend(self, Self { avx: min_m256d(self.avx, rhs.avx) })
      } else {
        Self {
          a : self.a.min(rhs.a),
          b : self.b.min(rhs.b),
        }
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn is_nan(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: cmp_op_mask_m256d::<{cmp_op!(Unordered)}>(self.avx, self.avx) }
      } else {
        Self {
          a : self.a.is_nan(),
          b : self.b.is_nan(),
        }
      }
    }
  }
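
  // Exponent-field bit tricks: shifting the bits left by one discards the
  // sign, leaving the 11 exponent bits at the top. A value is non-finite
  // exactly when those bits are all ones (mask 0xFFE0_0000_0000_0000 after
  // the shift), which is what `is_finite` and `is_inf` test below.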
  #[inline]
  #[must_use]
  pub fn is_finite(self) -> Self {
    let shifted_exp_mask = u64x4::from(0xFFE0000000000000);
    let u: u64x4 = cast(self);
    let shift_u = u << 1_u64;
    let out = !(shift_u & shifted_exp_mask).cmp_eq(shifted_exp_mask);
    cast(out)
  }

  #[inline]
  #[must_use]
  pub fn is_inf(self) -> Self {
    let shifted_inf = u64x4::from(0xFFE0000000000000);
    let u: u64x4 = cast(self);
    let shift_u = u << 1_u64;
    let out = (shift_u).cmp_eq(shifted_inf);
    cast(out)
  }

  #[inline]
  #[must_use]
  pub fn round(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: round_m256d::<{round_op!(Nearest)}>(self.avx) }
      } else {
        Self {
          a : self.a.round(),
          b : self.b.round(),
        }
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn round_int(self) -> i64x4 {
    let rounded: [f64; 4] = cast(self.round());
    cast([
      rounded[0] as i64,
      rounded[1] as i64,
      rounded[2] as i64,
      rounded[3] as i64,
    ])
  }
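
  // The fused multiply ops below compile to real FMA instructions when both
  // `avx` and `fma` are enabled. Otherwise they fall back to a separate
  // multiply and add/sub, which rounds twice and so can differ from a truly
  // fused result in the last bit.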
  #[inline]
  #[must_use]
  pub fn mul_add(self, m: Self, a: Self) -> Self {
    pick! {
      if #[cfg(all(target_feature="avx",target_feature="fma"))] {
        Self { avx: fused_mul_add_m256d(self.avx, m.avx, a.avx) }
      } else if #[cfg(target_feature="avx")] {
        (self * m) + a
      } else {
        Self {
          a : self.a.mul_add(m.a, a.a),
          b : self.b.mul_add(m.b, a.b),
        }
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn mul_sub(self, m: Self, a: Self) -> Self {
    pick! {
      if #[cfg(all(target_feature="avx",target_feature="fma"))] {
        Self { avx: fused_mul_sub_m256d(self.avx, m.avx, a.avx) }
      } else if #[cfg(target_feature="avx")] {
        (self * m) - a
      } else {
        Self {
          a : self.a.mul_sub(m.a, a.a),
          b : self.b.mul_sub(m.b, a.b),
        }
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn mul_neg_add(self, m: Self, a: Self) -> Self {
    pick! {
      if #[cfg(all(target_feature="avx",target_feature="fma"))] {
        Self { avx: fused_mul_neg_add_m256d(self.avx, m.avx, a.avx) }
      } else if #[cfg(target_feature="avx")] {
        a - (self * m)
      } else {
        Self {
          a : self.a.mul_neg_add(m.a, a.a),
          b : self.b.mul_neg_add(m.b, a.b),
        }
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn mul_neg_sub(self, m: Self, a: Self) -> Self {
    pick! {
      if #[cfg(all(target_feature="avx",target_feature="fma"))] {
        Self { avx: fused_mul_neg_sub_m256d(self.avx, m.avx, a.avx) }
      } else if #[cfg(target_feature="avx")] {
        -(self * m) - a
      } else {
        Self {
          a : self.a.mul_neg_sub(m.a, a.a),
          b : self.b.mul_neg_sub(m.b, a.b),
        }
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn flip_signs(self, signs: Self) -> Self {
    self ^ (signs & Self::from(-0.0))
  }

  #[inline]
  #[must_use]
  pub fn copysign(self, sign: Self) -> Self {
    let magnitude_mask = Self::from(f64::from_bits(u64::MAX >> 1));
    (self & magnitude_mask) | (sign & Self::from(-0.0))
  }
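
  /// Computes `(asin(self), acos(self))` in one pass, sharing the rational
  /// polynomial work between the two results. Inputs are expected in
  /// `[-1, 1]`. `acos` and `asin` below repeat this body and return just one
  /// of the pair.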
  #[inline]
  pub fn asin_acos(self) -> (Self, Self) {
    const_f64_as_f64x4!(R4asin, 2.967721961301243206100E-3);
    const_f64_as_f64x4!(R3asin, -5.634242780008963776856E-1);
    const_f64_as_f64x4!(R2asin, 6.968710824104713396794E0);
    const_f64_as_f64x4!(R1asin, -2.556901049652824852289E1);
    const_f64_as_f64x4!(R0asin, 2.853665548261061424989E1);

    const_f64_as_f64x4!(S3asin, -2.194779531642920639778E1);
    const_f64_as_f64x4!(S2asin, 1.470656354026814941758E2);
    const_f64_as_f64x4!(S1asin, -3.838770957603691357202E2);
    const_f64_as_f64x4!(S0asin, 3.424398657913078477438E2);

    const_f64_as_f64x4!(P5asin, 4.253011369004428248960E-3);
    const_f64_as_f64x4!(P4asin, -6.019598008014123785661E-1);
    const_f64_as_f64x4!(P3asin, 5.444622390564711410273E0);
    const_f64_as_f64x4!(P2asin, -1.626247967210700244449E1);
    const_f64_as_f64x4!(P1asin, 1.956261983317594739197E1);
    const_f64_as_f64x4!(P0asin, -8.198089802484824371615E0);

    const_f64_as_f64x4!(Q4asin, -1.474091372988853791896E1);
    const_f64_as_f64x4!(Q3asin, 7.049610280856842141659E1);
    const_f64_as_f64x4!(Q2asin, -1.471791292232726029859E2);
    const_f64_as_f64x4!(Q1asin, 1.395105614657485689735E2);
    const_f64_as_f64x4!(Q0asin, -4.918853881490881290097E1);

    let xa = self.abs();

    let big = xa.cmp_ge(f64x4::splat(0.625));

    let x1 = big.blend(f64x4::splat(1.0) - xa, xa * xa);

    let x2 = x1 * x1;
    let x3 = x2 * x1;
    let x4 = x2 * x2;
    let x5 = x4 * x1;

    let do_big = big.any();
    let do_small = !big.all();

    let mut rx = f64x4::default();
    let mut sx = f64x4::default();
    let mut px = f64x4::default();
    let mut qx = f64x4::default();

    if do_big {
      rx = x3.mul_add(R3asin, x2 * R2asin)
        + x4.mul_add(R4asin, x1.mul_add(R1asin, R0asin));
      sx =
        x3.mul_add(S3asin, x4) + x2.mul_add(S2asin, x1.mul_add(S1asin, S0asin));
    }

    if do_small {
      px = x3.mul_add(P3asin, P0asin)
        + x4.mul_add(P4asin, x1 * P1asin)
        + x5.mul_add(P5asin, x2 * P2asin);
      qx = x4.mul_add(Q4asin, x5)
        + x3.mul_add(Q3asin, x1 * Q1asin)
        + x2.mul_add(Q2asin, Q0asin);
    };

    let vx = big.blend(rx, px);
    let wx = big.blend(sx, qx);

    let y1 = vx / wx * x1;

    let mut z1 = f64x4::default();
    let mut z2 = f64x4::default();
    if do_big {
      let xb = (x1 + x1).sqrt();
      z1 = xb.mul_add(y1, xb);
    }

    if do_small {
      z2 = xa.mul_add(y1, xa);
    }

    let z3 = f64x4::FRAC_PI_2 - z1;
    let asin = big.blend(z3, z2);
    let asin = asin.flip_signs(self);

    let z3 = self.cmp_lt(f64x4::ZERO).blend(f64x4::PI - z1, z1);
    let z4 = f64x4::FRAC_PI_2 - z2.flip_signs(self);
    let acos = big.blend(z3, z4);

    (asin, acos)
  }

  #[inline]
  pub fn acos(self) -> Self {
    const_f64_as_f64x4!(R4asin, 2.967721961301243206100E-3);
    const_f64_as_f64x4!(R3asin, -5.634242780008963776856E-1);
    const_f64_as_f64x4!(R2asin, 6.968710824104713396794E0);
    const_f64_as_f64x4!(R1asin, -2.556901049652824852289E1);
    const_f64_as_f64x4!(R0asin, 2.853665548261061424989E1);

    const_f64_as_f64x4!(S3asin, -2.194779531642920639778E1);
    const_f64_as_f64x4!(S2asin, 1.470656354026814941758E2);
    const_f64_as_f64x4!(S1asin, -3.838770957603691357202E2);
    const_f64_as_f64x4!(S0asin, 3.424398657913078477438E2);

    const_f64_as_f64x4!(P5asin, 4.253011369004428248960E-3);
    const_f64_as_f64x4!(P4asin, -6.019598008014123785661E-1);
    const_f64_as_f64x4!(P3asin, 5.444622390564711410273E0);
    const_f64_as_f64x4!(P2asin, -1.626247967210700244449E1);
    const_f64_as_f64x4!(P1asin, 1.956261983317594739197E1);
    const_f64_as_f64x4!(P0asin, -8.198089802484824371615E0);

    const_f64_as_f64x4!(Q4asin, -1.474091372988853791896E1);
    const_f64_as_f64x4!(Q3asin, 7.049610280856842141659E1);
    const_f64_as_f64x4!(Q2asin, -1.471791292232726029859E2);
    const_f64_as_f64x4!(Q1asin, 1.395105614657485689735E2);
    const_f64_as_f64x4!(Q0asin, -4.918853881490881290097E1);

    let xa = self.abs();

    let big = xa.cmp_ge(f64x4::splat(0.625));

    let x1 = big.blend(f64x4::splat(1.0) - xa, xa * xa);

    let x2 = x1 * x1;
    let x3 = x2 * x1;
    let x4 = x2 * x2;
    let x5 = x4 * x1;

    let do_big = big.any();
    let do_small = !big.all();

    let mut rx = f64x4::default();
    let mut sx = f64x4::default();
    let mut px = f64x4::default();
    let mut qx = f64x4::default();

    if do_big {
      rx = x3.mul_add(R3asin, x2 * R2asin)
        + x4.mul_add(R4asin, x1.mul_add(R1asin, R0asin));
      sx =
        x3.mul_add(S3asin, x4) + x2.mul_add(S2asin, x1.mul_add(S1asin, S0asin));
    }
    if do_small {
      px = x3.mul_add(P3asin, P0asin)
        + x4.mul_add(P4asin, x1 * P1asin)
        + x5.mul_add(P5asin, x2 * P2asin);
      qx = x4.mul_add(Q4asin, x5)
        + x3.mul_add(Q3asin, x1 * Q1asin)
        + x2.mul_add(Q2asin, Q0asin);
    };

    let vx = big.blend(rx, px);
    let wx = big.blend(sx, qx);

    let y1 = vx / wx * x1;

    let mut z1 = f64x4::default();
    let mut z2 = f64x4::default();
    if do_big {
      let xb = (x1 + x1).sqrt();
      z1 = xb.mul_add(y1, xb);
    }

    if do_small {
      z2 = xa.mul_add(y1, xa);
    }

    let z3 = self.cmp_lt(f64x4::ZERO).blend(f64x4::PI - z1, z1);
    let z4 = f64x4::FRAC_PI_2 - z2.flip_signs(self);
    let acos = big.blend(z3, z4);

    acos
  }
  #[inline]
  #[must_use]
  pub fn asin(self) -> Self {
    const_f64_as_f64x4!(R4asin, 2.967721961301243206100E-3);
    const_f64_as_f64x4!(R3asin, -5.634242780008963776856E-1);
    const_f64_as_f64x4!(R2asin, 6.968710824104713396794E0);
    const_f64_as_f64x4!(R1asin, -2.556901049652824852289E1);
    const_f64_as_f64x4!(R0asin, 2.853665548261061424989E1);

    const_f64_as_f64x4!(S3asin, -2.194779531642920639778E1);
    const_f64_as_f64x4!(S2asin, 1.470656354026814941758E2);
    const_f64_as_f64x4!(S1asin, -3.838770957603691357202E2);
    const_f64_as_f64x4!(S0asin, 3.424398657913078477438E2);

    const_f64_as_f64x4!(P5asin, 4.253011369004428248960E-3);
    const_f64_as_f64x4!(P4asin, -6.019598008014123785661E-1);
    const_f64_as_f64x4!(P3asin, 5.444622390564711410273E0);
    const_f64_as_f64x4!(P2asin, -1.626247967210700244449E1);
    const_f64_as_f64x4!(P1asin, 1.956261983317594739197E1);
    const_f64_as_f64x4!(P0asin, -8.198089802484824371615E0);

    const_f64_as_f64x4!(Q4asin, -1.474091372988853791896E1);
    const_f64_as_f64x4!(Q3asin, 7.049610280856842141659E1);
    const_f64_as_f64x4!(Q2asin, -1.471791292232726029859E2);
    const_f64_as_f64x4!(Q1asin, 1.395105614657485689735E2);
    const_f64_as_f64x4!(Q0asin, -4.918853881490881290097E1);

    let xa = self.abs();

    let big = xa.cmp_ge(f64x4::splat(0.625));

    let x1 = big.blend(f64x4::splat(1.0) - xa, xa * xa);

    let x2 = x1 * x1;
    let x3 = x2 * x1;
    let x4 = x2 * x2;
    let x5 = x4 * x1;

    let do_big = big.any();
    let do_small = !big.all();

    let mut rx = f64x4::default();
    let mut sx = f64x4::default();
    let mut px = f64x4::default();
    let mut qx = f64x4::default();

    if do_big {
      rx = x3.mul_add(R3asin, x2 * R2asin)
        + x4.mul_add(R4asin, x1.mul_add(R1asin, R0asin));
      sx =
        x3.mul_add(S3asin, x4) + x2.mul_add(S2asin, x1.mul_add(S1asin, S0asin));
    }
    if do_small {
      px = x3.mul_add(P3asin, P0asin)
        + x4.mul_add(P4asin, x1 * P1asin)
        + x5.mul_add(P5asin, x2 * P2asin);
      qx = x4.mul_add(Q4asin, x5)
        + x3.mul_add(Q3asin, x1 * Q1asin)
        + x2.mul_add(Q2asin, Q0asin);
    };

    let vx = big.blend(rx, px);
    let wx = big.blend(sx, qx);

    let y1 = vx / wx * x1;

    let mut z1 = f64x4::default();
    let mut z2 = f64x4::default();
    if do_big {
      let xb = (x1 + x1).sqrt();
      z1 = xb.mul_add(y1, xb);
    }

    if do_small {
      z2 = xa.mul_add(y1, xa);
    }

    let z3 = f64x4::FRAC_PI_2 - z1;
    let asin = big.blend(z3, z2);
    let asin = asin.flip_signs(self);

    asin
  }

  #[inline]
  pub fn atan(self) -> Self {
    const_f64_as_f64x4!(MORE_BITS, 6.123233995736765886130E-17);
    const_f64_as_f64x4!(MORE_BITS_O2, 6.123233995736765886130E-17 * 0.5);
    const_f64_as_f64x4!(T3PO8, core::f64::consts::SQRT_2 + 1.0);

    const_f64_as_f64x4!(P4atan, -8.750608600031904122785E-1);
    const_f64_as_f64x4!(P3atan, -1.615753718733365076637E1);
    const_f64_as_f64x4!(P2atan, -7.500855792314704667340E1);
    const_f64_as_f64x4!(P1atan, -1.228866684490136173410E2);
    const_f64_as_f64x4!(P0atan, -6.485021904942025371773E1);

    const_f64_as_f64x4!(Q4atan, 2.485846490142306297962E1);
    const_f64_as_f64x4!(Q3atan, 1.650270098316988542046E2);
    const_f64_as_f64x4!(Q2atan, 4.328810604912902668951E2);
    const_f64_as_f64x4!(Q1atan, 4.853903996359136964868E2);
    const_f64_as_f64x4!(Q0atan, 1.945506571482613964425E2);

    let t = self.abs();

    let notbig = t.cmp_le(T3PO8);
    let notsmal = t.cmp_ge(Self::splat(0.66));

    let mut s = notbig.blend(Self::FRAC_PI_4, Self::FRAC_PI_2);
    s = notsmal & s;
    let mut fac = notbig.blend(MORE_BITS_O2, MORE_BITS);
    fac = notsmal & fac;

    let mut a = notbig & t;
    a = notsmal.blend(a - Self::ONE, a);
    let mut b = notbig & Self::ONE;
    b = notsmal.blend(b + t, b);
    let z = a / b;

    let zz = z * z;

    let px = polynomial_4!(zz, P0atan, P1atan, P2atan, P3atan, P4atan);
    let qx = polynomial_5n!(zz, Q0atan, Q1atan, Q2atan, Q3atan, Q4atan);

    let mut re = (px / qx).mul_add(z * zz, z);
    re += s + fac;

    re = (self.sign_bit()).blend(-re, re);

    re
  }
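
  /// Four-quadrant arctangent of `self / x`, where `self` plays the role of
  /// `y`. Runs the same core approximation as `atan`, then patches the sign
  /// and quadrant from the signs of `x` and `y` (including the
  /// infinity/infinity and zero/zero cases).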
  #[inline]
  pub fn atan2(self, x: Self) -> Self {
    const_f64_as_f64x4!(MORE_BITS, 6.123233995736765886130E-17);
    const_f64_as_f64x4!(MORE_BITS_O2, 6.123233995736765886130E-17 * 0.5);
    const_f64_as_f64x4!(T3PO8, core::f64::consts::SQRT_2 + 1.0);

    const_f64_as_f64x4!(P4atan, -8.750608600031904122785E-1);
    const_f64_as_f64x4!(P3atan, -1.615753718733365076637E1);
    const_f64_as_f64x4!(P2atan, -7.500855792314704667340E1);
    const_f64_as_f64x4!(P1atan, -1.228866684490136173410E2);
    const_f64_as_f64x4!(P0atan, -6.485021904942025371773E1);

    const_f64_as_f64x4!(Q4atan, 2.485846490142306297962E1);
    const_f64_as_f64x4!(Q3atan, 1.650270098316988542046E2);
    const_f64_as_f64x4!(Q2atan, 4.328810604912902668951E2);
    const_f64_as_f64x4!(Q1atan, 4.853903996359136964868E2);
    const_f64_as_f64x4!(Q0atan, 1.945506571482613964425E2);

    let y = self;

    let x1 = x.abs();
    let y1 = y.abs();
    let swapxy = y1.cmp_gt(x1);
    let mut x2 = swapxy.blend(y1, x1);
    let mut y2 = swapxy.blend(x1, y1);

    let both_infinite = x.is_inf() & y.is_inf();
    if both_infinite.any() {
      let minus_one = -Self::ONE;
      x2 = both_infinite.blend(x2 & minus_one, x2);
      y2 = both_infinite.blend(y2 & minus_one, y2);
    }

    let t = y2 / x2;

    let notbig = t.cmp_le(T3PO8);
    let notsmal = t.cmp_ge(Self::splat(0.66));

    let mut s = notbig.blend(Self::FRAC_PI_4, Self::FRAC_PI_2);
    s = notsmal & s;
    let mut fac = notbig.blend(MORE_BITS_O2, MORE_BITS);
    fac = notsmal & fac;

    let mut a = notbig & t;
    a = notsmal.blend(a - Self::ONE, a);
    let mut b = notbig & Self::ONE;
    b = notsmal.blend(b + t, b);
    let z = a / b;

    let zz = z * z;

    let px = polynomial_4!(zz, P0atan, P1atan, P2atan, P3atan, P4atan);
    let qx = polynomial_5n!(zz, Q0atan, Q1atan, Q2atan, Q3atan, Q4atan);

    let mut re = (px / qx).mul_add(z * zz, z);
    re += s + fac;

    re = swapxy.blend(Self::FRAC_PI_2 - re, re);
    re = ((x | y).cmp_eq(Self::ZERO)).blend(Self::ZERO, re);
    re = (x.sign_bit()).blend(Self::PI - re, re);

    re = (y.sign_bit()).blend(-re, re);

    re
  }
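
  /// Computes `(sin(self), cos(self))` in one pass. The argument is reduced
  /// modulo pi/2 in three extended-precision steps (DP1..DP3), a single pair
  /// of degree-5 polynomials in `x * x` then serves both results, and
  /// quadrant-based swap/sign fixups are applied at the end.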
  #[inline]
  #[must_use]
  pub fn sin_cos(self) -> (Self, Self) {
    const_f64_as_f64x4!(P0sin, -1.66666666666666307295E-1);
    const_f64_as_f64x4!(P1sin, 8.33333333332211858878E-3);
    const_f64_as_f64x4!(P2sin, -1.98412698295895385996E-4);
    const_f64_as_f64x4!(P3sin, 2.75573136213857245213E-6);
    const_f64_as_f64x4!(P4sin, -2.50507477628578072866E-8);
    const_f64_as_f64x4!(P5sin, 1.58962301576546568060E-10);

    const_f64_as_f64x4!(P0cos, 4.16666666666665929218E-2);
    const_f64_as_f64x4!(P1cos, -1.38888888888730564116E-3);
    const_f64_as_f64x4!(P2cos, 2.48015872888517045348E-5);
    const_f64_as_f64x4!(P3cos, -2.75573141792967388112E-7);
    const_f64_as_f64x4!(P4cos, 2.08757008419747316778E-9);
    const_f64_as_f64x4!(P5cos, -1.13585365213876817300E-11);

    const_f64_as_f64x4!(DP1, 7.853981554508209228515625E-1 * 2.);
    const_f64_as_f64x4!(DP2, 7.94662735614792836714E-9 * 2.);
    const_f64_as_f64x4!(DP3, 3.06161699786838294307E-17 * 2.);

    const_f64_as_f64x4!(TWO_OVER_PI, 2.0 / core::f64::consts::PI);

    let xa = self.abs();

    let y = (xa * TWO_OVER_PI).round();
    let q = y.round_int();

    let x = y.mul_neg_add(DP3, y.mul_neg_add(DP2, y.mul_neg_add(DP1, xa)));

    let x2 = x * x;
    let mut s = polynomial_5!(x2, P0sin, P1sin, P2sin, P3sin, P4sin, P5sin);
    let mut c = polynomial_5!(x2, P0cos, P1cos, P2cos, P3cos, P4cos, P5cos);
    s = (x * x2).mul_add(s, x);
    c =
      (x2 * x2).mul_add(c, x2.mul_neg_add(f64x4::from(0.5), f64x4::from(1.0)));

    let swap = !((q & i64x4::from(1)).cmp_eq(i64x4::from(0)));

    let mut overflow: f64x4 = cast(q.cmp_gt(i64x4::from(0x80000000000000)));
    overflow &= xa.is_finite();
    s = overflow.blend(f64x4::from(0.0), s);
    c = overflow.blend(f64x4::from(1.0), c);

    let mut sin1 = cast::<_, f64x4>(swap).blend(c, s);
    let sign_sin: i64x4 = (q << 62) ^ cast::<_, i64x4>(self);
    sin1 = sin1.flip_signs(cast(sign_sin));

    let mut cos1 = cast::<_, f64x4>(swap).blend(s, c);
    let sign_cos: i64x4 = ((q + i64x4::from(1)) & i64x4::from(2)) << 62;
    cos1 ^= cast::<_, f64x4>(sign_cos);

    (sin1, cos1)
  }
  #[inline]
  #[must_use]
  pub fn sin(self) -> Self {
    let (s, _) = self.sin_cos();
    s
  }
  #[inline]
  #[must_use]
  pub fn cos(self) -> Self {
    let (_, c) = self.sin_cos();
    c
  }
  #[inline]
  #[must_use]
  pub fn tan(self) -> Self {
    let (s, c) = self.sin_cos();
    s / c
  }
  #[inline]
  #[must_use]
  pub fn to_degrees(self) -> Self {
    const_f64_as_f64x4!(RAD_TO_DEG_RATIO, 180.0_f64 / core::f64::consts::PI);
    self * RAD_TO_DEG_RATIO
  }
  #[inline]
  #[must_use]
  pub fn to_radians(self) -> Self {
    const_f64_as_f64x4!(DEG_TO_RAD_RATIO, core::f64::consts::PI / 180.0_f64);
    self * DEG_TO_RAD_RATIO
  }
  #[inline]
  #[must_use]
  pub fn sqrt(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: sqrt_m256d(self.avx) }
      } else {
        Self {
          a : self.a.sqrt(),
          b : self.b.sqrt(),
        }
      }
    }
  }
  #[inline]
  #[must_use]
  pub fn move_mask(self) -> i32 {
    pick! {
      if #[cfg(target_feature="avx")] {
        move_mask_m256d(self.avx)
      } else {
        (self.b.move_mask() << 2) | self.a.move_mask()
      }
    }
  }
  #[inline]
  #[must_use]
  pub fn any(self) -> bool {
    pick! {
      if #[cfg(target_feature="avx")] {
        move_mask_m256d(self.avx) != 0
      } else {
        self.a.any() || self.b.any()
      }
    }
  }
  #[inline]
  #[must_use]
  pub fn all(self) -> bool {
    pick! {
      if #[cfg(target_feature="avx")] {
        move_mask_m256d(self.avx) == 0b1111
      } else {
        self.a.all() && self.b.all()
      }
    }
  }
  #[inline]
  #[must_use]
  pub fn none(self) -> bool {
    !self.any()
  }
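
  // Computes 2^n for integer-valued `self`: adding `bias + 2^52` parks the
  // integer exactly in the low mantissa bits, and shifting the whole bit
  // pattern left by 52 moves it into the exponent field. Only meaningful for
  // n within the normal exponent range.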
  #[inline]
  fn vm_pow2n(self) -> Self {
    const_f64_as_f64x4!(pow2_52, 4503599627370496.0);
    const_f64_as_f64x4!(bias, 1023.0);
    let a = self + (bias + pow2_52);
    let c = cast::<_, i64x4>(a) << 52;
    cast::<_, f64x4>(c)
  }
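
  /// `e^self`, lanewise, via `exp(x) = 2^r * exp(x - r*ln(2))` with `r`
  /// rounded from `x * log2(e)` and a degree-13 Taylor tail for the residual.
  /// Note the final range blend: lanes with `|x| >= 708.39`, as well as
  /// non-finite lanes, are flushed to zero rather than overflowing.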
  #[inline]
  #[must_use]
  pub fn exp(self) -> Self {
    const_f64_as_f64x4!(P2, 1.0 / 2.0);
    const_f64_as_f64x4!(P3, 1.0 / 6.0);
    const_f64_as_f64x4!(P4, 1. / 24.);
    const_f64_as_f64x4!(P5, 1. / 120.);
    const_f64_as_f64x4!(P6, 1. / 720.);
    const_f64_as_f64x4!(P7, 1. / 5040.);
    const_f64_as_f64x4!(P8, 1. / 40320.);
    const_f64_as_f64x4!(P9, 1. / 362880.);
    const_f64_as_f64x4!(P10, 1. / 3628800.);
    const_f64_as_f64x4!(P11, 1. / 39916800.);
    const_f64_as_f64x4!(P12, 1. / 479001600.);
    const_f64_as_f64x4!(P13, 1. / 6227020800.);
    const_f64_as_f64x4!(LN2D_HI, 0.693145751953125);
    const_f64_as_f64x4!(LN2D_LO, 1.42860682030941723212E-6);
    let max_x = f64x4::from(708.39);
    let r = (self * Self::LOG2_E).round();
    let x = r.mul_neg_add(LN2D_HI, self);
    let x = r.mul_neg_add(LN2D_LO, x);
    let z =
      polynomial_13m!(x, P2, P3, P4, P5, P6, P7, P8, P9, P10, P11, P12, P13);
    let n2 = Self::vm_pow2n(r);
    let z = (z + Self::ONE) * n2;
    let in_range = self.abs().cmp_lt(max_x);
    let in_range = in_range & self.is_finite();
    in_range.blend(z, Self::ZERO)
  }
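
  // `exponent` and `fraction_2` together form a frexp-style split:
  // `exponent` extracts the unbiased binary exponent as an f64, and
  // `fraction_2` overwrites the exponent field with 0x3FE so the mantissa
  // lands in [0.5, 1.0). `ln` and `pow_f64x4` below recombine the pieces.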
  #[inline]
  fn exponent(self) -> f64x4 {
    const_f64_as_f64x4!(pow2_52, 4503599627370496.0);
    const_f64_as_f64x4!(bias, 1023.0);
    let a = cast::<_, u64x4>(self);
    let b = a >> 52;
    let c = b | cast::<_, u64x4>(pow2_52);
    let d = cast::<_, f64x4>(c);
    let e = d - (pow2_52 + bias);
    e
  }

  #[inline]
  fn fraction_2(self) -> Self {
    let t1 = cast::<_, u64x4>(self);
    let t2 = cast::<_, u64x4>(
      (t1 & u64x4::from(0x000FFFFFFFFFFFFF)) | u64x4::from(0x3FE0000000000000),
    );
    cast::<_, f64x4>(t2)
  }
  #[inline]
  fn is_zero_or_subnormal(self) -> Self {
    let t = cast::<_, i64x4>(self);
    let t = t & i64x4::splat(0x7FF0000000000000);
    i64x4::round_float(t.cmp_eq(i64x4::splat(0)))
  }
  #[inline]
  fn infinity() -> Self {
    cast::<_, f64x4>(i64x4::splat(0x7FF0000000000000))
  }
  #[inline]
  fn nan_log() -> Self {
    cast::<_, f64x4>(i64x4::splat(0x7FF8000000000000 | 0x101 << 29))
  }
  #[inline]
  fn nan_pow() -> Self {
    cast::<_, f64x4>(i64x4::splat(0x7FF8000000000000 | 0x101 << 29))
  }
  #[inline]
  fn sign_bit(self) -> Self {
    let t1 = cast::<_, i64x4>(self);
    let t2 = t1 >> 63;
    !cast::<_, f64x4>(t2).cmp_eq(f64x4::ZERO)
  }
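
  /// Horizontal add: sums the four lanes into a single `f64`.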
  #[inline]
  pub fn reduce_add(self) -> f64 {
    pick! {
      if #[cfg(target_feature="avx")] {
        let lo = cast_to_m128d_from_m256d(self.avx);
        let hi = extract_m128d_from_m256d::<1>(self.avx);
        let lo = add_m128d(lo, hi);
        let hi64 = unpack_high_m128d(lo, lo);
        let sum = add_m128d_s(lo, hi64);
        get_f64_from_m128d_s(sum)
      } else {
        self.a.reduce_add() + self.b.reduce_add()
      }
    }
  }
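
  /// Natural logarithm, lanewise. The input is split into fraction and
  /// exponent, a degree-5/5 rational approximation handles the fraction, and
  /// `exponent * ln(2)` is added back in two pieces (LN2F_HI + LN2F_LO) to
  /// preserve the low bits. Zero, subnormal, negative, and non-finite inputs
  /// are patched in the fallback branch at the end.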
  #[inline]
  #[must_use]
  pub fn ln(self) -> Self {
    const_f64_as_f64x4!(HALF, 0.5);
    const_f64_as_f64x4!(P0, 7.70838733755885391666E0);
    const_f64_as_f64x4!(P1, 1.79368678507819816313E1);
    const_f64_as_f64x4!(P2, 1.44989225341610930846E1);
    const_f64_as_f64x4!(P3, 4.70579119878881725854E0);
    const_f64_as_f64x4!(P4, 4.97494994976747001425E-1);
    const_f64_as_f64x4!(P5, 1.01875663804580931796E-4);

    const_f64_as_f64x4!(Q0, 2.31251620126765340583E1);
    const_f64_as_f64x4!(Q1, 7.11544750618563894466E1);
    const_f64_as_f64x4!(Q2, 8.29875266912776603211E1);
    const_f64_as_f64x4!(Q3, 4.52279145837532221105E1);
    const_f64_as_f64x4!(Q4, 1.12873587189167450590E1);
    const_f64_as_f64x4!(LN2F_HI, 0.693359375);
    const_f64_as_f64x4!(LN2F_LO, -2.12194440e-4);
    const_f64_as_f64x4!(VM_SQRT2, 1.414213562373095048801);
    const_f64_as_f64x4!(VM_SMALLEST_NORMAL, 1.17549435E-38);

    let x1 = self;
    let x = Self::fraction_2(x1);
    let e = Self::exponent(x1);
    let mask = x.cmp_gt(VM_SQRT2 * HALF);
    let x = (!mask).blend(x + x, x);
    let fe = mask.blend(e + Self::ONE, e);
    let x = x - Self::ONE;
    let px = polynomial_5!(x, P0, P1, P2, P3, P4, P5);
    let x2 = x * x;
    let px = x2 * x * px;
    let qx = polynomial_5n!(x, Q0, Q1, Q2, Q3, Q4);
    let res = px / qx;
    let res = fe.mul_add(LN2F_LO, res);
    let res = res + x2.mul_neg_add(HALF, x);
    let res = fe.mul_add(LN2F_HI, res);
    let overflow = !self.is_finite();
    let underflow = x1.cmp_lt(VM_SMALLEST_NORMAL);
    let mask = overflow | underflow;
    if !mask.any() {
      res
    } else {
      let is_zero = self.is_zero_or_subnormal();
      let res = underflow.blend(Self::nan_log(), res);
      let res = is_zero.blend(Self::infinity(), res);
      let res = overflow.blend(self, res);
      res
    }
  }

  #[inline]
  #[must_use]
  pub fn log2(self) -> Self {
    Self::ln(self) * Self::LOG2_E
  }
  #[inline]
  #[must_use]
  pub fn log10(self) -> Self {
    Self::ln(self) * Self::LOG10_E
  }
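
  /// Raises `self` to the power `y`, lanewise. Internally this computes
  /// `2^(y * log2(self))`, carrying extra precision through the split
  /// exponent/fraction representation, then patches the IEEE edge cases
  /// (zero or subnormal base, negative base, NaN, infinity, overflow).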
  #[inline]
  #[must_use]
  pub fn pow_f64x4(self, y: Self) -> Self {
    const_f64_as_f64x4!(ln2d_hi, 0.693145751953125);
    const_f64_as_f64x4!(ln2d_lo, 1.42860682030941723212E-6);
    const_f64_as_f64x4!(P0log, 2.0039553499201281259648E1);
    const_f64_as_f64x4!(P1log, 5.7112963590585538103336E1);
    const_f64_as_f64x4!(P2log, 6.0949667980987787057556E1);
    const_f64_as_f64x4!(P3log, 2.9911919328553073277375E1);
    const_f64_as_f64x4!(P4log, 6.5787325942061044846969E0);
    const_f64_as_f64x4!(P5log, 4.9854102823193375972212E-1);
    const_f64_as_f64x4!(P6log, 4.5270000862445199635215E-5);
    const_f64_as_f64x4!(Q0log, 6.0118660497603843919306E1);
    const_f64_as_f64x4!(Q1log, 2.1642788614495947685003E2);
    const_f64_as_f64x4!(Q2log, 3.0909872225312059774938E2);
    const_f64_as_f64x4!(Q3log, 2.2176239823732856465394E2);
    const_f64_as_f64x4!(Q4log, 8.3047565967967209469434E1);
    const_f64_as_f64x4!(Q5log, 1.5062909083469192043167E1);

    const_f64_as_f64x4!(p2, 1.0 / 2.0);
    const_f64_as_f64x4!(p3, 1.0 / 6.0);
    const_f64_as_f64x4!(p4, 1.0 / 24.0);
    const_f64_as_f64x4!(p5, 1.0 / 120.0);
    const_f64_as_f64x4!(p6, 1.0 / 720.0);
    const_f64_as_f64x4!(p7, 1.0 / 5040.0);
    const_f64_as_f64x4!(p8, 1.0 / 40320.0);
    const_f64_as_f64x4!(p9, 1.0 / 362880.0);
    const_f64_as_f64x4!(p10, 1.0 / 3628800.0);
    const_f64_as_f64x4!(p11, 1.0 / 39916800.0);
    const_f64_as_f64x4!(p12, 1.0 / 479001600.0);
    const_f64_as_f64x4!(p13, 1.0 / 6227020800.0);

    let x1 = self.abs();
    let x = x1.fraction_2();
    let mask = x.cmp_gt(f64x4::SQRT_2 * f64x4::HALF);
    let x = (!mask).blend(x + x, x);
    let x = x - f64x4::ONE;
    let x2 = x * x;
    let px = polynomial_6!(x, P0log, P1log, P2log, P3log, P4log, P5log, P6log);
    let px = px * x * x2;
    let qx = polynomial_6n!(x, Q0log, Q1log, Q2log, Q3log, Q4log, Q5log);
    let lg1 = px / qx;

    let ef = x1.exponent();
    let ef = mask.blend(ef + f64x4::ONE, ef);
    let e1 = (ef * y).round();
    let yr = ef.mul_sub(y, e1);

    let lg = f64x4::HALF.mul_neg_add(x2, x) + lg1;
    let x2err = (f64x4::HALF * x).mul_sub(x, f64x4::HALF * x2);
    let lg_err = f64x4::HALF.mul_add(x2, lg - x) - lg1;

    let e2 = (lg * y * f64x4::LOG2_E).round();
    let v = lg.mul_sub(y, e2 * ln2d_hi);
    let v = e2.mul_neg_add(ln2d_lo, v);
    let v = v - (lg_err + x2err).mul_sub(y, yr * f64x4::LN_2);

    let x = v;
    let e3 = (x * f64x4::LOG2_E).round();
    let x = e3.mul_neg_add(f64x4::LN_2, x);
    let z =
      polynomial_13m!(x, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13)
        + f64x4::ONE;
    let ee = e1 + e2 + e3;
    let ei = cast::<_, i64x4>(ee.round_int());
    let ej = cast::<_, i64x4>(ei + (cast::<_, i64x4>(z) >> 52));

    let overflow = cast::<_, f64x4>(!ej.cmp_lt(i64x4::splat(0x07FF)))
      | ee.cmp_gt(f64x4::splat(3000.0));
    let underflow = cast::<_, f64x4>(!ej.cmp_gt(i64x4::splat(0x000)))
      | ee.cmp_lt(f64x4::splat(-3000.0));

    let z = cast::<_, f64x4>(cast::<_, i64x4>(z) + (ei << 52));

    let z = if (overflow | underflow).any() {
      let z = underflow.blend(f64x4::ZERO, z);
      overflow.blend(Self::infinity(), z)
    } else {
      z
    };

    let x_zero = self.is_zero_or_subnormal();
    let z = x_zero.blend(
      y.cmp_lt(f64x4::ZERO).blend(
        Self::infinity(),
        y.cmp_eq(f64x4::ZERO).blend(f64x4::ONE, f64x4::ZERO),
      ),
      z,
    );

    let x_sign = self.sign_bit();

    let z = if x_sign.any() {
      let yi = y.cmp_eq(y.round());
      let y_odd = cast::<_, i64x4>(y.round_int() << 63).round_float();
      let z1 =
        yi.blend(z | y_odd, self.cmp_eq(Self::ZERO).blend(z, Self::nan_pow()));
      x_sign.blend(z1, z)
    } else {
      z
    };

    let x_finite = self.is_finite();
    let y_finite = y.is_finite();
    let e_finite = ee.is_finite();

    if (x_finite & y_finite & (e_finite | x_zero)).all() {
      return z;
    }

    (self.is_nan() | y.is_nan()).blend(self + y, z)
  }
  #[inline]
  pub fn powf(self, y: f64) -> Self {
    Self::pow_f64x4(self, f64x4::splat(y))
  }

  #[inline]
  pub fn to_array(self) -> [f64; 4] {
    cast(self)
  }

  #[inline]
  pub fn as_array_ref(&self) -> &[f64; 4] {
    cast_ref(self)
  }

  #[inline]
  pub fn as_array_mut(&mut self) -> &mut [f64; 4] {
    cast_mut(self)
  }

  #[inline]
  pub fn from_i32x4(v: i32x4) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: convert_to_m256d_from_i32_m128i(v.sse) }
      } else {
        Self::new([
          v.as_array_ref()[0] as f64,
          v.as_array_ref()[1] as f64,
          v.as_array_ref()[2] as f64,
          v.as_array_ref()[3] as f64,
        ])
      }
    }
  }
}

impl From<i32x4> for f64x4 {
  #[inline]
  fn from(v: i32x4) -> Self {
    Self::from_i32x4(v)
  }
}

impl Not for f64x4 {
  type Output = Self;
  #[inline]
  fn not(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: self.avx.not() }
      } else {
        Self {
          a : self.a.not(),
          b : self.b.not(),
        }
      }
    }
  }
}
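
// A couple of lanewise sanity checks for the portable API above (a minimal
// sketch; the crate's real test suite lives elsewhere and covers far more):
#[cfg(test)]
mod f64x4_sanity {
  use super::*;

  #[test]
  fn splat_and_add() {
    let v = f64x4::new([1.0, 2.0, 3.0, 4.0]);
    let w = v + f64x4::splat(0.5);
    assert_eq!(w.to_array(), [1.5, 2.5, 3.5, 4.5]);
  }

  #[test]
  fn compare_then_blend_is_lanewise_min() {
    let a = f64x4::new([1.0, 5.0, 2.0, 7.0]);
    let b = f64x4::new([3.0, 4.0, 6.0, 0.5]);
    // cmp_lt yields an all-ones mask where a < b; blend keeps `a` there.
    let m = a.cmp_lt(b);
    assert_eq!(m.blend(a, b).to_array(), [1.0, 4.0, 2.0, 0.5]);
  }
}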