use super::*;

pick! {
  if #[cfg(target_feature="sse2")] {
    #[derive(Default, Clone, Copy, PartialEq)]
    #[repr(C, align(16))]
    pub struct f64x2 { pub(crate) sse: m128d }
  } else if #[cfg(target_feature="simd128")] {
    use core::arch::wasm32::*;

    #[derive(Clone, Copy)]
    #[repr(transparent)]
    pub struct f64x2 { pub(crate) simd: v128 }

    impl Default for f64x2 {
      fn default() -> Self {
        Self::splat(0.0)
      }
    }

    impl PartialEq for f64x2 {
      fn eq(&self, other: &Self) -> bool {
        u64x2_all_true(f64x2_eq(self.simd, other.simd))
      }
    }
  } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
    use core::arch::aarch64::*;
    #[repr(C)]
    #[derive(Copy, Clone)]
    pub struct f64x2 { pub(crate) neon: float64x2_t }

    impl Default for f64x2 {
      #[inline]
      #[must_use]
      fn default() -> Self {
        unsafe { Self { neon: vdupq_n_f64(0.0) } }
      }
    }

    impl PartialEq for f64x2 {
      #[inline]
      #[must_use]
      fn eq(&self, other: &Self) -> bool {
        unsafe {
          let e = vceqq_f64(self.neon, other.neon);
          vgetq_lane_u64(e, 0) == u64::MAX && vgetq_lane_u64(e, 1) == u64::MAX
        }
      }
    }
  } else {
    #[derive(Default, Clone, Copy, PartialEq)]
    #[repr(C, align(16))]
    pub struct f64x2 { pub(crate) arr: [f64;2] }
  }
}

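// Expands to a `pub const` f64x2 with both lanes set to the given value; used
// for the associated constants just below and for the local polynomial
// coefficients inside the math functions further down.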
macro_rules! const_f64_as_f64x2 {
  ($i:ident, $f:expr) => {
    #[allow(non_upper_case_globals)]
    pub const $i: f64x2 = f64x2::new([$f; 2]);
  };
}

impl f64x2 {
  const_f64_as_f64x2!(ONE, 1.0);
  const_f64_as_f64x2!(ZERO, 0.0);
  const_f64_as_f64x2!(HALF, 0.5);
  const_f64_as_f64x2!(E, core::f64::consts::E);
  const_f64_as_f64x2!(FRAC_1_PI, core::f64::consts::FRAC_1_PI);
  const_f64_as_f64x2!(FRAC_2_PI, core::f64::consts::FRAC_2_PI);
  const_f64_as_f64x2!(FRAC_2_SQRT_PI, core::f64::consts::FRAC_2_SQRT_PI);
  const_f64_as_f64x2!(FRAC_1_SQRT_2, core::f64::consts::FRAC_1_SQRT_2);
  const_f64_as_f64x2!(FRAC_PI_2, core::f64::consts::FRAC_PI_2);
  const_f64_as_f64x2!(FRAC_PI_3, core::f64::consts::FRAC_PI_3);
  const_f64_as_f64x2!(FRAC_PI_4, core::f64::consts::FRAC_PI_4);
  const_f64_as_f64x2!(FRAC_PI_6, core::f64::consts::FRAC_PI_6);
  const_f64_as_f64x2!(FRAC_PI_8, core::f64::consts::FRAC_PI_8);
  const_f64_as_f64x2!(LN_2, core::f64::consts::LN_2);
  const_f64_as_f64x2!(LN_10, core::f64::consts::LN_10);
  const_f64_as_f64x2!(LOG2_E, core::f64::consts::LOG2_E);
  const_f64_as_f64x2!(LOG10_E, core::f64::consts::LOG10_E);
  const_f64_as_f64x2!(LOG10_2, core::f64::consts::LOG10_2);
  const_f64_as_f64x2!(LOG2_10, core::f64::consts::LOG2_10);
  const_f64_as_f64x2!(PI, core::f64::consts::PI);
  const_f64_as_f64x2!(SQRT_2, core::f64::consts::SQRT_2);
  const_f64_as_f64x2!(TAU, core::f64::consts::TAU);
}

unsafe impl Zeroable for f64x2 {}
unsafe impl Pod for f64x2 {}

impl Add for f64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn add(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: add_m128d(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: f64x2_add(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vaddq_f64(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          self.arr[0] + rhs.arr[0],
          self.arr[1] + rhs.arr[1],
        ]}
      }
    }
  }
}

impl Sub for f64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn sub(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: sub_m128d(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: f64x2_sub(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vsubq_f64(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          self.arr[0] - rhs.arr[0],
          self.arr[1] - rhs.arr[1],
        ]}
      }
    }
  }
}

impl Mul for f64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn mul(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: mul_m128d(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: f64x2_mul(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vmulq_f64(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          self.arr[0] * rhs.arr[0],
          self.arr[1] * rhs.arr[1],
        ]}
      }
    }
  }
}

impl Div for f64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn div(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: div_m128d(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: f64x2_div(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vdivq_f64(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          self.arr[0] / rhs.arr[0],
          self.arr[1] / rhs.arr[1],
        ]}
      }
    }
  }
}

impl Add<f64> for f64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn add(self, rhs: f64) -> Self::Output {
    self.add(Self::splat(rhs))
  }
}

impl Sub<f64> for f64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn sub(self, rhs: f64) -> Self::Output {
    self.sub(Self::splat(rhs))
  }
}

impl Mul<f64> for f64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn mul(self, rhs: f64) -> Self::Output {
    self.mul(Self::splat(rhs))
  }
}

impl Div<f64> for f64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn div(self, rhs: f64) -> Self::Output {
    self.div(Self::splat(rhs))
  }
}

impl Add<f64x2> for f64 {
  type Output = f64x2;
  #[inline]
  #[must_use]
  fn add(self, rhs: f64x2) -> Self::Output {
    f64x2::splat(self).add(rhs)
  }
}

impl Sub<f64x2> for f64 {
  type Output = f64x2;
  #[inline]
  #[must_use]
  fn sub(self, rhs: f64x2) -> Self::Output {
    f64x2::splat(self).sub(rhs)
  }
}

impl Mul<f64x2> for f64 {
  type Output = f64x2;
  #[inline]
  #[must_use]
  fn mul(self, rhs: f64x2) -> Self::Output {
    f64x2::splat(self).mul(rhs)
  }
}

impl Div<f64x2> for f64 {
  type Output = f64x2;
  #[inline]
  #[must_use]
  fn div(self, rhs: f64x2) -> Self::Output {
    f64x2::splat(self).div(rhs)
  }
}

impl BitAnd for f64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn bitand(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: bitand_m128d(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_and(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(self.neon), vreinterpretq_u64_f64(rhs.neon))) } }
      } else {
        Self { arr: [
          f64::from_bits(self.arr[0].to_bits() & rhs.arr[0].to_bits()),
          f64::from_bits(self.arr[1].to_bits() & rhs.arr[1].to_bits()),
        ]}
      }
    }
  }
}

impl BitOr for f64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn bitor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: bitor_m128d(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_or(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(self.neon), vreinterpretq_u64_f64(rhs.neon))) } }
      } else {
        Self { arr: [
          f64::from_bits(self.arr[0].to_bits() | rhs.arr[0].to_bits()),
          f64::from_bits(self.arr[1].to_bits() | rhs.arr[1].to_bits()),
        ]}
      }
    }
  }
}

impl BitXor for f64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn bitxor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: bitxor_m128d(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_xor(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(self.neon), vreinterpretq_u64_f64(rhs.neon))) } }
      } else {
        Self { arr: [
          f64::from_bits(self.arr[0].to_bits() ^ rhs.arr[0].to_bits()),
          f64::from_bits(self.arr[1].to_bits() ^ rhs.arr[1].to_bits()),
        ]}
      }
    }
  }
}

impl CmpEq for f64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn cmp_eq(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: cmp_eq_mask_m128d(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: f64x2_eq(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vreinterpretq_f64_u64(vceqq_f64(self.neon, rhs.neon)) } }
      } else {
        Self { arr: [
          if self.arr[0] == rhs.arr[0] { f64::from_bits(u64::MAX) } else { 0.0 },
          if self.arr[1] == rhs.arr[1] { f64::from_bits(u64::MAX) } else { 0.0 },
        ]}
      }
    }
  }
}

impl CmpGe for f64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn cmp_ge(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: cmp_ge_mask_m128d(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: f64x2_ge(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vreinterpretq_f64_u64(vcgeq_f64(self.neon, rhs.neon)) } }
      } else {
        Self { arr: [
          if self.arr[0] >= rhs.arr[0] { f64::from_bits(u64::MAX) } else { 0.0 },
          if self.arr[1] >= rhs.arr[1] { f64::from_bits(u64::MAX) } else { 0.0 },
        ]}
      }
    }
  }
}

impl CmpGt for f64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn cmp_gt(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { sse: cmp_op_mask_m128d::<{cmp_op!(GreaterThanOrdered)}>(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="sse2")] {
        Self { sse: cmp_gt_mask_m128d(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: f64x2_gt(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vreinterpretq_f64_u64(vcgtq_f64(self.neon, rhs.neon)) } }
      } else {
        Self { arr: [
          if self.arr[0] > rhs.arr[0] { f64::from_bits(u64::MAX) } else { 0.0 },
          if self.arr[1] > rhs.arr[1] { f64::from_bits(u64::MAX) } else { 0.0 },
        ]}
      }
    }
  }
}

impl CmpNe for f64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn cmp_ne(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: cmp_neq_mask_m128d(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: f64x2_ne(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vreinterpretq_f64_u64(vceqq_f64(self.neon, rhs.neon)) }.not() }
      } else {
        Self { arr: [
          if self.arr[0] != rhs.arr[0] { f64::from_bits(u64::MAX) } else { 0.0 },
          if self.arr[1] != rhs.arr[1] { f64::from_bits(u64::MAX) } else { 0.0 },
        ]}
      }
    }
  }
}

impl CmpLe for f64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn cmp_le(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: cmp_le_mask_m128d(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: f64x2_le(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vreinterpretq_f64_u64(vcleq_f64(self.neon, rhs.neon)) } }
      } else {
        Self { arr: [
          if self.arr[0] <= rhs.arr[0] { f64::from_bits(u64::MAX) } else { 0.0 },
          if self.arr[1] <= rhs.arr[1] { f64::from_bits(u64::MAX) } else { 0.0 },
        ]}
      }
    }
  }
}

impl CmpLt for f64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn cmp_lt(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: cmp_lt_mask_m128d(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: f64x2_lt(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vreinterpretq_f64_u64(vcltq_f64(self.neon, rhs.neon)) } }
      } else {
        Self { arr: [
          if self.arr[0] < rhs.arr[0] { f64::from_bits(u64::MAX) } else { 0.0 },
          if self.arr[1] < rhs.arr[1] { f64::from_bits(u64::MAX) } else { 0.0 },
        ]}
      }
    }
  }
}

impl f64x2 {
  #[inline]
  #[must_use]
  pub const fn new(array: [f64; 2]) -> Self {
    unsafe { core::mem::transmute(array) }
  }
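  // `blend` treats `self` as a lane mask: lanes where `self` is all ones take
  // the corresponding lane of `t`, lanes that are all zeros take `f`. The
  // comparison impls above produce exactly such masks.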
  #[inline]
  #[must_use]
  pub fn blend(self, t: Self, f: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse4.1")] {
        Self { sse: blend_varying_m128d(f.sse, t.sse, self.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_bitselect(t.simd, f.simd, self.simd) }
      } else {
        generic_bit_blend(self, t, f)
      }
    }
  }
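  // In the generic path, `abs` clears the IEEE-754 sign bit by masking each
  // lane with the `i64::MAX` bit pattern (all bits set except the top one).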
  #[inline]
  #[must_use]
  pub fn abs(self) -> Self {
    pick! {
      if #[cfg(target_feature="simd128")] {
        Self { simd: f64x2_abs(self.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vabsq_f64(self.neon) } }
      } else {
        let non_sign_bits = f64x2::from(f64::from_bits(i64::MAX as u64));
        self & non_sign_bits
      }
    }
  }
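  // When no rounding instruction is available and `std` is off, `floor` and
  // `ceil` are derived from `round()` by stepping the rounded value down or up
  // by 1.0 in the lanes where rounding overshot.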
  #[inline]
  #[must_use]
  pub fn floor(self) -> Self {
    pick! {
      if #[cfg(target_feature="simd128")] {
        Self { simd: f64x2_floor(self.simd) }
      } else if #[cfg(target_feature="sse4.1")] {
        Self { sse: floor_m128d(self.sse) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vrndmq_f64(self.neon) } }
      } else if #[cfg(feature="std")] {
        let base: [f64; 2] = cast(self);
        cast(base.map(|val| val.floor()))
      } else {
        let base: [f64; 2] = cast(self);
        let rounded: [f64; 2] = cast(self.round());
        cast([
          if base[0] < rounded[0] { rounded[0] - 1.0 } else { rounded[0] },
          if base[1] < rounded[1] { rounded[1] - 1.0 } else { rounded[1] },
        ])
      }
    }
  }
  #[inline]
  #[must_use]
  pub fn ceil(self) -> Self {
    pick! {
      if #[cfg(target_feature="simd128")] {
        Self { simd: f64x2_ceil(self.simd) }
      } else if #[cfg(target_feature="sse4.1")] {
        Self { sse: ceil_m128d(self.sse) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vrndpq_f64(self.neon) } }
      } else if #[cfg(feature="std")] {
        let base: [f64; 2] = cast(self);
        cast(base.map(|val| val.ceil()))
      } else {
        let base: [f64; 2] = cast(self);
        let rounded: [f64; 2] = cast(self.round());
        cast([
          if base[0] > rounded[0] { rounded[0] + 1.0 } else { rounded[0] },
          if base[1] > rounded[1] { rounded[1] + 1.0 } else { rounded[1] },
        ])
      }
    }
  }

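  // `fast_max` (and `fast_min` below) map straight to the raw hardware
  // min/max, so NaN handling differs between backends; prefer `max` / `min`
  // when NaN inputs must be handled consistently.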
  #[inline]
  #[must_use]
  pub fn fast_max(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: max_m128d(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self {
          simd: f64x2_pmax(self.simd, rhs.simd),
        }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vmaxq_f64(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          if self.arr[0] < rhs.arr[0] { rhs.arr[0] } else { self.arr[0] },
          if self.arr[1] < rhs.arr[1] { rhs.arr[1] } else { self.arr[1] },
        ]}
      }
    }
  }

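  // `max` (and `min` below) return the non-NaN operand when exactly one input
  // lane is NaN: the SSE path blends `self` back in where `rhs` is NaN, the
  // WASM path selects `rhs` where `self` is NaN, and the NEON path uses the
  // IEEE `maxnm`/`minnm` instructions.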
  #[inline]
  #[must_use]
  pub fn max(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse2")] {
        rhs.is_nan().blend(self, Self { sse: max_m128d(self.sse, rhs.sse) })
      } else if #[cfg(target_feature="simd128")] {
        Self {
          simd: v128_bitselect(
            rhs.simd,
            f64x2_pmax(self.simd, rhs.simd),
            f64x2_ne(self.simd, self.simd),
          )
        }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vmaxnmq_f64(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          self.arr[0].max(rhs.arr[0]),
          self.arr[1].max(rhs.arr[1]),
        ]}
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn fast_min(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: min_m128d(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self {
          simd: f64x2_pmin(self.simd, rhs.simd),
        }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vminq_f64(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          if self.arr[0] < rhs.arr[0] { self.arr[0] } else { rhs.arr[0] },
          if self.arr[1] < rhs.arr[1] { self.arr[1] } else { rhs.arr[1] },
        ]}
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn min(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse2")] {
        rhs.is_nan().blend(self, Self { sse: min_m128d(self.sse, rhs.sse) })
      } else if #[cfg(target_feature="simd128")] {
        Self {
          simd: v128_bitselect(
            rhs.simd,
            f64x2_pmin(self.simd, rhs.simd),
            f64x2_ne(self.simd, self.simd),
          )
        }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vminnmq_f64(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          self.arr[0].min(rhs.arr[0]),
          self.arr[1].min(rhs.arr[1]),
        ]}
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn is_nan(self) -> Self {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: cmp_unord_mask_m128d(self.sse, self.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: f64x2_ne(self.simd, self.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vreinterpretq_f64_u64(vceqq_f64(self.neon, self.neon)) }.not() }
      } else {
        Self { arr: [
          if self.arr[0].is_nan() { f64::from_bits(u64::MAX) } else { 0.0 },
          if self.arr[1].is_nan() { f64::from_bits(u64::MAX) } else { 0.0 },
        ]}
      }
    }
  }
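  // `is_finite` and `is_inf` work on the raw bits: shifting left by one drops
  // the sign and leaves the 11 exponent bits at the top of the word. An
  // all-ones exponent field means infinity or NaN, so comparing against that
  // mask classifies each lane.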
  #[inline]
  #[must_use]
  pub fn is_finite(self) -> Self {
    let shifted_exp_mask = u64x2::from(0xFFE0000000000000);
    let u: u64x2 = cast(self);
    let shift_u = u << 1_u64;
    let out = !(shift_u & shifted_exp_mask).cmp_eq(shifted_exp_mask);
    cast(out)
  }
  #[inline]
  #[must_use]
  pub fn is_inf(self) -> Self {
    let shifted_inf = u64x2::from(0xFFE0000000000000);
    let u: u64x2 = cast(self);
    let shift_u = u << 1_u64;
    let out = (shift_u).cmp_eq(shifted_inf);
    cast(out)
  }

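  // The software fallback for `round` uses the classic magic-number trick:
  // adding and then subtracting 2^52 (with the sign of the input copied onto
  // it) forces rounding at the ones place, using round-to-nearest-even.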
  #[inline]
  #[must_use]
  pub fn round(self) -> Self {
    pick! {
      if #[cfg(target_feature="sse4.1")] {
        Self { sse: round_m128d::<{round_op!(Nearest)}>(self.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: f64x2_nearest(self.simd) }
      } else {
        let sign_mask = f64x2::from(-0.0);
        let magic = f64x2::from(f64::from_bits(0x43300000_00000000));
        let sign = self & sign_mask;
        let signed_magic = magic | sign;
        self + signed_magic - signed_magic
      }
    }
  }
  #[inline]
  #[must_use]
  pub fn round_int(self) -> i64x2 {
    let rounded: [f64; 2] = cast(self.round());
    cast([rounded[0] as i64, rounded[1] as i64])
  }
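  // The `mul_*` helpers use a fused multiply-add instruction when the `fma`
  // target feature is enabled; otherwise they fall back to two separately
  // rounded operations, which can differ from a true FMA in the last bit.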
  #[inline]
  #[must_use]
  pub fn mul_add(self, m: Self, a: Self) -> Self {
    pick! {
      if #[cfg(all(target_feature="fma"))] {
        Self { sse: fused_mul_add_m128d(self.sse, m.sse, a.sse) }
      } else {
        (self * m) + a
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn mul_sub(self, m: Self, a: Self) -> Self {
    pick! {
      if #[cfg(all(target_feature="fma"))] {
        Self { sse: fused_mul_sub_m128d(self.sse, m.sse, a.sse) }
      } else {
        (self * m) - a
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn mul_neg_add(self, m: Self, a: Self) -> Self {
    pick! {
      if #[cfg(all(target_feature="fma"))] {
        Self { sse: fused_mul_neg_add_m128d(self.sse, m.sse, a.sse) }
      } else {
        a - (self * m)
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn mul_neg_sub(self, m: Self, a: Self) -> Self {
    pick! {
      if #[cfg(all(target_feature="fma"))] {
        Self { sse: fused_mul_neg_sub_m128d(self.sse, m.sse, a.sse) }
      } else {
        -(self * m) - a
      }
    }
  }

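  // `flip_signs` negates each lane of `self` whose corresponding lane in
  // `signs` is negative (XOR with the sign bit); `copysign` keeps the
  // magnitude of `self` and takes the sign bit from `sign`.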
  #[inline]
  #[must_use]
  pub fn flip_signs(self, signs: Self) -> Self {
    self ^ (signs & Self::from(-0.0))
  }

  #[inline]
  #[must_use]
  pub fn copysign(self, sign: Self) -> Self {
    let magnitude_mask = Self::from(f64::from_bits(u64::MAX >> 1));
    (self & magnitude_mask) | (sign & Self::from(-0.0))
  }

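  // `asin_acos`, `acos`, and `asin` share one rational-polynomial
  // approximation: for |x| < 0.625 the polynomial is applied to x*x directly,
  // while larger inputs are reduced through 1 - |x| and recovered with a
  // square root; asin and acos are then obtained from each other via the
  // usual pi/2 identities.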
  #[inline]
  pub fn asin_acos(self) -> (Self, Self) {
    const_f64_as_f64x2!(R4asin, 2.967721961301243206100E-3);
    const_f64_as_f64x2!(R3asin, -5.634242780008963776856E-1);
    const_f64_as_f64x2!(R2asin, 6.968710824104713396794E0);
    const_f64_as_f64x2!(R1asin, -2.556901049652824852289E1);
    const_f64_as_f64x2!(R0asin, 2.853665548261061424989E1);

    const_f64_as_f64x2!(S3asin, -2.194779531642920639778E1);
    const_f64_as_f64x2!(S2asin, 1.470656354026814941758E2);
    const_f64_as_f64x2!(S1asin, -3.838770957603691357202E2);
    const_f64_as_f64x2!(S0asin, 3.424398657913078477438E2);

    const_f64_as_f64x2!(P5asin, 4.253011369004428248960E-3);
    const_f64_as_f64x2!(P4asin, -6.019598008014123785661E-1);
    const_f64_as_f64x2!(P3asin, 5.444622390564711410273E0);
    const_f64_as_f64x2!(P2asin, -1.626247967210700244449E1);
    const_f64_as_f64x2!(P1asin, 1.956261983317594739197E1);
    const_f64_as_f64x2!(P0asin, -8.198089802484824371615E0);

    const_f64_as_f64x2!(Q4asin, -1.474091372988853791896E1);
    const_f64_as_f64x2!(Q3asin, 7.049610280856842141659E1);
    const_f64_as_f64x2!(Q2asin, -1.471791292232726029859E2);
    const_f64_as_f64x2!(Q1asin, 1.395105614657485689735E2);
    const_f64_as_f64x2!(Q0asin, -4.918853881490881290097E1);

    let xa = self.abs();

    let big = xa.cmp_ge(f64x2::splat(0.625));

    let x1 = big.blend(f64x2::splat(1.0) - xa, xa * xa);

    let x2 = x1 * x1;
    let x3 = x2 * x1;
    let x4 = x2 * x2;
    let x5 = x4 * x1;

    let do_big = big.any();
    let do_small = !big.all();

    let mut rx = f64x2::default();
    let mut sx = f64x2::default();
    let mut px = f64x2::default();
    let mut qx = f64x2::default();

    if do_big {
      rx = x3.mul_add(R3asin, x2 * R2asin)
        + x4.mul_add(R4asin, x1.mul_add(R1asin, R0asin));
      sx =
        x3.mul_add(S3asin, x4) + x2.mul_add(S2asin, x1.mul_add(S1asin, S0asin));
    }
    if do_small {
      px = x3.mul_add(P3asin, P0asin)
        + x4.mul_add(P4asin, x1 * P1asin)
        + x5.mul_add(P5asin, x2 * P2asin);
      qx = x4.mul_add(Q4asin, x5)
        + x3.mul_add(Q3asin, x1 * Q1asin)
        + x2.mul_add(Q2asin, Q0asin);
    };

    let vx = big.blend(rx, px);
    let wx = big.blend(sx, qx);

    let y1 = vx / wx * x1;

    let mut z1 = f64x2::default();
    let mut z2 = f64x2::default();
    if do_big {
      let xb = (x1 + x1).sqrt();
      z1 = xb.mul_add(y1, xb);
    }

    if do_small {
      z2 = xa.mul_add(y1, xa);
    }

    let z3 = f64x2::FRAC_PI_2 - z1;
    let asin = big.blend(z3, z2);
    let asin = asin.flip_signs(self);

    let z3 = self.cmp_lt(f64x2::ZERO).blend(f64x2::PI - z1, z1);
    let z4 = f64x2::FRAC_PI_2 - z2.flip_signs(self);
    let acos = big.blend(z3, z4);

    (asin, acos)
  }

  #[inline]
  pub fn acos(self) -> Self {
    const_f64_as_f64x2!(R4asin, 2.967721961301243206100E-3);
    const_f64_as_f64x2!(R3asin, -5.634242780008963776856E-1);
    const_f64_as_f64x2!(R2asin, 6.968710824104713396794E0);
    const_f64_as_f64x2!(R1asin, -2.556901049652824852289E1);
    const_f64_as_f64x2!(R0asin, 2.853665548261061424989E1);

    const_f64_as_f64x2!(S3asin, -2.194779531642920639778E1);
    const_f64_as_f64x2!(S2asin, 1.470656354026814941758E2);
    const_f64_as_f64x2!(S1asin, -3.838770957603691357202E2);
    const_f64_as_f64x2!(S0asin, 3.424398657913078477438E2);

    const_f64_as_f64x2!(P5asin, 4.253011369004428248960E-3);
    const_f64_as_f64x2!(P4asin, -6.019598008014123785661E-1);
    const_f64_as_f64x2!(P3asin, 5.444622390564711410273E0);
    const_f64_as_f64x2!(P2asin, -1.626247967210700244449E1);
    const_f64_as_f64x2!(P1asin, 1.956261983317594739197E1);
    const_f64_as_f64x2!(P0asin, -8.198089802484824371615E0);

    const_f64_as_f64x2!(Q4asin, -1.474091372988853791896E1);
    const_f64_as_f64x2!(Q3asin, 7.049610280856842141659E1);
    const_f64_as_f64x2!(Q2asin, -1.471791292232726029859E2);
    const_f64_as_f64x2!(Q1asin, 1.395105614657485689735E2);
    const_f64_as_f64x2!(Q0asin, -4.918853881490881290097E1);

    let xa = self.abs();

    let big = xa.cmp_ge(f64x2::splat(0.625));

    let x1 = big.blend(f64x2::splat(1.0) - xa, xa * xa);

    let x2 = x1 * x1;
    let x3 = x2 * x1;
    let x4 = x2 * x2;
    let x5 = x4 * x1;

    let do_big = big.any();
    let do_small = !big.all();

    let mut rx = f64x2::default();
    let mut sx = f64x2::default();
    let mut px = f64x2::default();
    let mut qx = f64x2::default();

    if do_big {
      rx = x3.mul_add(R3asin, x2 * R2asin)
        + x4.mul_add(R4asin, x1.mul_add(R1asin, R0asin));
      sx =
        x3.mul_add(S3asin, x4) + x2.mul_add(S2asin, x1.mul_add(S1asin, S0asin));
    }
    if do_small {
      px = x3.mul_add(P3asin, P0asin)
        + x4.mul_add(P4asin, x1 * P1asin)
        + x5.mul_add(P5asin, x2 * P2asin);
      qx = x4.mul_add(Q4asin, x5)
        + x3.mul_add(Q3asin, x1 * Q1asin)
        + x2.mul_add(Q2asin, Q0asin);
    };

    let vx = big.blend(rx, px);
    let wx = big.blend(sx, qx);

    let y1 = vx / wx * x1;

    let mut z1 = f64x2::default();
    let mut z2 = f64x2::default();
    if do_big {
      let xb = (x1 + x1).sqrt();
      z1 = xb.mul_add(y1, xb);
    }

    if do_small {
      z2 = xa.mul_add(y1, xa);
    }

    let z3 = self.cmp_lt(f64x2::ZERO).blend(f64x2::PI - z1, z1);
    let z4 = f64x2::FRAC_PI_2 - z2.flip_signs(self);
    let acos = big.blend(z3, z4);

    acos
  }

  #[inline]
  pub fn asin(self) -> Self {
    const_f64_as_f64x2!(R4asin, 2.967721961301243206100E-3);
    const_f64_as_f64x2!(R3asin, -5.634242780008963776856E-1);
    const_f64_as_f64x2!(R2asin, 6.968710824104713396794E0);
    const_f64_as_f64x2!(R1asin, -2.556901049652824852289E1);
    const_f64_as_f64x2!(R0asin, 2.853665548261061424989E1);

    const_f64_as_f64x2!(S3asin, -2.194779531642920639778E1);
    const_f64_as_f64x2!(S2asin, 1.470656354026814941758E2);
    const_f64_as_f64x2!(S1asin, -3.838770957603691357202E2);
    const_f64_as_f64x2!(S0asin, 3.424398657913078477438E2);

    const_f64_as_f64x2!(P5asin, 4.253011369004428248960E-3);
    const_f64_as_f64x2!(P4asin, -6.019598008014123785661E-1);
    const_f64_as_f64x2!(P3asin, 5.444622390564711410273E0);
    const_f64_as_f64x2!(P2asin, -1.626247967210700244449E1);
    const_f64_as_f64x2!(P1asin, 1.956261983317594739197E1);
    const_f64_as_f64x2!(P0asin, -8.198089802484824371615E0);

    const_f64_as_f64x2!(Q4asin, -1.474091372988853791896E1);
    const_f64_as_f64x2!(Q3asin, 7.049610280856842141659E1);
    const_f64_as_f64x2!(Q2asin, -1.471791292232726029859E2);
    const_f64_as_f64x2!(Q1asin, 1.395105614657485689735E2);
    const_f64_as_f64x2!(Q0asin, -4.918853881490881290097E1);

    let xa = self.abs();

    let big = xa.cmp_ge(f64x2::splat(0.625));

    let x1 = big.blend(f64x2::splat(1.0) - xa, xa * xa);

    let x2 = x1 * x1;
    let x3 = x2 * x1;
    let x4 = x2 * x2;
    let x5 = x4 * x1;

    let do_big = big.any();
    let do_small = !big.all();

    let mut rx = f64x2::default();
    let mut sx = f64x2::default();
    let mut px = f64x2::default();
    let mut qx = f64x2::default();

    if do_big {
      rx = x3.mul_add(R3asin, x2 * R2asin)
        + x4.mul_add(R4asin, x1.mul_add(R1asin, R0asin));
      sx =
        x3.mul_add(S3asin, x4) + x2.mul_add(S2asin, x1.mul_add(S1asin, S0asin));
    }
    if do_small {
      px = x3.mul_add(P3asin, P0asin)
        + x4.mul_add(P4asin, x1 * P1asin)
        + x5.mul_add(P5asin, x2 * P2asin);
      qx = x4.mul_add(Q4asin, x5)
        + x3.mul_add(Q3asin, x1 * Q1asin)
        + x2.mul_add(Q2asin, Q0asin);
    };

    let vx = big.blend(rx, px);
    let wx = big.blend(sx, qx);

    let y1 = vx / wx * x1;

    let mut z1 = f64x2::default();
    let mut z2 = f64x2::default();
    if do_big {
      let xb = (x1 + x1).sqrt();
      z1 = xb.mul_add(y1, xb);
    }

    if do_small {
      z2 = xa.mul_add(y1, xa);
    }

    let z3 = f64x2::FRAC_PI_2 - z1;
    let asin = big.blend(z3, z2);
    let asin = asin.flip_signs(self);

    asin
  }

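  // `atan` and `atan2` reduce the argument using the thresholds tan(3*pi/8)
  // (= sqrt(2) + 1) and 0.66, evaluate a rational polynomial on the reduced
  // value, then undo the reduction by adding pi/4 or pi/2 plus a small
  // low-order correction (MORE_BITS) for extra precision.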
  #[inline]
  pub fn atan(self) -> Self {
    const_f64_as_f64x2!(MORE_BITS, 6.123233995736765886130E-17);
    const_f64_as_f64x2!(MORE_BITS_O2, 6.123233995736765886130E-17 * 0.5);
    const_f64_as_f64x2!(T3PO8, core::f64::consts::SQRT_2 + 1.0);

    const_f64_as_f64x2!(P4atan, -8.750608600031904122785E-1);
    const_f64_as_f64x2!(P3atan, -1.615753718733365076637E1);
    const_f64_as_f64x2!(P2atan, -7.500855792314704667340E1);
    const_f64_as_f64x2!(P1atan, -1.228866684490136173410E2);
    const_f64_as_f64x2!(P0atan, -6.485021904942025371773E1);

    const_f64_as_f64x2!(Q4atan, 2.485846490142306297962E1);
    const_f64_as_f64x2!(Q3atan, 1.650270098316988542046E2);
    const_f64_as_f64x2!(Q2atan, 4.328810604912902668951E2);
    const_f64_as_f64x2!(Q1atan, 4.853903996359136964868E2);
    const_f64_as_f64x2!(Q0atan, 1.945506571482613964425E2);

    let t = self.abs();

    let notbig = t.cmp_le(T3PO8);
    let notsmal = t.cmp_ge(Self::splat(0.66));

    let mut s = notbig.blend(Self::FRAC_PI_4, Self::FRAC_PI_2);
    s = notsmal & s;
    let mut fac = notbig.blend(MORE_BITS_O2, MORE_BITS);
    fac = notsmal & fac;

    let mut a = notbig & t;
    a = notsmal.blend(a - Self::ONE, a);
    let mut b = notbig & Self::ONE;
    b = notsmal.blend(b + t, b);
    let z = a / b;

    let zz = z * z;

    let px = polynomial_4!(zz, P0atan, P1atan, P2atan, P3atan, P4atan);
    let qx = polynomial_5n!(zz, Q0atan, Q1atan, Q2atan, Q3atan, Q4atan);

    let mut re = (px / qx).mul_add(z * zz, z);
    re += s + fac;

    re = (self.sign_bit()).blend(-re, re);

    re
  }

  #[inline]
  pub fn atan2(self, x: Self) -> Self {
    const_f64_as_f64x2!(MORE_BITS, 6.123233995736765886130E-17);
    const_f64_as_f64x2!(MORE_BITS_O2, 6.123233995736765886130E-17 * 0.5);
    const_f64_as_f64x2!(T3PO8, core::f64::consts::SQRT_2 + 1.0);

    const_f64_as_f64x2!(P4atan, -8.750608600031904122785E-1);
    const_f64_as_f64x2!(P3atan, -1.615753718733365076637E1);
    const_f64_as_f64x2!(P2atan, -7.500855792314704667340E1);
    const_f64_as_f64x2!(P1atan, -1.228866684490136173410E2);
    const_f64_as_f64x2!(P0atan, -6.485021904942025371773E1);

    const_f64_as_f64x2!(Q4atan, 2.485846490142306297962E1);
    const_f64_as_f64x2!(Q3atan, 1.650270098316988542046E2);
    const_f64_as_f64x2!(Q2atan, 4.328810604912902668951E2);
    const_f64_as_f64x2!(Q1atan, 4.853903996359136964868E2);
    const_f64_as_f64x2!(Q0atan, 1.945506571482613964425E2);

    let y = self;

    let x1 = x.abs();
    let y1 = y.abs();
    let swapxy = y1.cmp_gt(x1);
    let mut x2 = swapxy.blend(y1, x1);
    let mut y2 = swapxy.blend(x1, y1);

    let both_infinite = x.is_inf() & y.is_inf();
    if both_infinite.any() {
      let minus_one = -Self::ONE;
      x2 = both_infinite.blend(x2 & minus_one, x2);
      y2 = both_infinite.blend(y2 & minus_one, y2);
    }

    let t = y2 / x2;

    let notbig = t.cmp_le(T3PO8);
    let notsmal = t.cmp_ge(Self::splat(0.66));

    let mut s = notbig.blend(Self::FRAC_PI_4, Self::FRAC_PI_2);
    s = notsmal & s;
    let mut fac = notbig.blend(MORE_BITS_O2, MORE_BITS);
    fac = notsmal & fac;

    let mut a = notbig & t;
    a = notsmal.blend(a - Self::ONE, a);
    let mut b = notbig & Self::ONE;
    b = notsmal.blend(b + t, b);
    let z = a / b;

    let zz = z * z;

    let px = polynomial_4!(zz, P0atan, P1atan, P2atan, P3atan, P4atan);
    let qx = polynomial_5n!(zz, Q0atan, Q1atan, Q2atan, Q3atan, Q4atan);

    let mut re = (px / qx).mul_add(z * zz, z);
    re += s + fac;

    re = swapxy.blend(Self::FRAC_PI_2 - re, re);
    re = ((x | y).cmp_eq(Self::ZERO)).blend(Self::ZERO, re);
    re = (x.sign_bit()).blend(Self::PI - re, re);

    re = (y.sign_bit()).blend(-re, re);

    re
  }

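  // `sin_cos` reduces the argument modulo pi/2 Cody-Waite style (DP1/DP2/DP3
  // split the constant across three subtractions to limit cancellation),
  // evaluates separate sin and cos polynomials on the remainder, and finally
  // swaps/negates the results according to the quadrant index `q`.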
  #[inline]
  #[must_use]
  pub fn sin_cos(self) -> (Self, Self) {
    const_f64_as_f64x2!(P0sin, -1.66666666666666307295E-1);
    const_f64_as_f64x2!(P1sin, 8.33333333332211858878E-3);
    const_f64_as_f64x2!(P2sin, -1.98412698295895385996E-4);
    const_f64_as_f64x2!(P3sin, 2.75573136213857245213E-6);
    const_f64_as_f64x2!(P4sin, -2.50507477628578072866E-8);
    const_f64_as_f64x2!(P5sin, 1.58962301576546568060E-10);

    const_f64_as_f64x2!(P0cos, 4.16666666666665929218E-2);
    const_f64_as_f64x2!(P1cos, -1.38888888888730564116E-3);
    const_f64_as_f64x2!(P2cos, 2.48015872888517045348E-5);
    const_f64_as_f64x2!(P3cos, -2.75573141792967388112E-7);
    const_f64_as_f64x2!(P4cos, 2.08757008419747316778E-9);
    const_f64_as_f64x2!(P5cos, -1.13585365213876817300E-11);

    const_f64_as_f64x2!(DP1, 7.853981554508209228515625E-1 * 2.);
    const_f64_as_f64x2!(DP2, 7.94662735614792836714E-9 * 2.);
    const_f64_as_f64x2!(DP3, 3.06161699786838294307E-17 * 2.);

    const_f64_as_f64x2!(TWO_OVER_PI, 2.0 / core::f64::consts::PI);

    let xa = self.abs();

    let y = (xa * TWO_OVER_PI).round();
    let q = y.round_int();

    let x = y.mul_neg_add(DP3, y.mul_neg_add(DP2, y.mul_neg_add(DP1, xa)));

    let x2 = x * x;
    let mut s = polynomial_5!(x2, P0sin, P1sin, P2sin, P3sin, P4sin, P5sin);
    let mut c = polynomial_5!(x2, P0cos, P1cos, P2cos, P3cos, P4cos, P5cos);
    s = (x * x2).mul_add(s, x);
    c =
      (x2 * x2).mul_add(c, x2.mul_neg_add(f64x2::from(0.5), f64x2::from(1.0)));

    let swap = !((q & i64x2::from(1)).cmp_eq(i64x2::from(0)));

    let mut overflow: f64x2 = cast(q.cmp_gt(i64x2::from(0x80000000000000)));
    overflow &= xa.is_finite();
    s = overflow.blend(f64x2::from(0.0), s);
    c = overflow.blend(f64x2::from(1.0), c);

    let mut sin1 = cast::<_, f64x2>(swap).blend(c, s);
    let sign_sin: i64x2 = (q << 62) ^ cast::<_, i64x2>(self);
    sin1 = sin1.flip_signs(cast(sign_sin));

    let mut cos1 = cast::<_, f64x2>(swap).blend(s, c);
    let sign_cos: i64x2 = ((q + i64x2::from(1)) & i64x2::from(2)) << 62;
    cos1 ^= cast::<_, f64x2>(sign_cos);

    (sin1, cos1)
  }
  #[inline]
  #[must_use]
  pub fn sin(self) -> Self {
    let (s, _) = self.sin_cos();
    s
  }
  #[inline]
  #[must_use]
  pub fn cos(self) -> Self {
    let (_, c) = self.sin_cos();
    c
  }
  #[inline]
  #[must_use]
  pub fn tan(self) -> Self {
    let (s, c) = self.sin_cos();
    s / c
  }
  #[inline]
  #[must_use]
  pub fn to_degrees(self) -> Self {
    const_f64_as_f64x2!(RAD_TO_DEG_RATIO, 180.0_f64 / core::f64::consts::PI);
    self * RAD_TO_DEG_RATIO
  }
  #[inline]
  #[must_use]
  pub fn to_radians(self) -> Self {
    const_f64_as_f64x2!(DEG_TO_RAD_RATIO, core::f64::consts::PI / 180.0_f64);
    self * DEG_TO_RAD_RATIO
  }
  #[inline]
  #[must_use]
  pub fn sqrt(self) -> Self {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: sqrt_m128d(self.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: f64x2_sqrt(self.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vsqrtq_f64(self.neon) } }
      } else if #[cfg(feature="std")] {
        Self { arr: [
          self.arr[0].sqrt(),
          self.arr[1].sqrt(),
        ]}
      } else {
        Self { arr: [
          software_sqrt(self.arr[0]),
          software_sqrt(self.arr[1]),
        ]}
      }
    }
  }
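  // `move_mask` packs the sign bit of each lane into bits 0 and 1 of the
  // returned integer, which is what `any`/`all` below test against.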
  #[inline]
  #[must_use]
  pub fn move_mask(self) -> i32 {
    pick! {
      if #[cfg(target_feature="sse2")] {
        move_mask_m128d(self.sse)
      } else if #[cfg(target_feature="simd128")] {
        u64x2_bitmask(self.simd) as i32
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {
          let e = vreinterpretq_u64_f64(self.neon);

          (vgetq_lane_u64(e, 0) >> 63 | ((vgetq_lane_u64(e, 1) >> 62) & 0x2)) as i32
        }
      } else {
        (((self.arr[0].to_bits() as i64) < 0) as i32) << 0 |
        (((self.arr[1].to_bits() as i64) < 0) as i32) << 1
      }
    }
  }
  #[inline]
  #[must_use]
  pub fn any(self) -> bool {
    pick! {
      if #[cfg(target_feature="simd128")] {
        v128_any_true(self.simd)
      } else {
        self.move_mask() != 0
      }
    }
  }
  #[inline]
  #[must_use]
  pub fn all(self) -> bool {
    pick! {
      if #[cfg(target_feature="simd128")] {
        u64x2_all_true(self.simd)
      } else {
        self.move_mask() == 0b11
      }
    }
  }
  #[inline]
  #[must_use]
  pub fn none(self) -> bool {
    !self.any()
  }

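  // `vm_pow2n` builds 2^n for small integral `n` directly in the bit pattern:
  // adding `bias + 2^52` leaves `n + 1023` sitting in the low mantissa bits,
  // and the shift by 52 moves it into the exponent field.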
  #[inline]
  fn vm_pow2n(self) -> Self {
    const_f64_as_f64x2!(pow2_52, 4503599627370496.0);
    const_f64_as_f64x2!(bias, 1023.0);
    let a = self + (bias + pow2_52);
    let c = cast::<_, i64x2>(a) << 52;
    cast::<_, f64x2>(c)
  }

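  // `exp` splits the input as x = r*ln(2) + remainder (with r rounded to an
  // integer), evaluates a degree-13 Taylor polynomial on the remainder, and
  // scales by 2^r via `vm_pow2n`. Lanes outside roughly +/-708.39, or that are
  // non-finite, fall outside `in_range` and come back as zero from the final
  // blend.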
  #[inline]
  #[must_use]
  pub fn exp(self) -> Self {
    const_f64_as_f64x2!(P2, 1.0 / 2.0);
    const_f64_as_f64x2!(P3, 1.0 / 6.0);
    const_f64_as_f64x2!(P4, 1. / 24.);
    const_f64_as_f64x2!(P5, 1. / 120.);
    const_f64_as_f64x2!(P6, 1. / 720.);
    const_f64_as_f64x2!(P7, 1. / 5040.);
    const_f64_as_f64x2!(P8, 1. / 40320.);
    const_f64_as_f64x2!(P9, 1. / 362880.);
    const_f64_as_f64x2!(P10, 1. / 3628800.);
    const_f64_as_f64x2!(P11, 1. / 39916800.);
    const_f64_as_f64x2!(P12, 1. / 479001600.);
    const_f64_as_f64x2!(P13, 1. / 6227020800.);
    const_f64_as_f64x2!(LN2D_HI, 0.693145751953125);
    const_f64_as_f64x2!(LN2D_LO, 1.42860682030941723212E-6);
    let max_x = f64x2::from(708.39);
    let r = (self * Self::LOG2_E).round();
    let x = r.mul_neg_add(LN2D_HI, self);
    let x = r.mul_neg_add(LN2D_LO, x);
    let z =
      polynomial_13!(x, P2, P3, P4, P5, P6, P7, P8, P9, P10, P11, P12, P13);
    let n2 = Self::vm_pow2n(r);
    let z = (z + Self::ONE) * n2;
    let in_range = self.abs().cmp_lt(max_x);
    let in_range = in_range & self.is_finite();
    in_range.blend(z, Self::ZERO)
  }

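  // `exponent` returns the unbiased binary exponent of each lane as an f64,
  // and `fraction_2` forces the exponent field to 0x3FE so the result is the
  // mantissa scaled into [0.5, 1.0); together they decompose x for the log
  // routines below.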
  #[inline]
  fn exponent(self) -> f64x2 {
    const_f64_as_f64x2!(pow2_52, 4503599627370496.0);
    const_f64_as_f64x2!(bias, 1023.0);
    let a = cast::<_, u64x2>(self);
    let b = a >> 52;
    let c = b | cast::<_, u64x2>(pow2_52);
    let d = cast::<_, f64x2>(c);
    let e = d - (pow2_52 + bias);
    e
  }

  #[inline]
  fn fraction_2(self) -> Self {
    let t1 = cast::<_, u64x2>(self);
    let t2 = cast::<_, u64x2>(
      (t1 & u64x2::from(0x000FFFFFFFFFFFFF)) | u64x2::from(0x3FE0000000000000),
    );
    cast::<_, f64x2>(t2)
  }

  #[inline]
  fn is_zero_or_subnormal(self) -> Self {
    let t = cast::<_, i64x2>(self);
    let t = t & i64x2::splat(0x7FF0000000000000);
    i64x2::round_float(t.cmp_eq(i64x2::splat(0)))
  }

  #[inline]
  fn infinity() -> Self {
    cast::<_, f64x2>(i64x2::splat(0x7FF0000000000000))
  }

  #[inline]
  fn nan_log() -> Self {
    cast::<_, f64x2>(i64x2::splat(0x7FF8000000000000 | 0x101 << 29))
  }

  #[inline]
  fn nan_pow() -> Self {
    cast::<_, f64x2>(i64x2::splat(0x7FF8000000000000 | 0x101 << 29))
  }

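  // `sign_bit` produces an all-ones lane mask where the lane's sign bit is set
  // (negative values, including -0.0) and an all-zeros mask otherwise.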
  #[inline]
  fn sign_bit(self) -> Self {
    let t1 = cast::<_, i64x2>(self);
    let t2 = t1 >> 63;
    !cast::<_, f64x2>(t2).cmp_eq(f64x2::ZERO)
  }

  #[inline]
  #[must_use]
  pub fn reduce_add(self) -> f64 {
    pick! {
      if #[cfg(target_feature="ssse3")] {
        let a = add_horizontal_m128d(self.sse, self.sse);
        a.to_array()[0]
      } else if #[cfg(any(target_feature="sse2", target_feature="simd128"))] {
        let a: [f64; 2] = cast(self);
        a.iter().sum()
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { vgetq_lane_f64(self.neon, 0) + vgetq_lane_f64(self.neon, 1) }
      } else {
        self.arr.iter().sum()
      }
    }
  }

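  // `ln` decomposes x into mantissa (via `fraction_2`) and exponent, evaluates
  // a rational polynomial on mantissa - 1, then adds exponent * ln(2) using a
  // hi/lo split of ln(2) for accuracy. Non-finite, zero, and subnormal inputs
  // are patched up afterwards.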
  #[inline]
  #[must_use]
  pub fn ln(self) -> Self {
    const_f64_as_f64x2!(P0, 7.70838733755885391666E0);
    const_f64_as_f64x2!(P1, 1.79368678507819816313E1);
    const_f64_as_f64x2!(P2, 1.44989225341610930846E1);
    const_f64_as_f64x2!(P3, 4.70579119878881725854E0);
    const_f64_as_f64x2!(P4, 4.97494994976747001425E-1);
    const_f64_as_f64x2!(P5, 1.01875663804580931796E-4);

    const_f64_as_f64x2!(Q0, 2.31251620126765340583E1);
    const_f64_as_f64x2!(Q1, 7.11544750618563894466E1);
    const_f64_as_f64x2!(Q2, 8.29875266912776603211E1);
    const_f64_as_f64x2!(Q3, 4.52279145837532221105E1);
    const_f64_as_f64x2!(Q4, 1.12873587189167450590E1);
    const_f64_as_f64x2!(LN2F_HI, 0.693359375);
    const_f64_as_f64x2!(LN2F_LO, -2.12194440e-4);
    const_f64_as_f64x2!(VM_SQRT2, 1.414213562373095048801);
    const_f64_as_f64x2!(VM_SMALLEST_NORMAL, 1.17549435E-38);

    let x1 = self;
    let x = Self::fraction_2(x1);
    let e = Self::exponent(x1);
    let mask = x.cmp_gt(VM_SQRT2 * f64x2::HALF);
    let x = (!mask).blend(x + x, x);
    let fe = mask.blend(e + Self::ONE, e);
    let x = x - Self::ONE;
    let px = polynomial_5!(x, P0, P1, P2, P3, P4, P5);
    let x2 = x * x;
    let px = x2 * x * px;
    let qx = polynomial_5n!(x, Q0, Q1, Q2, Q3, Q4);
    let res = px / qx;
    let res = fe.mul_add(LN2F_LO, res);
    let res = res + x2.mul_neg_add(f64x2::HALF, x);
    let res = fe.mul_add(LN2F_HI, res);
    let overflow = !self.is_finite();
    let underflow = x1.cmp_lt(VM_SMALLEST_NORMAL);
    let mask = overflow | underflow;
    if !mask.any() {
      res
    } else {
      let is_zero = self.is_zero_or_subnormal();
      let res = underflow.blend(Self::nan_log(), res);
      let res = is_zero.blend(Self::infinity(), res);
      let res = overflow.blend(self, res);
      res
    }
  }

  #[inline]
  #[must_use]
  pub fn log2(self) -> Self {
    Self::ln(self) * Self::LOG2_E
  }
  #[inline]
  #[must_use]
  pub fn log10(self) -> Self {
    Self::ln(self) * Self::LOG10_E
  }

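  // `pow_f64x2` computes x^y as 2^(y*log2(x)), carrying extra-precision error
  // terms through the log and exp stages, then fixes up the special cases:
  // exponent overflow/underflow, x == 0, negative x (allowed only for
  // integral y, with the sign taken from y's parity), and NaN propagation.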
  #[inline]
  #[must_use]
  pub fn pow_f64x2(self, y: Self) -> Self {
    const_f64_as_f64x2!(ln2d_hi, 0.693145751953125);
    const_f64_as_f64x2!(ln2d_lo, 1.42860682030941723212E-6);
    const_f64_as_f64x2!(P0log, 2.0039553499201281259648E1);
    const_f64_as_f64x2!(P1log, 5.7112963590585538103336E1);
    const_f64_as_f64x2!(P2log, 6.0949667980987787057556E1);
    const_f64_as_f64x2!(P3log, 2.9911919328553073277375E1);
    const_f64_as_f64x2!(P4log, 6.5787325942061044846969E0);
    const_f64_as_f64x2!(P5log, 4.9854102823193375972212E-1);
    const_f64_as_f64x2!(P6log, 4.5270000862445199635215E-5);
    const_f64_as_f64x2!(Q0log, 6.0118660497603843919306E1);
    const_f64_as_f64x2!(Q1log, 2.1642788614495947685003E2);
    const_f64_as_f64x2!(Q2log, 3.0909872225312059774938E2);
    const_f64_as_f64x2!(Q3log, 2.2176239823732856465394E2);
    const_f64_as_f64x2!(Q4log, 8.3047565967967209469434E1);
    const_f64_as_f64x2!(Q5log, 1.5062909083469192043167E1);

    const_f64_as_f64x2!(p2, 1.0 / 2.0);
    const_f64_as_f64x2!(p3, 1.0 / 6.0);
    const_f64_as_f64x2!(p4, 1.0 / 24.0);
    const_f64_as_f64x2!(p5, 1.0 / 120.0);
    const_f64_as_f64x2!(p6, 1.0 / 720.0);
    const_f64_as_f64x2!(p7, 1.0 / 5040.0);
    const_f64_as_f64x2!(p8, 1.0 / 40320.0);
    const_f64_as_f64x2!(p9, 1.0 / 362880.0);
    const_f64_as_f64x2!(p10, 1.0 / 3628800.0);
    const_f64_as_f64x2!(p11, 1.0 / 39916800.0);
    const_f64_as_f64x2!(p12, 1.0 / 479001600.0);
    const_f64_as_f64x2!(p13, 1.0 / 6227020800.0);

    let x1 = self.abs();
    let x = x1.fraction_2();
    let mask = x.cmp_gt(f64x2::SQRT_2 * f64x2::HALF);
    let x = (!mask).blend(x + x, x);
    let x = x - f64x2::ONE;
    let x2 = x * x;
    let px = polynomial_6!(x, P0log, P1log, P2log, P3log, P4log, P5log, P6log);
    let px = px * x * x2;
    let qx = polynomial_6n!(x, Q0log, Q1log, Q2log, Q3log, Q4log, Q5log);
    let lg1 = px / qx;

    let ef = x1.exponent();
    let ef = mask.blend(ef + f64x2::ONE, ef);
    let e1 = (ef * y).round();
    let yr = ef.mul_sub(y, e1);

    let lg = f64x2::HALF.mul_neg_add(x2, x) + lg1;
    let x2err = (f64x2::HALF * x).mul_sub(x, f64x2::HALF * x2);
    let lg_err = f64x2::HALF.mul_add(x2, lg - x) - lg1;

    let e2 = (lg * y * f64x2::LOG2_E).round();
    let v = lg.mul_sub(y, e2 * ln2d_hi);
    let v = e2.mul_neg_add(ln2d_lo, v);
    let v = v - (lg_err + x2err).mul_sub(y, yr * f64x2::LN_2);

    let x = v;
    let e3 = (x * f64x2::LOG2_E).round();
    let x = e3.mul_neg_add(f64x2::LN_2, x);
    let z =
      polynomial_13m!(x, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13)
        + f64x2::ONE;
    let ee = e1 + e2 + e3;
    let ei = cast::<_, i64x2>(ee.round_int());
    let ej = cast::<_, i64x2>(ei + (cast::<_, i64x2>(z) >> 52));

    let overflow = cast::<_, f64x2>(!ej.cmp_lt(i64x2::splat(0x07FF)))
      | ee.cmp_gt(f64x2::splat(3000.0));
    let underflow = cast::<_, f64x2>(!ej.cmp_gt(i64x2::splat(0x000)))
      | ee.cmp_lt(f64x2::splat(-3000.0));

    let z = cast::<_, f64x2>(cast::<_, i64x2>(z) + (ei << 52));

    let z = if (overflow | underflow).any() {
      let z = underflow.blend(f64x2::ZERO, z);
      overflow.blend(Self::infinity(), z)
    } else {
      z
    };

    let x_zero = self.is_zero_or_subnormal();
    let z = x_zero.blend(
      y.cmp_lt(f64x2::ZERO).blend(
        Self::infinity(),
        y.cmp_eq(f64x2::ZERO).blend(f64x2::ONE, f64x2::ZERO),
      ),
      z,
    );

    let x_sign = self.sign_bit();
    let z = if x_sign.any() {
      let yi = y.cmp_eq(y.round());
      let y_odd = cast::<_, i64x2>(y.round_int() << 63).round_float();

      let z1 =
        yi.blend(z | y_odd, self.cmp_eq(Self::ZERO).blend(z, Self::nan_pow()));
      x_sign.blend(z1, z)
    } else {
      z
    };

    let x_finite = self.is_finite();
    let y_finite = y.is_finite();
    let e_finite = ee.is_finite();

    if (x_finite & y_finite & (e_finite | x_zero)).all() {
      return z;
    }

    (self.is_nan() | y.is_nan()).blend(self + y, z)
  }

  #[inline]
  pub fn powf(self, y: f64) -> Self {
    Self::pow_f64x2(self, f64x2::splat(y))
  }

  #[inline]
  pub fn to_array(self) -> [f64; 2] {
    cast(self)
  }

  #[inline]
  pub fn as_array_ref(&self) -> &[f64; 2] {
    cast_ref(self)
  }

  #[inline]
  pub fn as_array_mut(&mut self) -> &mut [f64; 2] {
    cast_mut(self)
  }

  #[inline]
  pub fn from_i32x4_lower2(v: i32x4) -> Self {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: convert_to_m128d_from_lower2_i32_m128i(v.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: f64x2_convert_low_i32x4(v.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
        Self { neon: unsafe { vcvtq_f64_s64(vmovl_s32(vget_low_s32(v.neon))) } }
      } else {
        Self { arr: [
          v.as_array_ref()[0] as f64,
          v.as_array_ref()[1] as f64,
        ]}
      }
    }
  }
}

impl From<i32x4> for f64x2 {
  #[inline]
  fn from(v: i32x4) -> Self {
    Self::from_i32x4_lower2(v)
  }
}

impl Not for f64x2 {
  type Output = Self;
  #[inline]
  fn not(self) -> Self {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: self.sse.not() }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_not(self.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vreinterpretq_f64_u32(vmvnq_u32(vreinterpretq_u32_f64(self.neon))) } }
      } else {
        Self { arr: [
          f64::from_bits(!self.arr[0].to_bits()),
          f64::from_bits(!self.arr[1].to_bits()),
        ]}
      }
    }
  }
}
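
// A minimal smoke test sketched in for illustration (not part of the original
// file): it exercises the constructor, arithmetic operators, and comparison
// masks defined above, and behaves the same on every backend selected by
// `pick!`.
#[cfg(test)]
mod f64x2_smoke_test {
  use super::*;

  #[test]
  fn basic_lanewise_ops() {
    let a = f64x2::new([1.0, 2.0]);
    let b = f64x2::new([3.0, 4.0]);
    // Lane-wise arithmetic.
    assert_eq!((a + b).to_array(), [4.0, 6.0]);
    assert_eq!((b * a).to_array(), [3.0, 8.0]);
    // Comparisons yield all-ones / all-zeros masks usable with `blend`.
    let mask = a.cmp_lt(b);
    assert_eq!(mask.blend(a, b).to_array(), [1.0, 2.0]);
  }
}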