// wide/i64x2_.rs

use super::*;

pick! {
  if #[cfg(target_feature="sse2")] {
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(16))]
    pub struct i64x2 { pub(crate) sse: m128i }
  } else if #[cfg(target_feature="simd128")] {
    use core::arch::wasm32::*;

    #[derive(Clone, Copy)]
    #[repr(transparent)]
    pub struct i64x2 { pub(crate) simd: v128 }

    impl Default for i64x2 {
      fn default() -> Self {
        Self::splat(0)
      }
    }

    impl PartialEq for i64x2 {
      fn eq(&self, other: &Self) -> bool {
        u64x2_all_true(i64x2_eq(self.simd, other.simd))
      }
    }

    impl Eq for i64x2 { }
  } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
    use core::arch::aarch64::*;
    #[repr(C)]
    #[derive(Copy, Clone)]
    pub struct i64x2 { pub(crate) neon : int64x2_t }

    impl Default for i64x2 {
      #[inline]
      #[must_use]
      fn default() -> Self {
        unsafe { Self { neon: vdupq_n_s64(0)} }
      }
    }

    impl PartialEq for i64x2 {
      #[inline]
      #[must_use]
      fn eq(&self, other: &Self) -> bool {
        unsafe {
          vgetq_lane_s64(self.neon,0) == vgetq_lane_s64(other.neon,0) && vgetq_lane_s64(self.neon,1) == vgetq_lane_s64(other.neon,1)
        }
      }
    }

    impl Eq for i64x2 { }
  } else {
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(16))]
    pub struct i64x2 { arr: [i64;2] }
  }
}

int_uint_consts!(i64, 2, i64x2, 128);

unsafe impl Zeroable for i64x2 {}
unsafe impl Pod for i64x2 {}
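
// Being `Zeroable` + `Pod` means the vector can be freely reinterpreted as its
// lane array. An illustrative sketch only, assuming the `bytemuck::cast`
// re-export that the rest of this file already uses:
//
//   let v: i64x2 = cast([1_i64, 2_i64]);
//   let a: [i64; 2] = cast(v);
//   assert_eq!(a, [1, 2]);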

impl Add for i64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn add(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: add_i64_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i64x2_add(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vaddq_s64(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          self.arr[0].wrapping_add(rhs.arr[0]),
          self.arr[1].wrapping_add(rhs.arr[1]),
        ]}
      }
    }
  }
}

impl Sub for i64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn sub(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: sub_i64_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i64x2_sub(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vsubq_s64(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          self.arr[0].wrapping_sub(rhs.arr[0]),
          self.arr[1].wrapping_sub(rhs.arr[1]),
        ]}
      }
    }
  }
}

// TODO: implement this with SSE2 intrinsics (x86 has no packed 64-bit multiply
// until AVX-512DQ, so it would have to be built from 32-bit multiplies).
impl Mul for i64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn mul(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="simd128")] {
        Self { simd: i64x2_mul(self.simd, rhs.simd) }
      } else {
        let arr1: [i64; 2] = cast(self);
        let arr2: [i64; 2] = cast(rhs);
        cast([
          arr1[0].wrapping_mul(arr2[0]),
          arr1[1].wrapping_mul(arr2[1]),
        ])
      }
    }
  }
}
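
// The lane-wise ops above wrap on overflow in every backend. An illustrative
// sketch (hypothetical values; `new` and `to_array` are defined further down):
//
//   let a = i64x2::new([i64::MAX, 2]);
//   let b = i64x2::new([1, 3]);
//   assert_eq!((a + b).to_array(), [i64::MIN, 5]);
//   assert_eq!((a * b).to_array(), [i64::MAX, 6]);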

impl Add<i64> for i64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn add(self, rhs: i64) -> Self::Output {
    self.add(Self::splat(rhs))
  }
}

impl Sub<i64> for i64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn sub(self, rhs: i64) -> Self::Output {
    self.sub(Self::splat(rhs))
  }
}

impl Mul<i64> for i64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn mul(self, rhs: i64) -> Self::Output {
    self.mul(Self::splat(rhs))
  }
}

impl Add<i64x2> for i64 {
  type Output = i64x2;
  #[inline]
  #[must_use]
  fn add(self, rhs: i64x2) -> Self::Output {
    i64x2::splat(self).add(rhs)
  }
}

impl Sub<i64x2> for i64 {
  type Output = i64x2;
  #[inline]
  #[must_use]
  fn sub(self, rhs: i64x2) -> Self::Output {
    i64x2::splat(self).sub(rhs)
  }
}

impl Mul<i64x2> for i64 {
  type Output = i64x2;
  #[inline]
  #[must_use]
  fn mul(self, rhs: i64x2) -> Self::Output {
    i64x2::splat(self).mul(rhs)
  }
}
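
// The scalar forms just splat the scalar and reuse the vector ops, so
// `v + 1`, `1 + v`, `3 * v`, etc. all work. An illustrative sketch:
//
//   let v = i64x2::new([10, 20]);
//   assert_eq!((v + 1).to_array(), [11, 21]);
//   assert_eq!((3 * v).to_array(), [30, 60]);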

impl BitAnd for i64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn bitand(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: bitand_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_and(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vandq_s64(self.neon, rhs.neon) }}
      } else {
        Self { arr: [
          self.arr[0].bitand(rhs.arr[0]),
          self.arr[1].bitand(rhs.arr[1]),
        ]}
      }
    }
  }
}

impl BitOr for i64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn bitor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: bitor_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_or(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vorrq_s64(self.neon, rhs.neon) }}
      } else {
        Self { arr: [
          self.arr[0].bitor(rhs.arr[0]),
          self.arr[1].bitor(rhs.arr[1]),
        ]}
      }
    }
  }
}

impl BitXor for i64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn bitxor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: bitxor_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_xor(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: veorq_s64(self.neon, rhs.neon) }}
      } else {
        Self { arr: [
          self.arr[0].bitxor(rhs.arr[0]),
          self.arr[1].bitxor(rhs.arr[1]),
        ]}
      }
    }
  }
}
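
// The bitwise ops act on the whole 128-bit value, which for integer lanes is
// the same as applying them lane by lane. An illustrative sketch:
//
//   let a = i64x2::new([0b1100, -1]);
//   let b = i64x2::new([0b1010, 0]);
//   assert_eq!((a & b).to_array(), [0b1000, 0]);
//   assert_eq!((a ^ b).to_array(), [0b0110, -1]);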

macro_rules! impl_shl_t_for_i64x2 {
  ($($shift_type:ty),+ $(,)?) => {
    $(impl Shl<$shift_type> for i64x2 {
      type Output = Self;
      /// Shifts all lanes left by the value given.
      #[inline]
      #[must_use]
      fn shl(self, rhs: $shift_type) -> Self::Output {
        pick! {
          if #[cfg(target_feature="sse2")] {
            let shift = cast([rhs as u64, 0]);
            Self { sse: shl_all_u64_m128i(self.sse, shift) }
          } else if #[cfg(target_feature="simd128")] {
            Self { simd: i64x2_shl(self.simd, rhs as u32) }
          } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
            unsafe {Self { neon: vshlq_s64(self.neon, vmovq_n_s64(rhs as i64)) }}
          } else {
            let u = rhs as u64;
            Self { arr: [
              self.arr[0] << u,
              self.arr[1] << u,
            ]}
          }
        }
      }
    })+
  };
}
impl_shl_t_for_i64x2!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);
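
// Every lane is shifted left by the same scalar amount. An illustrative
// sketch; as with scalar `<<`, shift amounts of 64 or more should be avoided
// (the backends do not all agree on what they produce):
//
//   let v = i64x2::new([1, -2]);
//   assert_eq!((v << 3).to_array(), [8, -16]);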

macro_rules! impl_shr_t_for_i64x2 {
  ($($shift_type:ty),+ $(,)?) => {
    $(impl Shr<$shift_type> for i64x2 {
      type Output = Self;
      /// Shifts all lanes right by the value given (arithmetic shift: the sign bit is preserved).
      #[inline]
      #[must_use]
      fn shr(self, rhs: $shift_type) -> Self::Output {
        pick! {
          if #[cfg(target_feature="simd128")] {
            Self { simd: i64x2_shr(self.simd, rhs as u32) }
          } else {
            let u = rhs as u64;
            let arr: [i64; 2] = cast(self);
            cast([
              arr[0] >> u,
              arr[1] >> u,
            ])
          }
        }
      }
    })+
  };
}

impl_shr_t_for_i64x2!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);
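
// The right shift is arithmetic (lanes are `i64`, so the sign bit is copied
// in), matching scalar `>>` on a signed integer. An illustrative sketch:
//
//   let v = i64x2::new([16, -16]);
//   assert_eq!((v >> 2).to_array(), [4, -4]);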

impl CmpEq for i64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn cmp_eq(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse4.1")] {
        Self { sse: cmp_eq_mask_i64_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i64x2_eq(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vreinterpretq_s64_u64(vceqq_s64(self.neon, rhs.neon)) }}
      } else {
        let s: [i64;2] = cast(self);
        let r: [i64;2] = cast(rhs);
        cast([
          if s[0] == r[0] { -1_i64 } else { 0 },
          if s[1] == r[1] { -1_i64 } else { 0 },
        ])
      }
    }
  }
}

impl CmpGt for i64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn cmp_gt(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse4.2")] {
        Self { sse: cmp_gt_mask_i64_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i64x2_gt(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vreinterpretq_s64_u64(vcgtq_s64(self.neon, rhs.neon)) }}
      } else {
        let s: [i64;2] = cast(self);
        let r: [i64;2] = cast(rhs);
        cast([
          if s[0] > r[0] { -1_i64 } else { 0 },
          if s[1] > r[1] { -1_i64 } else { 0 },
        ])
      }
    }
  }
}

impl CmpLt for i64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn cmp_lt(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse4.2")] {
        // `a < b` is computed as `b > a`, which gives a strict less-than mask
        // (negating the greater-than mask would give `<=`, not `<`).
        Self { sse: cmp_gt_mask_i64_m128i(rhs.sse, self.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i64x2_lt(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vreinterpretq_s64_u64(vcltq_s64(self.neon, rhs.neon)) }}
      } else {
        let s: [i64;2] = cast(self);
        let r: [i64;2] = cast(rhs);
        cast([
          if s[0] < r[0] { -1_i64 } else { 0 },
          if s[1] < r[1] { -1_i64 } else { 0 },
        ])
      }
    }
  }
}
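
// Each comparison returns a lane mask: all ones (`-1`) where the predicate
// holds and `0` where it does not, which is exactly the form `blend` below
// expects. An illustrative sketch:
//
//   let a = i64x2::new([1, 5]);
//   let b = i64x2::new([2, 5]);
//   assert_eq!(a.cmp_lt(b).to_array(), [-1, 0]);
//   assert_eq!(a.cmp_eq(b).to_array(), [0, -1]);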

impl i64x2 {
  #[inline]
  #[must_use]
  pub const fn new(array: [i64; 2]) -> Self {
    unsafe { core::mem::transmute(array) }
  }
  #[inline]
  #[must_use]
  pub fn blend(self, t: Self, f: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse4.1")] {
        Self { sse: blend_varying_i8_m128i(f.sse, t.sse, self.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_bitselect(t.simd, f.simd, self.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vbslq_s64(vreinterpretq_u64_s64(self.neon), t.neon, f.neon) }}
      } else {
        generic_bit_blend(self, t, f)
      }
    }
  }
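
  // `blend` uses `self` as the lane mask, so pairing it with a comparison
  // gives branch-free per-lane selection. An illustrative sketch of a
  // lane-wise minimum (hypothetical helper, not part of this file):
  //
  //   fn min_per_lane(a: i64x2, b: i64x2) -> i64x2 {
  //     a.cmp_lt(b).blend(a, b)
  //   }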

  #[inline]
  #[must_use]
  pub fn abs(self) -> Self {
    pick! {
      // x86 doesn't have this builtin
      if #[cfg(target_feature="simd128")] {
        Self { simd: i64x2_abs(self.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vabsq_s64(self.neon) }}
      } else {
        let arr: [i64; 2] = cast(self);
        cast(
          [
            arr[0].wrapping_abs(),
            arr[1].wrapping_abs(),
          ])
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn unsigned_abs(self) -> u64x2 {
    pick! {
      // x86 doesn't have this builtin
      if #[cfg(target_feature="simd128")] {
        u64x2 { simd: i64x2_abs(self.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {u64x2 { neon: vreinterpretq_u64_s64(vabsq_s64(self.neon)) }}
      } else {
        let arr: [i64; 2] = cast(self);
        cast(
          [
            arr[0].unsigned_abs(),
            arr[1].unsigned_abs(),
          ])
      }
    }
  }
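
  // Like scalar `wrapping_abs`, `abs` leaves `i64::MIN` unchanged, while
  // `unsigned_abs` returns the true magnitude as `u64`. An illustrative sketch:
  //
  //   let v = i64x2::new([i64::MIN, -3]);
  //   assert_eq!(v.abs().to_array(), [i64::MIN, 3]);
  //   assert_eq!(v.unsigned_abs().to_array(), [1u64 << 63, 3]);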

  #[inline]
  #[must_use]
  pub fn round_float(self) -> f64x2 {
    let arr: [i64; 2] = cast(self);
    cast([arr[0] as f64, arr[1] as f64])
  }
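
  // `round_float` converts each lane with a plain `as f64` cast, so values
  // with magnitude above 2^53 can lose precision. An illustrative sketch:
  //
  //   let v = i64x2::new([3, -7]);
  //   assert_eq!(v.round_float().to_array(), [3.0, -7.0]);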

  /// returns a bit mask built from the high (sign) bit of each lane, with the
  /// lowest lane mapping to the lowest bit
  #[inline]
  #[must_use]
  pub fn move_mask(self) -> i32 {
    pick! {
      if #[cfg(target_feature="sse2")] {
        // use f64 move_mask since it is the same size as i64
        move_mask_m128d(cast(self.sse))
      } else if #[cfg(target_feature="simd128")] {
        i64x2_bitmask(self.simd) as i32
      } else {
        // nothing amazingly efficient for neon
        let arr: [u64; 2] = cast(self);
        (arr[0] >> 63 | ((arr[1] >> 62) & 2)) as i32
      }
    }
  }
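
  // An illustrative sketch: lane 0 maps to bit 0 and lane 1 to bit 1, so a
  // mask that is set only in the upper lane yields `0b10`:
  //
  //   let m = i64x2::new([0, -1]);
  //   assert_eq!(m.move_mask(), 0b10);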

  /// true if the high bit is set in any lane of the vector
  #[inline]
  #[must_use]
  pub fn any(self) -> bool {
    pick! {
      if #[cfg(target_feature="sse2")] {
        // use f64 move_mask since it is the same size as i64
        move_mask_m128d(cast(self.sse)) != 0
      } else if #[cfg(target_feature="simd128")] {
        i64x2_bitmask(self.simd) != 0
      } else {
        let v : [u64;2] = cast(self);
        ((v[0] | v[1]) & 0x8000000000000000) != 0
      }
    }
  }

  /// true if the high bit is set in every lane of the vector
  #[inline]
  #[must_use]
  pub fn all(self) -> bool {
    pick! {
      if #[cfg(target_feature="sse2")] {
        // use f64 move_mask since it is the same size as i64
        move_mask_m128d(cast(self.sse)) == 0b11
      } else if #[cfg(target_feature="simd128")] {
        i64x2_bitmask(self.simd) == 0b11
      } else {
        let v : [u64;2] = cast(self);
        ((v[0] & v[1]) & 0x8000000000000000) == 0x8000000000000000
      }
    }
  }

  /// true if the high bit is not set in any lane of the vector
  #[inline]
  #[must_use]
  pub fn none(self) -> bool {
    !self.any()
  }
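
  // `any`/`all`/`none` only look at the high (sign) bit of each lane, which is
  // what the comparison masks above produce. An illustrative sketch:
  //
  //   let a = i64x2::new([1, 2]);
  //   assert!(a.cmp_eq(a).all());
  //   assert!(a.cmp_gt(a).none());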

  #[inline]
  pub fn to_array(self) -> [i64; 2] {
    cast(self)
  }

  #[inline]
  pub fn as_array_ref(&self) -> &[i64; 2] {
    cast_ref(self)
  }

  #[inline]
  pub fn as_array_mut(&mut self) -> &mut [i64; 2] {
    cast_mut(self)
  }
}