wide/u64x2_.rs

use super::*;

pick! {
  if #[cfg(target_feature="sse2")] {
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(16))]
    pub struct u64x2 { pub(crate) sse: m128i }
  } else if #[cfg(target_feature="simd128")] {
    use core::arch::wasm32::*;

    #[derive(Clone, Copy)]
    #[repr(transparent)]
    pub struct u64x2 { pub(crate) simd: v128 }

    impl Default for u64x2 {
      fn default() -> Self {
        Self::splat(0)
      }
    }

    impl PartialEq for u64x2 {
      fn eq(&self, other: &Self) -> bool {
        u64x2_all_true(u64x2_eq(self.simd, other.simd))
      }
    }

    impl Eq for u64x2 { }
  } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
    use core::arch::aarch64::*;
    #[repr(C)]
    #[derive(Copy, Clone)]
    pub struct u64x2 { pub(crate) neon : uint64x2_t }

    impl Default for u64x2 {
      #[inline]
      #[must_use]
      fn default() -> Self {
        unsafe { Self { neon: vdupq_n_u64(0)} }
      }
    }

    impl PartialEq for u64x2 {
      #[inline]
      #[must_use]
      fn eq(&self, other: &Self) -> bool {
        unsafe {
          vgetq_lane_u64(self.neon,0) == vgetq_lane_u64(other.neon,0) && vgetq_lane_u64(self.neon,1) == vgetq_lane_u64(other.neon,1)
        }
      }
    }

    impl Eq for u64x2 { }
  } else {
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(16))]
    pub struct u64x2 { arr: [u64;2] }
  }
}

int_uint_consts!(u64, 2, u64x2, 128);

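// Safety note: every representation selected above is a plain 128-bit value
// with no padding bytes and no invalid bit patterns, which is what makes
// zero-initialization and byte-wise casts sound for this type.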
unsafe impl Zeroable for u64x2 {}
unsafe impl Pod for u64x2 {}

impl Add for u64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn add(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: add_i64_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: u64x2_add(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vaddq_u64(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          self.arr[0].wrapping_add(rhs.arr[0]),
          self.arr[1].wrapping_add(rhs.arr[1]),
        ]}
      }
    }
  }
}

impl Sub for u64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn sub(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: sub_i64_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: u64x2_sub(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vsubq_u64(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          self.arr[0].wrapping_sub(rhs.arr[0]),
          self.arr[1].wrapping_sub(rhs.arr[1]),
        ]}
      }
    }
  }
}

// TODO: implement a dedicated SSE2 path for this (see the illustrative sketch after this impl).
impl Mul for u64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn mul(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="simd128")] {
        Self { simd: u64x2_mul(self.simd, rhs.simd) }
      } else {
        let arr1: [u64; 2] = cast(self);
        let arr2: [u64; 2] = cast(rhs);
        cast([
          arr1[0].wrapping_mul(arr2[0]),
          arr1[1].wrapping_mul(arr2[1]),
        ])
      }
    }
  }
}
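
// Illustrative sketch only, not part of the crate: one possible SSE2 path for
// the TODO above. SSE2 has no 64x64-bit lane multiply, but with each lane
// split as `a = a_hi * 2^32 + a_lo`, the low 64 bits of `a * b` are
// `a_lo*b_lo + ((a_lo*b_hi + a_hi*b_lo) << 32)`. Raw `core::arch` intrinsics
// are used here rather than the crate's safe wrappers, whose exact names are
// not shown in this file.
#[cfg(all(test, target_arch = "x86_64", target_feature = "sse2"))]
#[allow(dead_code)]
fn mul_u64_lanes_sse2_sketch(
  a: core::arch::x86_64::__m128i,
  b: core::arch::x86_64::__m128i,
) -> core::arch::x86_64::__m128i {
  use core::arch::x86_64::*;
  unsafe {
    // a_lo * b_lo: widening 32x32 -> 64 multiply of the low half of each lane.
    let lo_lo = _mm_mul_epu32(a, b);
    // Cross terms: a_hi * b_lo and a_lo * b_hi.
    let a_hi = _mm_srli_epi64::<32>(a);
    let b_hi = _mm_srli_epi64::<32>(b);
    let cross = _mm_add_epi64(_mm_mul_epu32(a_hi, b), _mm_mul_epu32(a, b_hi));
    // The cross terms only contribute to the upper 32 bits of each 64-bit lane.
    _mm_add_epi64(lo_lo, _mm_slli_epi64::<32>(cross))
  }
}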

impl Add<u64> for u64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn add(self, rhs: u64) -> Self::Output {
    self.add(Self::splat(rhs))
  }
}

impl Sub<u64> for u64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn sub(self, rhs: u64) -> Self::Output {
    self.sub(Self::splat(rhs))
  }
}

impl Mul<u64> for u64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn mul(self, rhs: u64) -> Self::Output {
    self.mul(Self::splat(rhs))
  }
}

impl Add<u64x2> for u64 {
  type Output = u64x2;
  #[inline]
  #[must_use]
  fn add(self, rhs: u64x2) -> Self::Output {
    u64x2::splat(self).add(rhs)
  }
}

impl Sub<u64x2> for u64 {
  type Output = u64x2;
  #[inline]
  #[must_use]
  fn sub(self, rhs: u64x2) -> Self::Output {
    u64x2::splat(self).sub(rhs)
  }
}

impl Mul<u64x2> for u64 {
  type Output = u64x2;
  #[inline]
  #[must_use]
  fn mul(self, rhs: u64x2) -> Self::Output {
    u64x2::splat(self).mul(rhs)
  }
}

impl BitAnd for u64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn bitand(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: bitand_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_and(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vandq_u64(self.neon, rhs.neon) }}
      } else {
        Self { arr: [
          self.arr[0].bitand(rhs.arr[0]),
          self.arr[1].bitand(rhs.arr[1]),
        ]}
      }
    }
  }
}

impl BitOr for u64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn bitor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: bitor_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_or(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vorrq_u64(self.neon, rhs.neon) }}
      } else {
        Self { arr: [
          self.arr[0].bitor(rhs.arr[0]),
          self.arr[1].bitor(rhs.arr[1]),
        ]}
      }
    }
  }
}

impl BitXor for u64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn bitxor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: bitxor_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_xor(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: veorq_u64(self.neon, rhs.neon) }}
      } else {
        Self { arr: [
          self.arr[0].bitxor(rhs.arr[0]),
          self.arr[1].bitxor(rhs.arr[1]),
        ]}
      }
    }
  }
}

macro_rules! impl_shl_t_for_u64x2 {
  ($($shift_type:ty),+ $(,)?) => {
    $(impl Shl<$shift_type> for u64x2 {
      type Output = Self;
      /// Shifts all lanes by the value given.
      #[inline]
      #[must_use]
      fn shl(self, rhs: $shift_type) -> Self::Output {
        pick! {
          if #[cfg(target_feature="sse2")] {
            let shift = cast([rhs as u64, 0]);
            Self { sse: shl_all_u64_m128i(self.sse, shift) }
          } else if #[cfg(target_feature="simd128")] {
            Self { simd: u64x2_shl(self.simd, rhs as u32) }
          } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
            unsafe {Self { neon: vshlq_u64(self.neon, vmovq_n_s64(rhs as i64)) }}
          } else {
            let u = rhs as u64;
            Self { arr: [
              self.arr[0] << u,
              self.arr[1] << u,
            ]}
          }
        }
      }
    })+
  };
}
impl_shl_t_for_u64x2!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);

macro_rules! impl_shr_t_for_u64x2 {
  ($($shift_type:ty),+ $(,)?) => {
    $(impl Shr<$shift_type> for u64x2 {
      type Output = Self;
      /// Shifts all lanes by the value given.
      #[inline]
      #[must_use]
      fn shr(self, rhs: $shift_type) -> Self::Output {
        pick! {
          if #[cfg(target_feature="sse2")] {
            let shift = cast([rhs as u64, 0]);
            Self { sse: shr_all_u64_m128i(self.sse, shift) }
          } else if #[cfg(target_feature="simd128")] {
            Self { simd: u64x2_shr(self.simd, rhs as u32) }
          } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
            unsafe {Self { neon: vshlq_u64(self.neon, vmovq_n_s64(-(rhs as i64))) }}
          } else {
            let u = rhs as u64;
            Self { arr: [
              self.arr[0] >> u,
              self.arr[1] >> u,
            ]}
          }
        }
      }
    })+
  };
}
impl_shr_t_for_u64x2!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);
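
// Illustrative sketch only, not part of the crate: both shift impls above
// apply one scalar count to every lane. Note that the NEON path has no
// dedicated right-shift-by-register form, so `shr` is expressed as
// `vshlq_u64` with a negated (negative) shift count.
#[cfg(test)]
#[test]
fn sketch_shift_all_lanes() {
  let v = u64x2::new([0b1000, 1]);
  // Every lane is shifted by the same amount.
  assert_eq!((v << 1_u32).to_array(), [0b10000, 2]);
  assert_eq!((v >> 3_u32).to_array(), [0b1, 0]);
}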

impl u64x2 {
  #[inline]
  #[must_use]
  pub const fn new(array: [u64; 2]) -> Self {
    unsafe { core::mem::transmute(array) }
  }
  #[inline]
  #[must_use]
  pub fn cmp_eq(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse4.1")] {
        Self { sse: cmp_eq_mask_i64_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: u64x2_eq(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vceqq_u64(self.neon, rhs.neon) } }
      } else {
        let s: [u64;2] = cast(self);
        let r: [u64;2] = cast(rhs);
        cast([
          if s[0] == r[0] { -1_i64 } else { 0 },
          if s[1] == r[1] { -1_i64 } else { 0 },
        ])
      }
    }
  }
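  /// Lane-wise unsigned `>`: each lane is all ones (`u64::MAX`) where
  /// `self > rhs` and zero otherwise.
  ///
  /// Illustrative example (a sketch; assumes this type is re-exported at the
  /// crate root as `wide::u64x2`):
  /// ```
  /// # use wide::u64x2;
  /// let a = u64x2::new([5, 1]);
  /// let b = u64x2::new([1, 5]);
  /// assert_eq!(a.cmp_gt(b).to_array(), [u64::MAX, 0]);
  /// ```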
  #[inline]
  #[must_use]
  pub fn cmp_gt(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse4.2")] {
        // SSE has no unsigned 64-bit `>` compare, so flip the sign bit of both
        // sides and use the signed compare, which preserves the ordering.
        let highbit = u64x2::splat(1 << 63);
        Self { sse: cmp_gt_mask_i64_m128i((self ^ highbit).sse, (rhs ^ highbit).sse) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vcgtq_u64(self.neon, rhs.neon) }}
      } else {
        // WASM simd128 has no unsigned u64x2 `>` operation, so it falls through
        // to this scalar path. See https://github.com/WebAssembly/simd/pull/414
        let s: [u64;2] = cast(self);
        let r: [u64;2] = cast(rhs);
        cast([
          if s[0] > r[0] { u64::MAX } else { 0 },
          if s[1] > r[1] { u64::MAX } else { 0 },
        ])
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn cmp_lt(self, rhs: Self) -> Self {
    // lt is just gt the other way around
    rhs.cmp_gt(self)
  }

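  /// Selects bits from `t` where the mask in `self` is set and from `f` where
  /// it is clear; the `cmp_*` methods produce suitable all-ones/all-zeros
  /// lane masks.
  ///
  /// Illustrative example (a sketch; assumes this type is re-exported at the
  /// crate root as `wide::u64x2`):
  /// ```
  /// # use wide::u64x2;
  /// let mask = u64x2::new([u64::MAX, 0]);
  /// let t = u64x2::new([1, 2]);
  /// let f = u64x2::new([30, 40]);
  /// assert_eq!(mask.blend(t, f).to_array(), [1, 40]);
  /// ```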
  #[inline]
  #[must_use]
  pub fn blend(self, t: Self, f: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse4.1")] {
        Self { sse: blend_varying_i8_m128i(f.sse, t.sse, self.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_bitselect(t.simd, f.simd, self.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vbslq_u64(self.neon, t.neon, f.neon) }}
      } else {
        generic_bit_blend(self, t, f)
      }
    }
  }

  #[inline]
  pub fn to_array(self) -> [u64; 2] {
    cast(self)
  }

  #[inline]
  pub fn as_array_ref(&self) -> &[u64; 2] {
    cast_ref(self)
  }

  #[inline]
  pub fn as_array_mut(&mut self) -> &mut [u64; 2] {
    cast_mut(self)
  }
}
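
// Illustrative sketch only, not part of the crate: the comparison masks and
// `blend` compose into lane-wise selection, e.g. an unsigned per-lane max.
#[cfg(test)]
#[test]
fn sketch_cmp_gt_blend_as_max() {
  let a = u64x2::new([5, 1]);
  let b = u64x2::new([2, 9]);
  // Where `a > b` keep `a`'s lane, otherwise keep `b`'s.
  let max = a.cmp_gt(b).blend(a, b);
  assert_eq!(max.to_array(), [5, 9]);
}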