// wide/i8x16_.rs

use super::*;

pick! {
  if #[cfg(target_feature="sse2")] {
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(16))]
    pub struct i8x16 { pub(crate) sse: m128i }
  } else if #[cfg(target_feature="simd128")] {
    use core::arch::wasm32::*;

    #[derive(Clone, Copy)]
    #[repr(transparent)]
    pub struct i8x16 { pub(crate) simd: v128 }

    impl Default for i8x16 {
      fn default() -> Self {
        Self::splat(0)
      }
    }

    impl PartialEq for i8x16 {
      fn eq(&self, other: &Self) -> bool {
        u8x16_all_true(i8x16_eq(self.simd, other.simd))
      }
    }

    impl Eq for i8x16 { }
  } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
    use core::arch::aarch64::*;
    #[repr(C)]
    #[derive(Copy, Clone)]
    pub struct i8x16 { pub(crate) neon : int8x16_t }

    impl Default for i8x16 {
      #[inline]
      #[must_use]
      fn default() -> Self {
        Self::splat(0)
      }
    }

    impl PartialEq for i8x16 {
      #[inline]
      #[must_use]
      fn eq(&self, other: &Self) -> bool {
        unsafe { vminvq_u8(vceqq_s8(self.neon, other.neon))==u8::MAX }
      }
    }

    impl Eq for i8x16 { }
  } else {
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(16))]
    pub struct i8x16 { arr: [i8;16] }
  }
}

int_uint_consts!(i8, 16, i8x16, 128);

unsafe impl Zeroable for i8x16 {}
unsafe impl Pod for i8x16 {}

impl Add for i8x16 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn add(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: add_i8_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i8x16_add(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vaddq_s8(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          self.arr[0].wrapping_add(rhs.arr[0]),
          self.arr[1].wrapping_add(rhs.arr[1]),
          self.arr[2].wrapping_add(rhs.arr[2]),
          self.arr[3].wrapping_add(rhs.arr[3]),
          self.arr[4].wrapping_add(rhs.arr[4]),
          self.arr[5].wrapping_add(rhs.arr[5]),
          self.arr[6].wrapping_add(rhs.arr[6]),
          self.arr[7].wrapping_add(rhs.arr[7]),
          self.arr[8].wrapping_add(rhs.arr[8]),
          self.arr[9].wrapping_add(rhs.arr[9]),
          self.arr[10].wrapping_add(rhs.arr[10]),
          self.arr[11].wrapping_add(rhs.arr[11]),
          self.arr[12].wrapping_add(rhs.arr[12]),
          self.arr[13].wrapping_add(rhs.arr[13]),
          self.arr[14].wrapping_add(rhs.arr[14]),
          self.arr[15].wrapping_add(rhs.arr[15]),
        ]}
      }
    }
  }
}

impl Sub for i8x16 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn sub(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: sub_i8_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i8x16_sub(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vsubq_s8(self.neon, rhs.neon) }}
      } else {
        Self { arr: [
          self.arr[0].wrapping_sub(rhs.arr[0]),
          self.arr[1].wrapping_sub(rhs.arr[1]),
          self.arr[2].wrapping_sub(rhs.arr[2]),
          self.arr[3].wrapping_sub(rhs.arr[3]),
          self.arr[4].wrapping_sub(rhs.arr[4]),
          self.arr[5].wrapping_sub(rhs.arr[5]),
          self.arr[6].wrapping_sub(rhs.arr[6]),
          self.arr[7].wrapping_sub(rhs.arr[7]),
          self.arr[8].wrapping_sub(rhs.arr[8]),
          self.arr[9].wrapping_sub(rhs.arr[9]),
          self.arr[10].wrapping_sub(rhs.arr[10]),
          self.arr[11].wrapping_sub(rhs.arr[11]),
          self.arr[12].wrapping_sub(rhs.arr[12]),
          self.arr[13].wrapping_sub(rhs.arr[13]),
          self.arr[14].wrapping_sub(rhs.arr[14]),
          self.arr[15].wrapping_sub(rhs.arr[15]),
        ]}
      }
    }
  }
}

impl Add<i8> for i8x16 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn add(self, rhs: i8) -> Self::Output {
    self.add(Self::splat(rhs))
  }
}

impl Sub<i8> for i8x16 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn sub(self, rhs: i8) -> Self::Output {
    self.sub(Self::splat(rhs))
  }
}

impl Add<i8x16> for i8 {
  type Output = i8x16;
  #[inline]
  #[must_use]
  fn add(self, rhs: i8x16) -> Self::Output {
    i8x16::splat(self).add(rhs)
  }
}

impl Sub<i8x16> for i8 {
  type Output = i8x16;
  #[inline]
  #[must_use]
  fn sub(self, rhs: i8x16) -> Self::Output {
    i8x16::splat(self).sub(rhs)
  }
}

impl BitAnd for i8x16 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn bitand(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: bitand_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_and(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vandq_s8(self.neon, rhs.neon) }}
      } else {
        Self { arr: [
          self.arr[0].bitand(rhs.arr[0]),
          self.arr[1].bitand(rhs.arr[1]),
          self.arr[2].bitand(rhs.arr[2]),
          self.arr[3].bitand(rhs.arr[3]),
          self.arr[4].bitand(rhs.arr[4]),
          self.arr[5].bitand(rhs.arr[5]),
          self.arr[6].bitand(rhs.arr[6]),
          self.arr[7].bitand(rhs.arr[7]),
          self.arr[8].bitand(rhs.arr[8]),
          self.arr[9].bitand(rhs.arr[9]),
          self.arr[10].bitand(rhs.arr[10]),
          self.arr[11].bitand(rhs.arr[11]),
          self.arr[12].bitand(rhs.arr[12]),
          self.arr[13].bitand(rhs.arr[13]),
          self.arr[14].bitand(rhs.arr[14]),
          self.arr[15].bitand(rhs.arr[15]),
        ]}
      }
    }
  }
}

impl BitOr for i8x16 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn bitor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: bitor_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_or(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vorrq_s8(self.neon, rhs.neon) }}
      } else {
        Self { arr: [
          self.arr[0].bitor(rhs.arr[0]),
          self.arr[1].bitor(rhs.arr[1]),
          self.arr[2].bitor(rhs.arr[2]),
          self.arr[3].bitor(rhs.arr[3]),
          self.arr[4].bitor(rhs.arr[4]),
          self.arr[5].bitor(rhs.arr[5]),
          self.arr[6].bitor(rhs.arr[6]),
          self.arr[7].bitor(rhs.arr[7]),
          self.arr[8].bitor(rhs.arr[8]),
          self.arr[9].bitor(rhs.arr[9]),
          self.arr[10].bitor(rhs.arr[10]),
          self.arr[11].bitor(rhs.arr[11]),
          self.arr[12].bitor(rhs.arr[12]),
          self.arr[13].bitor(rhs.arr[13]),
          self.arr[14].bitor(rhs.arr[14]),
          self.arr[15].bitor(rhs.arr[15]),
        ]}
      }
    }
  }
}

impl BitXor for i8x16 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn bitxor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: bitxor_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_xor(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: veorq_s8(self.neon, rhs.neon) }}
      } else {
        Self { arr: [
          self.arr[0].bitxor(rhs.arr[0]),
          self.arr[1].bitxor(rhs.arr[1]),
          self.arr[2].bitxor(rhs.arr[2]),
          self.arr[3].bitxor(rhs.arr[3]),
          self.arr[4].bitxor(rhs.arr[4]),
          self.arr[5].bitxor(rhs.arr[5]),
          self.arr[6].bitxor(rhs.arr[6]),
          self.arr[7].bitxor(rhs.arr[7]),
          self.arr[8].bitxor(rhs.arr[8]),
          self.arr[9].bitxor(rhs.arr[9]),
          self.arr[10].bitxor(rhs.arr[10]),
          self.arr[11].bitxor(rhs.arr[11]),
          self.arr[12].bitxor(rhs.arr[12]),
          self.arr[13].bitxor(rhs.arr[13]),
          self.arr[14].bitxor(rhs.arr[14]),
          self.arr[15].bitxor(rhs.arr[15]),
        ]}
      }
    }
  }
}

impl CmpEq for i8x16 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn cmp_eq(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: cmp_eq_mask_i8_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i8x16_eq(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vreinterpretq_s8_u8(vceqq_s8(self.neon, rhs.neon)) }}
      } else {
        Self { arr: [
          if self.arr[0] == rhs.arr[0] { -1 } else { 0 },
          if self.arr[1] == rhs.arr[1] { -1 } else { 0 },
          if self.arr[2] == rhs.arr[2] { -1 } else { 0 },
          if self.arr[3] == rhs.arr[3] { -1 } else { 0 },
          if self.arr[4] == rhs.arr[4] { -1 } else { 0 },
          if self.arr[5] == rhs.arr[5] { -1 } else { 0 },
          if self.arr[6] == rhs.arr[6] { -1 } else { 0 },
          if self.arr[7] == rhs.arr[7] { -1 } else { 0 },
          if self.arr[8] == rhs.arr[8] { -1 } else { 0 },
          if self.arr[9] == rhs.arr[9] { -1 } else { 0 },
          if self.arr[10] == rhs.arr[10] { -1 } else { 0 },
          if self.arr[11] == rhs.arr[11] { -1 } else { 0 },
          if self.arr[12] == rhs.arr[12] { -1 } else { 0 },
          if self.arr[13] == rhs.arr[13] { -1 } else { 0 },
          if self.arr[14] == rhs.arr[14] { -1 } else { 0 },
          if self.arr[15] == rhs.arr[15] { -1 } else { 0 },
        ]}
      }
    }
  }
}

impl CmpGt for i8x16 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn cmp_gt(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: cmp_gt_mask_i8_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i8x16_gt(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vreinterpretq_s8_u8(vcgtq_s8(self.neon, rhs.neon)) }}
      } else {
        Self { arr: [
          if self.arr[0] > rhs.arr[0] { -1 } else { 0 },
          if self.arr[1] > rhs.arr[1] { -1 } else { 0 },
          if self.arr[2] > rhs.arr[2] { -1 } else { 0 },
          if self.arr[3] > rhs.arr[3] { -1 } else { 0 },
          if self.arr[4] > rhs.arr[4] { -1 } else { 0 },
          if self.arr[5] > rhs.arr[5] { -1 } else { 0 },
          if self.arr[6] > rhs.arr[6] { -1 } else { 0 },
          if self.arr[7] > rhs.arr[7] { -1 } else { 0 },
          if self.arr[8] > rhs.arr[8] { -1 } else { 0 },
          if self.arr[9] > rhs.arr[9] { -1 } else { 0 },
          if self.arr[10] > rhs.arr[10] { -1 } else { 0 },
          if self.arr[11] > rhs.arr[11] { -1 } else { 0 },
          if self.arr[12] > rhs.arr[12] { -1 } else { 0 },
          if self.arr[13] > rhs.arr[13] { -1 } else { 0 },
          if self.arr[14] > rhs.arr[14] { -1 } else { 0 },
          if self.arr[15] > rhs.arr[15] { -1 } else { 0 },
        ]}
      }
    }
  }
}

impl CmpLt for i8x16 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn cmp_lt(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: cmp_lt_mask_i8_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i8x16_lt(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vreinterpretq_s8_u8(vcltq_s8(self.neon, rhs.neon)) }}
      } else {
        Self { arr: [
          if self.arr[0] < rhs.arr[0] { -1 } else { 0 },
          if self.arr[1] < rhs.arr[1] { -1 } else { 0 },
          if self.arr[2] < rhs.arr[2] { -1 } else { 0 },
          if self.arr[3] < rhs.arr[3] { -1 } else { 0 },
          if self.arr[4] < rhs.arr[4] { -1 } else { 0 },
          if self.arr[5] < rhs.arr[5] { -1 } else { 0 },
          if self.arr[6] < rhs.arr[6] { -1 } else { 0 },
          if self.arr[7] < rhs.arr[7] { -1 } else { 0 },
          if self.arr[8] < rhs.arr[8] { -1 } else { 0 },
          if self.arr[9] < rhs.arr[9] { -1 } else { 0 },
          if self.arr[10] < rhs.arr[10] { -1 } else { 0 },
          if self.arr[11] < rhs.arr[11] { -1 } else { 0 },
          if self.arr[12] < rhs.arr[12] { -1 } else { 0 },
          if self.arr[13] < rhs.arr[13] { -1 } else { 0 },
          if self.arr[14] < rhs.arr[14] { -1 } else { 0 },
          if self.arr[15] < rhs.arr[15] { -1 } else { 0 },
        ]}
      }
    }
  }
}

impl i8x16 {
  #[inline]
  #[must_use]
  pub const fn new(array: [i8; 16]) -> Self {
    unsafe { core::intrinsics::transmute(array) }
  }

  /// converts `i16` to `i8`, saturating values that fall outside the `i8` range
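  ///
  /// A minimal usage sketch (doc-test style; assumes the crate root re-exports
  /// these types, e.g. `use wide::*;`, and that `i16x16` has the same kind of
  /// `new` constructor as `i8x16`):
  ///
  /// ```
  /// # use wide::*;
  /// let v = i16x16::new([300, -300, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
  /// let s = i8x16::from_i16x16_saturate(v);
  /// assert_eq!(s.to_array(), [127, -128, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
  /// ```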
  #[inline]
  #[must_use]
  pub fn from_i16x16_saturate(v: i16x16) -> i8x16 {
    pick! {
      if #[cfg(target_feature="avx2")] {
        i8x16 { sse: pack_i16_to_i8_m128i( extract_m128i_from_m256i::<0>(v.avx2), extract_m128i_from_m256i::<1>(v.avx2)) }
      } else if #[cfg(target_feature="sse2")] {
        i8x16 { sse: pack_i16_to_i8_m128i( v.a.sse, v.b.sse ) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
        use core::arch::aarch64::*;

        unsafe {
          i8x16 { neon: vcombine_s8(vqmovn_s16(v.a.neon), vqmovn_s16(v.b.neon)) }
        }
      } else if #[cfg(target_feature="simd128")] {
        use core::arch::wasm32::*;

        i8x16 { simd: i8x16_narrow_i16x8(v.a.simd, v.b.simd) }
      } else {
        fn clamp(a: i16) -> i8 {
          if a < i8::MIN as i16 {
            i8::MIN
          } else if a > i8::MAX as i16 {
            i8::MAX
          } else {
            a as i8
          }
        }

        i8x16::new([
          clamp(v.as_array_ref()[0]),
          clamp(v.as_array_ref()[1]),
          clamp(v.as_array_ref()[2]),
          clamp(v.as_array_ref()[3]),
          clamp(v.as_array_ref()[4]),
          clamp(v.as_array_ref()[5]),
          clamp(v.as_array_ref()[6]),
          clamp(v.as_array_ref()[7]),
          clamp(v.as_array_ref()[8]),
          clamp(v.as_array_ref()[9]),
          clamp(v.as_array_ref()[10]),
          clamp(v.as_array_ref()[11]),
          clamp(v.as_array_ref()[12]),
          clamp(v.as_array_ref()[13]),
          clamp(v.as_array_ref()[14]),
          clamp(v.as_array_ref()[15]),
        ])
      }
    }
  }

  /// converts `i16` to `i8`, truncating each lane to its low 8 bits
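  ///
  /// A minimal usage sketch (same assumptions as the example on
  /// [`from_i16x16_saturate`](Self::from_i16x16_saturate)):
  ///
  /// ```
  /// # use wide::*;
  /// let v = i16x16::new([511, 258, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
  /// let t = i8x16::from_i16x16_truncate(v);
  /// assert_eq!(t.to_array(), [-1, 2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
  /// ```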
  #[inline]
  #[must_use]
  pub fn from_i16x16_truncate(v: i16x16) -> i8x16 {
    pick! {
      if #[cfg(target_feature="avx2")] {
        let a = v.avx2.bitand(set_splat_i16_m256i(0xff));
        i8x16 { sse: pack_i16_to_u8_m128i( extract_m128i_from_m256i::<0>(a), extract_m128i_from_m256i::<1>(a)) }
      } else if #[cfg(target_feature="sse2")] {
        let mask = set_splat_i16_m128i(0xff);
        i8x16 { sse: pack_i16_to_u8_m128i( v.a.sse.bitand(mask), v.b.sse.bitand(mask) ) }
      } else {
        // no super good intrinsics on other platforms... plain old codegen does a reasonable job
        i8x16::new([
          v.as_array_ref()[0] as i8,
          v.as_array_ref()[1] as i8,
          v.as_array_ref()[2] as i8,
          v.as_array_ref()[3] as i8,
          v.as_array_ref()[4] as i8,
          v.as_array_ref()[5] as i8,
          v.as_array_ref()[6] as i8,
          v.as_array_ref()[7] as i8,
          v.as_array_ref()[8] as i8,
          v.as_array_ref()[9] as i8,
          v.as_array_ref()[10] as i8,
          v.as_array_ref()[11] as i8,
          v.as_array_ref()[12] as i8,
          v.as_array_ref()[13] as i8,
          v.as_array_ref()[14] as i8,
          v.as_array_ref()[15] as i8,
        ])
      }
    }
  }

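  /// Lane-wise select driven by `self` as a comparison mask: lanes where
  /// `self` is all ones take the value from `t`, lanes where it is all zeros
  /// take the value from `f`. Intended to be used with the masks returned by
  /// `cmp_eq`, `cmp_gt`, and `cmp_lt`.
  ///
  /// A usage sketch (assuming the crate is used as `wide`):
  ///
  /// ```
  /// # use wide::*;
  /// let a = i8x16::new([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);
  /// // keep lanes of `a` equal to 3, zero the rest
  /// let r = a.cmp_eq(i8x16::splat(3)).blend(a, i8x16::splat(0));
  /// assert_eq!(r.to_array(), [0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
  /// ```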
  #[inline]
  #[must_use]
  pub fn blend(self, t: Self, f: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse4.1")] {
        Self { sse: blend_varying_i8_m128i(f.sse, t.sse, self.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_bitselect(t.simd, f.simd, self.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vbslq_s8(vreinterpretq_u8_s8(self.neon), t.neon, f.neon) }}
      } else {
        generic_bit_blend(self, t, f)
      }
    }
  }
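  /// Lane-wise absolute value.
  ///
  /// Note that `i8::MIN` has no positive counterpart in `i8`, so it is left
  /// unchanged (wrapping behavior); see
  /// [`unsigned_abs`](Self::unsigned_abs) if `128` is needed.
  ///
  /// A usage sketch (assuming the crate is used as `wide`):
  ///
  /// ```
  /// # use wide::*;
  /// let v = i8x16::new([-5, 5, i8::MIN, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
  /// assert_eq!(v.abs().to_array(), [5, 5, i8::MIN, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
  /// ```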
  #[inline]
  #[must_use]
  pub fn abs(self) -> Self {
    pick! {
      if #[cfg(target_feature="ssse3")] {
        Self { sse: abs_i8_m128i(self.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i8x16_abs(self.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vabsq_s8(self.neon) }}
      } else {
        let arr: [i8; 16] = cast(self);
        cast([
          arr[0].wrapping_abs(),
          arr[1].wrapping_abs(),
          arr[2].wrapping_abs(),
          arr[3].wrapping_abs(),
          arr[4].wrapping_abs(),
          arr[5].wrapping_abs(),
          arr[6].wrapping_abs(),
          arr[7].wrapping_abs(),
          arr[8].wrapping_abs(),
          arr[9].wrapping_abs(),
          arr[10].wrapping_abs(),
          arr[11].wrapping_abs(),
          arr[12].wrapping_abs(),
          arr[13].wrapping_abs(),
          arr[14].wrapping_abs(),
          arr[15].wrapping_abs(),
        ])
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn unsigned_abs(self) -> u8x16 {
    pick! {
      if #[cfg(target_feature="ssse3")] {
        u8x16 { sse: abs_i8_m128i(self.sse) }
      } else if #[cfg(target_feature="simd128")] {
        u8x16 { simd: i8x16_abs(self.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { u8x16 { neon: vreinterpretq_u8_s8(vabsq_s8(self.neon)) }}
      } else {
        let arr: [i8; 16] = cast(self);
        cast([
          arr[0].unsigned_abs(),
          arr[1].unsigned_abs(),
          arr[2].unsigned_abs(),
          arr[3].unsigned_abs(),
          arr[4].unsigned_abs(),
          arr[5].unsigned_abs(),
          arr[6].unsigned_abs(),
          arr[7].unsigned_abs(),
          arr[8].unsigned_abs(),
          arr[9].unsigned_abs(),
          arr[10].unsigned_abs(),
          arr[11].unsigned_abs(),
          arr[12].unsigned_abs(),
          arr[13].unsigned_abs(),
          arr[14].unsigned_abs(),
          arr[15].unsigned_abs(),
        ])
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn max(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse4.1")] {
        Self { sse: max_i8_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i8x16_max(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vmaxq_s8(self.neon, rhs.neon) }}
      } else {
        self.cmp_lt(rhs).blend(rhs, self)
      }
    }
  }
  #[inline]
  #[must_use]
  pub fn min(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse4.1")] {
        Self { sse: min_i8_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i8x16_min(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vminq_s8(self.neon, rhs.neon) }}
      } else {
        self.cmp_lt(rhs).blend(self, rhs)
      }
    }
  }

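  /// Builds a vector from the first 16 elements of `input`.
  ///
  /// Panics if `input.len() < 16`.
  ///
  /// A usage sketch (assuming the crate is used as `wide`):
  ///
  /// ```
  /// # use wide::*;
  /// let data = [7i8; 20];
  /// let v = i8x16::from_slice_unaligned(&data);
  /// assert_eq!(v.to_array(), [7; 16]);
  /// ```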
  #[inline]
  #[must_use]
  pub fn from_slice_unaligned(input: &[i8]) -> Self {
    assert!(input.len() >= 16);

    pick! {
      if #[cfg(target_feature="sse2")] {
        unsafe { Self { sse: load_unaligned_m128i( &*(input.as_ptr() as * const [u8;16]) ) } }
      } else if #[cfg(target_feature="simd128")] {
        unsafe { Self { simd: v128_load(input.as_ptr() as *const v128 ) } }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vld1q_s8( input.as_ptr() as *const i8 ) } }
      } else {
        // 2018 edition doesn't have try_into
        unsafe { Self::new( *(input.as_ptr() as * const [i8;16]) ) }
      }
    }
  }

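  /// Packs the sign bit of each lane into the low 16 bits of an `i32`: bit `i`
  /// of the result is set exactly when lane `i` is negative (i.e. when a
  /// comparison mask lane is "true").
  ///
  /// A usage sketch (assuming the crate is used as `wide`):
  ///
  /// ```
  /// # use wide::*;
  /// let v = i8x16::new([-1, 0, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1]);
  /// assert_eq!(v.move_mask(), 0b1000_0000_0000_0101);
  /// ```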
  #[inline]
  #[must_use]
  pub fn move_mask(self) -> i32 {
    pick! {
      if #[cfg(target_feature="sse2")] {
        move_mask_i8_m128i(self.sse)
      } else if #[cfg(target_feature="simd128")] {
        i8x16_bitmask(self.simd) as i32
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe
        {
          // set all to 1 if top bit is set, else 0
          let masked = vcltq_s8(self.neon, vdupq_n_s8(0));

          // select the right bit out of each lane
          let selectbit : uint8x16_t = core::intrinsics::transmute([1u8, 2, 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128]);
          let out = vandq_u8(masked, selectbit);

          // interleave the lanes so that a 16-bit sum accumulates the bits in the right order
          let table : uint8x16_t = core::intrinsics::transmute([0u8, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15]);
          let r = vqtbl1q_u8(out, table);

          // horizontally add the 16-bit lanes
          vaddvq_u16(vreinterpretq_u16_u8(r)) as i32
        }
      } else {
        ((self.arr[0] < 0) as i32) << 0 |
        ((self.arr[1] < 0) as i32) << 1 |
        ((self.arr[2] < 0) as i32) << 2 |
        ((self.arr[3] < 0) as i32) << 3 |
        ((self.arr[4] < 0) as i32) << 4 |
        ((self.arr[5] < 0) as i32) << 5 |
        ((self.arr[6] < 0) as i32) << 6 |
        ((self.arr[7] < 0) as i32) << 7 |
        ((self.arr[8] < 0) as i32) << 8 |
        ((self.arr[9] < 0) as i32) << 9 |
        ((self.arr[10] < 0) as i32) << 10 |
        ((self.arr[11] < 0) as i32) << 11 |
        ((self.arr[12] < 0) as i32) << 12 |
        ((self.arr[13] < 0) as i32) << 13 |
        ((self.arr[14] < 0) as i32) << 14 |
        ((self.arr[15] < 0) as i32) << 15
      }
    }
  }

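  /// Returns `true` if the sign bit of any lane is set, i.e. if any lane of a
  /// comparison mask is "true" (any lane is negative).
  ///
  /// A usage sketch (assuming the crate is used as `wide`):
  ///
  /// ```
  /// # use wide::*;
  /// let a = i8x16::new([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);
  /// assert!(a.cmp_gt(i8x16::splat(10)).any());
  /// assert!(!a.cmp_gt(i8x16::splat(100)).any());
  /// ```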
  #[inline]
  #[must_use]
  pub fn any(self) -> bool {
    pick! {
      if #[cfg(target_feature="sse2")] {
        move_mask_i8_m128i(self.sse) != 0
      } else if #[cfg(target_feature="simd128")] {
        u8x16_bitmask(self.simd) != 0
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
        unsafe {
          vminvq_s8(self.neon) < 0
        }
      } else {
        let v : [u64;2] = cast(self);
        ((v[0] | v[1]) & 0x8080808080808080) != 0
      }
    }
  }
  #[inline]
  #[must_use]
  pub fn all(self) -> bool {
    pick! {
      if #[cfg(target_feature="sse2")] {
        move_mask_i8_m128i(self.sse) == 0b1111_1111_1111_1111
      } else if #[cfg(target_feature="simd128")] {
        u8x16_bitmask(self.simd) == 0b1111_1111_1111_1111
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
        unsafe {
          vmaxvq_s8(self.neon) < 0
        }
      } else {
        let v : [u64;2] = cast(self);
        (v[0] & v[1] & 0x8080808080808080) == 0x8080808080808080
      }
    }
  }

  /// Returns a new vector where each element is based on the index values in
  /// `rhs`.
  ///
  /// * Index values in the range `[0, 15]` select the i-th element of `self`.
  /// * Index values that are out of range will cause that output lane to be
  ///   `0`.
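  ///
  /// A usage sketch (assuming the crate is used as `wide`):
  ///
  /// ```
  /// # use wide::*;
  /// let v = i8x16::new([10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]);
  /// let idx = i8x16::new([15, 0, 1, 2, -1, 100, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0]);
  /// assert_eq!(
  ///   v.swizzle(idx).to_array(),
  ///   [25, 10, 11, 12, 0, 0, 13, 13, 10, 10, 10, 10, 10, 10, 10, 10]
  /// );
  /// ```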
  #[inline]
  pub fn swizzle(self, rhs: i8x16) -> i8x16 {
    pick! {
      if #[cfg(target_feature="ssse3")] {
        Self { sse: shuffle_av_i8z_all_m128i(self.sse, add_saturating_u8_m128i(rhs.sse, set_splat_i8_m128i(0x70))) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i8x16_swizzle(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
        unsafe { Self { neon: vqtbl1q_s8(self.neon, vreinterpretq_u8_s8(rhs.neon)) } }
      } else {
        let idxs = rhs.to_array();
        let arr = self.to_array();
        let mut out = [0i8;16];
        for i in 0..16 {
          let idx = idxs[i] as usize;
          if idx >= 16 {
            out[i] = 0;
          } else {
            out[i] = arr[idx];
          }
        }
        Self::new(out)
      }
    }
  }

  /// Works like [`swizzle`](Self::swizzle) with the following additional
  /// details
  ///
  /// * Indices in the range `[0, 15]` will select the i-th element of `self`.
  /// * If the high bit of any index is set (meaning that the index is
  ///   negative), then the corresponding output lane is guaranteed to be zero.
  /// * Otherwise the output lane is either `0` or `self[rhs[i] % 16]`,
  ///   depending on the implementation.
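  ///
  /// A usage sketch using only in-range and negative indices, so the result is
  /// the same on every backend (assuming the crate is used as `wide`):
  ///
  /// ```
  /// # use wide::*;
  /// let v = i8x16::new([10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]);
  /// let idx = i8x16::new([3, 2, 1, 0, -1, -128, 15, 15, 0, 0, 0, 0, 0, 0, 0, 0]);
  /// assert_eq!(
  ///   v.swizzle_relaxed(idx).to_array(),
  ///   [13, 12, 11, 10, 0, 0, 25, 25, 10, 10, 10, 10, 10, 10, 10, 10]
  /// );
  /// ```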
  #[inline]
  pub fn swizzle_relaxed(self, rhs: i8x16) -> i8x16 {
    pick! {
      if #[cfg(target_feature="ssse3")] {
        Self { sse: shuffle_av_i8z_all_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i8x16_swizzle(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
        unsafe { Self { neon: vqtbl1q_s8(self.neon, vreinterpretq_u8_s8(rhs.neon)) } }
      } else {
        let idxs = rhs.to_array();
        let arr = self.to_array();
        let mut out = [0i8;16];
        for i in 0..16 {
          let idx = idxs[i] as usize;
          if idx >= 16 {
            out[i] = 0;
          } else {
            out[i] = arr[idx];
          }
        }
        Self::new(out)
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn none(self) -> bool {
    !self.any()
  }

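  /// Lane-wise saturating addition: results are clamped to the
  /// `i8::MIN..=i8::MAX` range instead of wrapping.
  ///
  /// A usage sketch (assuming the crate is used as `wide`):
  ///
  /// ```
  /// # use wide::*;
  /// let a = i8x16::splat(100);
  /// assert_eq!(a.saturating_add(a).to_array(), [i8::MAX; 16]);
  /// let b = i8x16::splat(-100);
  /// assert_eq!(b.saturating_add(b).to_array(), [i8::MIN; 16]);
  /// ```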
  #[inline]
  #[must_use]
  pub fn saturating_add(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: add_saturating_i8_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i8x16_add_sat(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vqaddq_s8(self.neon, rhs.neon) }}
      } else {
        Self { arr: [
          self.arr[0].saturating_add(rhs.arr[0]),
          self.arr[1].saturating_add(rhs.arr[1]),
          self.arr[2].saturating_add(rhs.arr[2]),
          self.arr[3].saturating_add(rhs.arr[3]),
          self.arr[4].saturating_add(rhs.arr[4]),
          self.arr[5].saturating_add(rhs.arr[5]),
          self.arr[6].saturating_add(rhs.arr[6]),
          self.arr[7].saturating_add(rhs.arr[7]),
          self.arr[8].saturating_add(rhs.arr[8]),
          self.arr[9].saturating_add(rhs.arr[9]),
          self.arr[10].saturating_add(rhs.arr[10]),
          self.arr[11].saturating_add(rhs.arr[11]),
          self.arr[12].saturating_add(rhs.arr[12]),
          self.arr[13].saturating_add(rhs.arr[13]),
          self.arr[14].saturating_add(rhs.arr[14]),
          self.arr[15].saturating_add(rhs.arr[15]),
        ]}
      }
    }
  }
  #[inline]
  #[must_use]
  pub fn saturating_sub(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: sub_saturating_i8_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i8x16_sub_sat(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vqsubq_s8(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          self.arr[0].saturating_sub(rhs.arr[0]),
          self.arr[1].saturating_sub(rhs.arr[1]),
          self.arr[2].saturating_sub(rhs.arr[2]),
          self.arr[3].saturating_sub(rhs.arr[3]),
          self.arr[4].saturating_sub(rhs.arr[4]),
          self.arr[5].saturating_sub(rhs.arr[5]),
          self.arr[6].saturating_sub(rhs.arr[6]),
          self.arr[7].saturating_sub(rhs.arr[7]),
          self.arr[8].saturating_sub(rhs.arr[8]),
          self.arr[9].saturating_sub(rhs.arr[9]),
          self.arr[10].saturating_sub(rhs.arr[10]),
          self.arr[11].saturating_sub(rhs.arr[11]),
          self.arr[12].saturating_sub(rhs.arr[12]),
          self.arr[13].saturating_sub(rhs.arr[13]),
          self.arr[14].saturating_sub(rhs.arr[14]),
          self.arr[15].saturating_sub(rhs.arr[15]),
        ]}
      }
    }
  }

  #[inline]
  pub fn to_array(self) -> [i8; 16] {
    cast(self)
  }

  #[inline]
  pub fn as_array_ref(&self) -> &[i8; 16] {
    cast_ref(self)
  }

  #[inline]
  pub fn as_array_mut(&mut self) -> &mut [i8; 16] {
    cast_mut(self)
  }
}