wide/i16x16_.rs

use super::*;

pick! {
  if #[cfg(target_feature="avx2")] {
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(32))]
    pub struct i16x16 { pub(crate) avx2: m256i }
  } else {
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(32))]
    pub struct i16x16 { pub(crate) a : i16x8, pub(crate) b : i16x8 }
  }
}

int_uint_consts!(i16, 16, i16x16, 256);

unsafe impl Zeroable for i16x16 {}
unsafe impl Pod for i16x16 {}

impl Add for i16x16 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn add(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: add_i16_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.add(rhs.a),
          b : self.b.add(rhs.b),
        }
      }
    }
  }
}

impl Sub for i16x16 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn sub(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: sub_i16_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.sub(rhs.a),
          b : self.b.sub(rhs.b),
        }
      }
    }
  }
}

impl Mul for i16x16 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn mul(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: mul_i16_keep_low_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.mul(rhs.a),
          b : self.b.mul(rhs.b),
        }
      }
    }
  }
}

impl Add<i16> for i16x16 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn add(self, rhs: i16) -> Self::Output {
    self.add(Self::splat(rhs))
  }
}

impl Sub<i16> for i16x16 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn sub(self, rhs: i16) -> Self::Output {
    self.sub(Self::splat(rhs))
  }
}

impl Mul<i16> for i16x16 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn mul(self, rhs: i16) -> Self::Output {
    self.mul(Self::splat(rhs))
  }
}

impl Add<i16x16> for i16 {
  type Output = i16x16;
  #[inline]
  #[must_use]
  fn add(self, rhs: i16x16) -> Self::Output {
    i16x16::splat(self).add(rhs)
  }
}

impl Sub<i16x16> for i16 {
  type Output = i16x16;
  #[inline]
  #[must_use]
  fn sub(self, rhs: i16x16) -> Self::Output {
    i16x16::splat(self).sub(rhs)
  }
}

impl Mul<i16x16> for i16 {
  type Output = i16x16;
  #[inline]
  #[must_use]
  fn mul(self, rhs: i16x16) -> Self::Output {
    i16x16::splat(self).mul(rhs)
  }
}

impl BitAnd for i16x16 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn bitand(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: bitand_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.bitand(rhs.a),
          b : self.b.bitand(rhs.b),
        }
      }
    }
  }
}

impl BitOr for i16x16 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn bitor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: bitor_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.bitor(rhs.a),
          b : self.b.bitor(rhs.b),
        }
      }
    }
  }
}

impl BitXor for i16x16 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn bitxor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: bitxor_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.bitxor(rhs.a),
          b : self.b.bitxor(rhs.b),
        }
      }
    }
  }
}

macro_rules! impl_shl_t_for_i16x16 {
  ($($shift_type:ty),+ $(,)?) => {
    $(impl Shl<$shift_type> for i16x16 {
      type Output = Self;
      /// Shifts all lanes left by the value given; zeros are shifted in.
      #[inline]
      #[must_use]
      fn shl(self, rhs: $shift_type) -> Self::Output {
        pick! {
          if #[cfg(target_feature="avx2")] {
            let shift = cast([rhs as u64, 0]);
            Self { avx2: shl_all_u16_m256i(self.avx2, shift) }
          } else {
            Self {
              a : self.a.shl(rhs),
              b : self.b.shl(rhs),
            }
          }
        }
      }
    })+
  };
}
impl_shl_t_for_i16x16!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);

macro_rules! impl_shr_t_for_i16x16 {
  ($($shift_type:ty),+ $(,)?) => {
    $(impl Shr<$shift_type> for i16x16 {
      type Output = Self;
      /// Shifts all lanes right by the value given; this is an arithmetic
      /// shift, so the sign bit of each lane is preserved.
      #[inline]
      #[must_use]
      fn shr(self, rhs: $shift_type) -> Self::Output {
        pick! {
          if #[cfg(target_feature="avx2")] {
            let shift = cast([rhs as u64, 0]);
            Self { avx2: shr_all_i16_m256i(self.avx2, shift) }
          } else {
            Self {
              a : self.a.shr(rhs),
              b : self.b.shr(rhs),
            }
          }
        }
      }
    })+
  };
}
impl_shr_t_for_i16x16!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);

impl CmpEq for i16x16 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn cmp_eq(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: cmp_eq_mask_i16_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.cmp_eq(rhs.a),
          b : self.b.cmp_eq(rhs.b),
        }
      }
    }
  }
}

impl CmpGt for i16x16 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn cmp_gt(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: cmp_gt_mask_i16_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.cmp_gt(rhs.a),
          b : self.b.cmp_gt(rhs.b),
        }
      }
    }
  }
}

impl CmpLt for i16x16 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn cmp_lt(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        // `a < b` is the complement of `a > b || a == b`; the gt and eq masks
        // are disjoint, so `!gt ^ eq` yields the lt mask (`!` binds tighter
        // than `^`).
        Self { avx2: !cmp_gt_mask_i16_m256i(self.avx2, rhs.avx2) ^ cmp_eq_mask_i16_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.cmp_lt(rhs.a),
          b : self.b.cmp_lt(rhs.b),
        }
      }
    }
  }
}

impl From<i8x16> for i16x16 {
  /// widen with sign extend from i8 to i16
  #[inline]
  #[must_use]
  fn from(i: i8x16) -> Self {
    i16x16::from_i8x16(i)
  }
}

impl From<u8x16> for i16x16 {
  /// widen with zero extend from u8 to i16
  #[inline]
  #[must_use]
  fn from(i: u8x16) -> Self {
    cast(u16x16::from(i))
  }
}

impl i16x16 {
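  /// Builds an `i16x16` from an array of sixteen lanes.
  ///
  /// A minimal usage sketch (doc-test style), assuming this type is
  /// re-exported at the crate root as in the `wide` crate:
  /// ```
  /// use wide::i16x16;
  /// let v = i16x16::new([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);
  /// assert_eq!((v + i16x16::splat(1)).to_array()[15], 17);
  /// ```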
  #[inline]
  #[must_use]
  pub const fn new(array: [i16; 16]) -> Self {
    // Safety: `[i16; 16]` and `i16x16` have the same size, and any bit
    // pattern is valid for both.
    unsafe { core::mem::transmute(array) }
  }

  /// Returns a bitmask made from the sign bit of each lane (lane 0 in bit 0).
  #[inline]
  #[must_use]
  pub fn move_mask(self) -> i32 {
    pick! {
      if #[cfg(target_feature="sse2")] {
        let [a, b] = cast::<_, [m128i; 2]>(self);
        move_mask_i8_m128i(pack_i16_to_i8_m128i(a, b))
      } else {
        self.a.move_mask() | (self.b.move_mask() << 8)
      }
    }
  }

  /// Returns `true` if the sign bit of any lane is set.
  #[inline]
  #[must_use]
  pub fn any(self) -> bool {
    pick! {
      if #[cfg(target_feature="avx2")] {
        ((move_mask_i8_m256i(self.avx2) as u32) & 0b10101010101010101010101010101010) != 0
      } else {
        (self.a | self.b).any()
      }
    }
  }
  /// Returns `true` if the sign bit of every lane is set.
  #[inline]
  #[must_use]
  pub fn all(self) -> bool {
    pick! {
      if #[cfg(target_feature="avx2")] {
        ((move_mask_i8_m256i(self.avx2) as u32) & 0b10101010101010101010101010101010) == 0b10101010101010101010101010101010
      } else {
        (self.a & self.b).all()
      }
    }
  }
  /// Returns `true` if the sign bit of no lane is set.
  #[inline]
  #[must_use]
  pub fn none(self) -> bool {
    !self.any()
  }

  /// widens and sign extends to i16x16
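  ///
  /// A minimal sketch of the sign extension (doc-test style), assuming the
  /// crate-root re-exports used throughout `wide`:
  /// ```
  /// use wide::{i8x16, i16x16};
  /// let bytes = i8x16::new([-1, 2, -3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127]);
  /// let wide_lanes = i16x16::from_i8x16(bytes);
  /// assert_eq!(wide_lanes.to_array()[0], -1);
  /// assert_eq!(wide_lanes.to_array()[15], 127);
  /// ```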
  #[inline]
  #[must_use]
  pub fn from_i8x16(v: i8x16) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        i16x16 { avx2: convert_to_i16_m256i_from_i8_m128i(v.sse) }
      } else if #[cfg(target_feature="sse4.1")] {
        i16x16 {
          a: i16x8 { sse: convert_to_i16_m128i_from_lower8_i8_m128i(v.sse) },
          b: i16x8 { sse: convert_to_i16_m128i_from_lower8_i8_m128i(unpack_high_i64_m128i(v.sse, v.sse)) }
        }
      } else if #[cfg(target_feature="sse2")] {
        i16x16 {
          a: i16x8 { sse: shr_imm_i16_m128i::<8>(unpack_low_i8_m128i(v.sse, v.sse)) },
          b: i16x8 { sse: shr_imm_i16_m128i::<8>(unpack_high_i8_m128i(v.sse, v.sse)) },
        }
      } else {
        i16x16::new([
          v.as_array_ref()[0] as i16,
          v.as_array_ref()[1] as i16,
          v.as_array_ref()[2] as i16,
          v.as_array_ref()[3] as i16,
          v.as_array_ref()[4] as i16,
          v.as_array_ref()[5] as i16,
          v.as_array_ref()[6] as i16,
          v.as_array_ref()[7] as i16,
          v.as_array_ref()[8] as i16,
          v.as_array_ref()[9] as i16,
          v.as_array_ref()[10] as i16,
          v.as_array_ref()[11] as i16,
          v.as_array_ref()[12] as i16,
          v.as_array_ref()[13] as i16,
          v.as_array_ref()[14] as i16,
          v.as_array_ref()[15] as i16,
        ])
      }
    }
  }

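  /// Lane-wise select: for each lane of the output, picks the lane from `t`
  /// where the corresponding lane of `self` is "true" (all bits set, as
  /// produced by the comparison methods) and the lane from `f` otherwise.
  ///
  /// A minimal sketch (doc-test style), with the mask built by hand instead of
  /// from a comparison:
  /// ```
  /// use wide::i16x16;
  /// let t = i16x16::splat(1);
  /// let f = i16x16::splat(5);
  /// let mask = i16x16::splat(-1); // all bits set: "true" in every lane
  /// assert_eq!(mask.blend(t, f).to_array(), [1; 16]);
  /// ```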
  #[inline]
  #[must_use]
  pub fn blend(self, t: Self, f: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: blend_varying_i8_m256i(f.avx2, t.avx2, self.avx2) }
      } else {
        Self {
          a : self.a.blend(t.a, f.a),
          b : self.b.blend(t.b, f.b),
        }
      }
    }
  }

  /// horizontal add of all the elements of the vector
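  ///
  /// A minimal sketch (doc-test style): sixteen lanes of `1` sum to `16`.
  /// ```
  /// use wide::i16x16;
  /// assert_eq!(i16x16::splat(1).reduce_add(), 16);
  /// ```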
  #[inline]
  #[must_use]
  pub fn reduce_add(self) -> i16 {
    let arr: [i16x8; 2] = cast(self);

    (arr[0] + arr[1]).reduce_add()
  }

  /// horizontal min of all the elements of the vector
  #[inline]
  #[must_use]
  pub fn reduce_min(self) -> i16 {
    let arr: [i16x8; 2] = cast(self);

    arr[0].min(arr[1]).reduce_min()
  }

  /// horizontal max of all the elements of the vector
  #[inline]
  #[must_use]
  pub fn reduce_max(self) -> i16 {
    let arr: [i16x8; 2] = cast(self);

    arr[0].max(arr[1]).reduce_max()
  }

  /// lane-wise absolute value
  #[inline]
  #[must_use]
  pub fn abs(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: abs_i16_m256i(self.avx2) }
      } else {
        Self {
          a : self.a.abs(),
          b : self.b.abs(),
        }
      }
    }
  }
  /// lane-wise maximum
  #[inline]
  #[must_use]
  pub fn max(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: max_i16_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.max(rhs.a),
          b : self.b.max(rhs.b),
        }
      }
    }
  }
  /// lane-wise minimum
  #[inline]
  #[must_use]
  pub fn min(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: min_i16_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.min(rhs.a),
          b : self.b.min(rhs.b),
        }
      }
    }
  }

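  /// Lane-wise saturating add: sums that overflow the `i16` range are clamped
  /// to `i16::MIN` / `i16::MAX` instead of wrapping.
  ///
  /// A minimal sketch (doc-test style):
  /// ```
  /// use wide::i16x16;
  /// let big = i16x16::splat(i16::MAX);
  /// assert_eq!(big.saturating_add(i16x16::splat(1)).to_array(), [i16::MAX; 16]);
  /// ```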
  #[inline]
  #[must_use]
  pub fn saturating_add(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: add_saturating_i16_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.saturating_add(rhs.a),
          b : self.b.saturating_add(rhs.b),
        }
      }
    }
  }
  /// Lane-wise saturating subtract: differences that overflow the `i16` range
  /// are clamped to `i16::MIN` / `i16::MAX` instead of wrapping.
  #[inline]
  #[must_use]
  pub fn saturating_sub(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: sub_saturating_i16_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.saturating_sub(rhs.a),
          b : self.b.saturating_sub(rhs.b),
        }
      }
    }
  }

  /// Calculates a partial dot product.
  /// Multiplies packed signed 16-bit integers, producing intermediate signed
  /// 32-bit integers, then horizontally adds adjacent pairs of those 32-bit
  /// integers.
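  ///
  /// A minimal sketch (doc-test style): each pair of neighbouring lanes is
  /// multiplied and summed into one `i32`.
  /// ```
  /// use wide::i16x16;
  /// let a = i16x16::new([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);
  /// let ones = i16x16::splat(1);
  /// assert_eq!(a.dot(ones).to_array(), [3, 7, 11, 15, 19, 23, 27, 31]);
  /// ```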
  #[inline]
  #[must_use]
  pub fn dot(self, rhs: Self) -> i32x8 {
    pick! {
      if #[cfg(target_feature="avx2")] {
        i32x8 { avx2: mul_i16_horizontal_add_m256i(self.avx2, rhs.avx2) }
      } else {
        i32x8 {
          a : self.a.dot(rhs.a),
          b : self.b.dot(rhs.b),
        }
      }
    }
  }

  /// Multiply and scale: equivalent to `((self * rhs) + 0x4000) >> 15` on each
  /// lane, effectively multiplying by a 16-bit fixed point number between `-1`
  /// and `1`. This corresponds to the following instructions:
  /// - `vqrdmulhq_s16` instruction on neon
  /// - `i16x8_q15mulr_sat` on simd128
  /// - `_mm256_mulhrs_epi16` on avx2
  /// - emulated via `mul_i16_*` on sse2
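  ///
  /// A minimal sketch (doc-test style): `16384` is `0.5` in Q15 fixed point,
  /// and `3 * 0.5` rounds up to `2`.
  /// ```
  /// use wide::i16x16;
  /// let v = i16x16::splat(3);
  /// assert_eq!(v.mul_scale_round(i16x16::splat(16384)).to_array(), [2; 16]);
  /// ```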
  #[inline]
  #[must_use]
  pub fn mul_scale_round(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: mul_i16_scale_round_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.mul_scale_round(rhs.a),
          b : self.b.mul_scale_round(rhs.b),
        }
      }
    }
  }

  /// Multiply and scale by a single scalar: equivalent to
  /// `((self * rhs) + 0x4000) >> 15` on each lane, effectively multiplying by
  /// a 16-bit fixed point number between `-1` and `1`. This corresponds to the
  /// following instructions:
  /// - `vqrdmulhq_n_s16` instruction on neon
  /// - `i16x8_q15mulr_sat` on simd128
  /// - `_mm256_mulhrs_epi16` on avx2
  /// - emulated via `mul_i16_*` on sse2
  #[inline]
  #[must_use]
  pub fn mul_scale_round_n(self, rhs: i16) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: mul_i16_scale_round_m256i(self.avx2, set_splat_i16_m256i(rhs)) }
      } else {
        Self {
          a : self.a.mul_scale_round_n(rhs),
          b : self.b.mul_scale_round_n(rhs),
        }
      }
    }
  }

  #[inline]
  pub fn to_array(self) -> [i16; 16] {
    cast(self)
  }

  #[inline]
  pub fn as_array_ref(&self) -> &[i16; 16] {
    cast_ref(self)
  }

  #[inline]
  pub fn as_array_mut(&mut self) -> &mut [i16; 16] {
    cast_mut(self)
  }
}