use super::*;

pick! {
  if #[cfg(target_feature="sse2")] {
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(16))]
    pub struct i16x8 { pub(crate) sse: m128i }
  } else if #[cfg(target_feature="simd128")] {
    use core::arch::wasm32::*;

    #[derive(Clone, Copy)]
    #[repr(transparent)]
    pub struct i16x8 { pub(crate) simd: v128 }

    impl Default for i16x8 {
      fn default() -> Self {
        Self::splat(0)
      }
    }

    impl PartialEq for i16x8 {
      fn eq(&self, other: &Self) -> bool {
        u16x8_all_true(i16x8_eq(self.simd, other.simd))
      }
    }

    impl Eq for i16x8 { }
  } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
    use core::arch::aarch64::*;
    #[repr(C)]
    #[derive(Copy, Clone)]
    pub struct i16x8 { pub(crate) neon : int16x8_t }

    impl Default for i16x8 {
      #[inline]
      #[must_use]
      fn default() -> Self {
        Self::splat(0)
      }
    }

    impl PartialEq for i16x8 {
      #[inline]
      #[must_use]
      fn eq(&self, other: &Self) -> bool {
        unsafe { vminvq_u16(vceqq_s16(self.neon, other.neon)) == u16::MAX }
      }
    }

    impl Eq for i16x8 { }
  } else {
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(16))]
    pub struct i16x8 { pub(crate) arr: [i16;8] }
  }
}

int_uint_consts!(i16, 8, i16x8, 128);

unsafe impl Zeroable for i16x8 {}
unsafe impl Pod for i16x8 {}

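// A minimal usage sketch (illustrative only; it assumes the type is re-exported
// from the crate root, as the `wide` crate does):
//
//   let a = i16x8::new([1, 2, 3, 4, 5, 6, 7, 8]);
//   let b = i16x8::splat(10);
//   assert_eq!((a + b).to_array(), [11, 12, 13, 14, 15, 16, 17, 18]);
//
// The element-wise arithmetic operators below wrap on overflow, matching the
// scalar `wrapping_*` fallback paths.
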
impl Add for i16x8 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn add(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: add_i16_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i16x8_add(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vaddq_s16(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          self.arr[0].wrapping_add(rhs.arr[0]),
          self.arr[1].wrapping_add(rhs.arr[1]),
          self.arr[2].wrapping_add(rhs.arr[2]),
          self.arr[3].wrapping_add(rhs.arr[3]),
          self.arr[4].wrapping_add(rhs.arr[4]),
          self.arr[5].wrapping_add(rhs.arr[5]),
          self.arr[6].wrapping_add(rhs.arr[6]),
          self.arr[7].wrapping_add(rhs.arr[7]),
        ]}
      }
    }
  }
}

impl Sub for i16x8 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn sub(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: sub_i16_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i16x8_sub(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vsubq_s16(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          self.arr[0].wrapping_sub(rhs.arr[0]),
          self.arr[1].wrapping_sub(rhs.arr[1]),
          self.arr[2].wrapping_sub(rhs.arr[2]),
          self.arr[3].wrapping_sub(rhs.arr[3]),
          self.arr[4].wrapping_sub(rhs.arr[4]),
          self.arr[5].wrapping_sub(rhs.arr[5]),
          self.arr[6].wrapping_sub(rhs.arr[6]),
          self.arr[7].wrapping_sub(rhs.arr[7]),
        ]}
      }
    }
  }
}

impl Mul for i16x8 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn mul(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: mul_i16_keep_low_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i16x8_mul(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vmulq_s16(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          self.arr[0].wrapping_mul(rhs.arr[0]),
          self.arr[1].wrapping_mul(rhs.arr[1]),
          self.arr[2].wrapping_mul(rhs.arr[2]),
          self.arr[3].wrapping_mul(rhs.arr[3]),
          self.arr[4].wrapping_mul(rhs.arr[4]),
          self.arr[5].wrapping_mul(rhs.arr[5]),
          self.arr[6].wrapping_mul(rhs.arr[6]),
          self.arr[7].wrapping_mul(rhs.arr[7]),
        ]}
      }
    }
  }
}

impl Add<i16> for i16x8 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn add(self, rhs: i16) -> Self::Output {
    self.add(Self::splat(rhs))
  }
}

impl Sub<i16> for i16x8 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn sub(self, rhs: i16) -> Self::Output {
    self.sub(Self::splat(rhs))
  }
}

impl Mul<i16> for i16x8 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn mul(self, rhs: i16) -> Self::Output {
    self.mul(Self::splat(rhs))
  }
}

impl Add<i16x8> for i16 {
  type Output = i16x8;
  #[inline]
  #[must_use]
  fn add(self, rhs: i16x8) -> Self::Output {
    i16x8::splat(self).add(rhs)
  }
}

impl Sub<i16x8> for i16 {
  type Output = i16x8;
  #[inline]
  #[must_use]
  fn sub(self, rhs: i16x8) -> Self::Output {
    i16x8::splat(self).sub(rhs)
  }
}

impl Mul<i16x8> for i16 {
  type Output = i16x8;
  #[inline]
  #[must_use]
  fn mul(self, rhs: i16x8) -> Self::Output {
    i16x8::splat(self).mul(rhs)
  }
}

impl BitAnd for i16x8 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn bitand(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: bitand_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_and(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vandq_s16(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          self.arr[0].bitand(rhs.arr[0]),
          self.arr[1].bitand(rhs.arr[1]),
          self.arr[2].bitand(rhs.arr[2]),
          self.arr[3].bitand(rhs.arr[3]),
          self.arr[4].bitand(rhs.arr[4]),
          self.arr[5].bitand(rhs.arr[5]),
          self.arr[6].bitand(rhs.arr[6]),
          self.arr[7].bitand(rhs.arr[7]),
        ]}
      }
    }
  }
}

impl BitOr for i16x8 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn bitor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: bitor_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_or(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vorrq_s16(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          self.arr[0].bitor(rhs.arr[0]),
          self.arr[1].bitor(rhs.arr[1]),
          self.arr[2].bitor(rhs.arr[2]),
          self.arr[3].bitor(rhs.arr[3]),
          self.arr[4].bitor(rhs.arr[4]),
          self.arr[5].bitor(rhs.arr[5]),
          self.arr[6].bitor(rhs.arr[6]),
          self.arr[7].bitor(rhs.arr[7]),
        ]}
      }
    }
  }
}

impl BitXor for i16x8 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn bitxor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: bitxor_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_xor(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: veorq_s16(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          self.arr[0].bitxor(rhs.arr[0]),
          self.arr[1].bitxor(rhs.arr[1]),
          self.arr[2].bitxor(rhs.arr[2]),
          self.arr[3].bitxor(rhs.arr[3]),
          self.arr[4].bitxor(rhs.arr[4]),
          self.arr[5].bitxor(rhs.arr[5]),
          self.arr[6].bitxor(rhs.arr[6]),
          self.arr[7].bitxor(rhs.arr[7]),
        ]}
      }
    }
  }
}

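// The two macros below generate `Shl`/`Shr` impls of `i16x8` against each of
// the listed scalar types. Every lane is shifted by the same scalar amount,
// and the right shift is arithmetic (sign-preserving) since the lanes are
// signed.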
macro_rules! impl_shl_t_for_i16x8 {
  ($($shift_type:ty),+ $(,)?) => {
    $(impl Shl<$shift_type> for i16x8 {
      type Output = Self;
      #[inline]
      #[must_use]
      fn shl(self, rhs: $shift_type) -> Self::Output {
        pick! {
          if #[cfg(target_feature="sse2")] {
            let shift = cast([rhs as u64, 0]);
            Self { sse: shl_all_u16_m128i(self.sse, shift) }
          } else if #[cfg(target_feature="simd128")] {
            Self { simd: i16x8_shl(self.simd, rhs as u32) }
          } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
            unsafe { Self { neon: vshlq_s16(self.neon, vmovq_n_s16(rhs as i16)) } }
          } else {
            let u = rhs as u64;
            Self { arr: [
              self.arr[0] << u,
              self.arr[1] << u,
              self.arr[2] << u,
              self.arr[3] << u,
              self.arr[4] << u,
              self.arr[5] << u,
              self.arr[6] << u,
              self.arr[7] << u,
            ]}
          }
        }
      }
    })+
  };
}
impl_shl_t_for_i16x8!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);

macro_rules! impl_shr_t_for_i16x8 {
  ($($shift_type:ty),+ $(,)?) => {
    $(impl Shr<$shift_type> for i16x8 {
      type Output = Self;
      #[inline]
      #[must_use]
      fn shr(self, rhs: $shift_type) -> Self::Output {
        pick! {
          if #[cfg(target_feature="sse2")] {
            let shift = cast([rhs as u64, 0]);
            Self { sse: shr_all_i16_m128i(self.sse, shift) }
          } else if #[cfg(target_feature="simd128")] {
            Self { simd: i16x8_shr(self.simd, rhs as u32) }
          } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
            unsafe { Self { neon: vshlq_s16(self.neon, vmovq_n_s16(-(rhs as i16))) } }
          } else {
            let u = rhs as u64;
            Self { arr: [
              self.arr[0] >> u,
              self.arr[1] >> u,
              self.arr[2] >> u,
              self.arr[3] >> u,
              self.arr[4] >> u,
              self.arr[5] >> u,
              self.arr[6] >> u,
              self.arr[7] >> u,
            ]}
          }
        }
      }
    })+
  };
}
impl_shr_t_for_i16x8!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);

impl CmpEq for i16x8 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn cmp_eq(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: cmp_eq_mask_i16_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i16x8_eq(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vreinterpretq_s16_u16(vceqq_s16(self.neon, rhs.neon)) } }
      } else {
        Self { arr: [
          if self.arr[0] == rhs.arr[0] { -1 } else { 0 },
          if self.arr[1] == rhs.arr[1] { -1 } else { 0 },
          if self.arr[2] == rhs.arr[2] { -1 } else { 0 },
          if self.arr[3] == rhs.arr[3] { -1 } else { 0 },
          if self.arr[4] == rhs.arr[4] { -1 } else { 0 },
          if self.arr[5] == rhs.arr[5] { -1 } else { 0 },
          if self.arr[6] == rhs.arr[6] { -1 } else { 0 },
          if self.arr[7] == rhs.arr[7] { -1 } else { 0 },
        ]}
      }
    }
  }
}

impl CmpGt for i16x8 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn cmp_gt(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: cmp_gt_mask_i16_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i16x8_gt(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vreinterpretq_s16_u16(vcgtq_s16(self.neon, rhs.neon)) } }
      } else {
        Self { arr: [
          if self.arr[0] > rhs.arr[0] { -1 } else { 0 },
          if self.arr[1] > rhs.arr[1] { -1 } else { 0 },
          if self.arr[2] > rhs.arr[2] { -1 } else { 0 },
          if self.arr[3] > rhs.arr[3] { -1 } else { 0 },
          if self.arr[4] > rhs.arr[4] { -1 } else { 0 },
          if self.arr[5] > rhs.arr[5] { -1 } else { 0 },
          if self.arr[6] > rhs.arr[6] { -1 } else { 0 },
          if self.arr[7] > rhs.arr[7] { -1 } else { 0 },
        ]}
      }
    }
  }
}

impl CmpLt for i16x8 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn cmp_lt(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: cmp_lt_mask_i16_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i16x8_lt(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vreinterpretq_s16_u16(vcltq_s16(self.neon, rhs.neon)) } }
      } else {
        Self { arr: [
          if self.arr[0] < rhs.arr[0] { -1 } else { 0 },
          if self.arr[1] < rhs.arr[1] { -1 } else { 0 },
          if self.arr[2] < rhs.arr[2] { -1 } else { 0 },
          if self.arr[3] < rhs.arr[3] { -1 } else { 0 },
          if self.arr[4] < rhs.arr[4] { -1 } else { 0 },
          if self.arr[5] < rhs.arr[5] { -1 } else { 0 },
          if self.arr[6] < rhs.arr[6] { -1 } else { 0 },
          if self.arr[7] < rhs.arr[7] { -1 } else { 0 },
        ]}
      }
    }
  }
}

impl i16x8 {
  #[inline]
  #[must_use]
  pub const fn new(array: [i16; 8]) -> Self {
    unsafe { core::intrinsics::transmute(array) }
  }

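  /// Returns a bitmask made from the sign bit of each lane: bit `i` of the
  /// result is set exactly when lane `i` is negative.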
  #[inline]
  #[must_use]
  pub fn move_mask(self) -> i32 {
    pick! {
      if #[cfg(target_feature="sse2")] {
        move_mask_i8_m128i(pack_i16_to_i8_m128i(self.sse, self.sse)) & 0xff
      } else if #[cfg(target_feature="simd128")] {
        i16x8_bitmask(self.simd) as i32
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {
          let masked = vcltq_s16(self.neon, vdupq_n_s16(0));

          let selectbit: uint16x8_t = core::intrinsics::transmute([1u16, 2, 4, 8, 16, 32, 64, 128]);
          let r = vandq_u16(masked, selectbit);

          vaddvq_u16(r) as i32
        }
      } else {
        ((self.arr[0] < 0) as i32) << 0 |
        ((self.arr[1] < 0) as i32) << 1 |
        ((self.arr[2] < 0) as i32) << 2 |
        ((self.arr[3] < 0) as i32) << 3 |
        ((self.arr[4] < 0) as i32) << 4 |
        ((self.arr[5] < 0) as i32) << 5 |
        ((self.arr[6] < 0) as i32) << 6 |
        ((self.arr[7] < 0) as i32) << 7
      }
    }
  }

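  /// Returns `true` if any lane has its sign bit set. Intended for the
  /// all-zeros / all-ones masks produced by the `cmp_*` methods.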
  #[inline]
  #[must_use]
  pub fn any(self) -> bool {
    pick! {
      if #[cfg(target_feature="sse2")] {
        (move_mask_i8_m128i(self.sse) & 0b1010101010101010) != 0
      } else if #[cfg(target_feature="simd128")] {
        u16x8_bitmask(self.simd) != 0
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
        unsafe {
          vminvq_s16(self.neon) < 0
        }
      } else {
        let v: [u64; 2] = cast(self);
        ((v[0] | v[1]) & 0x8000800080008000) != 0
      }
    }
  }

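  /// Returns `true` only if every lane has its sign bit set.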
  #[inline]
  #[must_use]
  pub fn all(self) -> bool {
    pick! {
      if #[cfg(target_feature="sse2")] {
        (move_mask_i8_m128i(self.sse) & 0b1010101010101010) == 0b1010101010101010
      } else if #[cfg(target_feature="simd128")] {
        u16x8_bitmask(self.simd) == 0b11111111
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
        unsafe {
          vmaxvq_s16(self.neon) < 0
        }
      } else {
        let v: [u64; 2] = cast(self);
        (v[0] & v[1] & 0x8000800080008000) == 0x8000800080008000
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn none(self) -> bool {
    !self.any()
  }

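  /// Zero-extends the low eight `u8` lanes of `u` into the eight `i16` lanes
  /// of the result.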
  #[inline]
  #[must_use]
  pub fn from_u8x16_low(u: u8x16) -> Self {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: unpack_low_i8_m128i(u.sse, m128i::zeroed()) }
      } else {
        let u_arr: [u8; 16] = cast(u);
        cast([
          u_arr[0] as u16 as i16,
          u_arr[1] as u16 as i16,
          u_arr[2] as u16 as i16,
          u_arr[3] as u16 as i16,
          u_arr[4] as u16 as i16,
          u_arr[5] as u16 as i16,
          u_arr[6] as u16 as i16,
          u_arr[7] as u16 as i16,
        ])
      }
    }
  }

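  /// Zero-extends the high eight `u8` lanes of `u` into the eight `i16` lanes
  /// of the result.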
  #[inline]
  #[must_use]
  pub fn from_u8x16_high(u: u8x16) -> Self {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: unpack_high_i8_m128i(u.sse, m128i::zeroed()) }
      } else {
        let u_arr: [u8; 16] = cast(u);
        cast([
          u_arr[8] as u16 as i16,
          u_arr[9] as u16 as i16,
          u_arr[10] as u16 as i16,
          u_arr[11] as u16 as i16,
          u_arr[12] as u16 as i16,
          u_arr[13] as u16 as i16,
          u_arr[14] as u16 as i16,
          u_arr[15] as u16 as i16,
        ])
      }
    }
  }

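  /// Narrows each `i32` lane to `i16`, saturating values that fall outside the
  /// `i16` range.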
  #[inline]
  #[must_use]
  pub fn from_i32x8_saturate(v: i32x8) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        i16x8 { sse: pack_i32_to_i16_m128i(extract_m128i_from_m256i::<0>(v.avx2), extract_m128i_from_m256i::<1>(v.avx2)) }
      } else if #[cfg(target_feature="sse2")] {
        i16x8 { sse: pack_i32_to_i16_m128i(v.a.sse, v.b.sse) }
      } else if #[cfg(target_feature="simd128")] {
        use core::arch::wasm32::*;

        i16x8 { simd: i16x8_narrow_i32x4(v.a.simd, v.b.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
        use core::arch::aarch64::*;

        unsafe {
          i16x8 { neon: vcombine_s16(vqmovn_s32(v.a.neon), vqmovn_s32(v.b.neon)) }
        }
      } else {
        fn clamp(a: i32) -> i16 {
          if a < i16::MIN as i32 {
            i16::MIN
          } else if a > i16::MAX as i32 {
            i16::MAX
          } else {
            a as i16
          }
        }

        i16x8::new([
          clamp(v.as_array_ref()[0]),
          clamp(v.as_array_ref()[1]),
          clamp(v.as_array_ref()[2]),
          clamp(v.as_array_ref()[3]),
          clamp(v.as_array_ref()[4]),
          clamp(v.as_array_ref()[5]),
          clamp(v.as_array_ref()[6]),
          clamp(v.as_array_ref()[7]),
        ])
      }
    }
  }

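  /// Narrows each `i32` lane to `i16` by keeping only the low 16 bits
  /// (wrapping truncation).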
  #[inline]
  #[must_use]
  pub fn from_i32x8_truncate(v: i32x8) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        let a = v.avx2.bitand(set_splat_i32_m256i(0xffff));
        i16x8 { sse: pack_i32_to_u16_m128i(extract_m128i_from_m256i::<0>(a), extract_m128i_from_m256i::<1>(a)) }
      } else if #[cfg(target_feature="sse2")] {
        let a = shr_imm_i32_m128i::<16>(shl_imm_u32_m128i::<16>(v.a.sse));
        let b = shr_imm_i32_m128i::<16>(shl_imm_u32_m128i::<16>(v.b.sse));

        i16x8 { sse: pack_i32_to_i16_m128i(a, b) }
      } else {
        i16x8::new([
          v.as_array_ref()[0] as i16,
          v.as_array_ref()[1] as i16,
          v.as_array_ref()[2] as i16,
          v.as_array_ref()[3] as i16,
          v.as_array_ref()[4] as i16,
          v.as_array_ref()[5] as i16,
          v.as_array_ref()[6] as i16,
          v.as_array_ref()[7] as i16,
        ])
      }
    }
  }

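  /// Loads the first eight values of `input` without requiring alignment.
  ///
  /// Panics if `input.len() < 8`.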
  #[inline]
  #[must_use]
  pub fn from_slice_unaligned(input: &[i16]) -> Self {
    assert!(input.len() >= 8);

    pick! {
      if #[cfg(target_feature="sse2")] {
        unsafe { Self { sse: load_unaligned_m128i(&*(input.as_ptr() as *const [u8; 16])) } }
      } else if #[cfg(target_feature="simd128")] {
        unsafe { Self { simd: v128_load(input.as_ptr() as *const v128) } }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vld1q_s16(input.as_ptr() as *const i16) } }
      } else {
        unsafe { Self::new(*(input.as_ptr() as *const [i16; 8])) }
      }
    }
  }

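  /// Lane-wise select: where a lane of `self` is all ones the lane of `t` is
  /// taken, where it is all zeros the lane of `f` is taken. `self` should be a
  /// mask such as the ones produced by the `cmp_*` methods.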
  #[inline]
  #[must_use]
  pub fn blend(self, t: Self, f: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse4.1")] {
        Self { sse: blend_varying_i8_m128i(f.sse, t.sse, self.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_bitselect(t.simd, f.simd, self.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vbslq_s16(vreinterpretq_u16_s16(self.neon), t.neon, f.neon) } }
      } else {
        generic_bit_blend(self, t, f)
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn is_negative(self) -> Self {
    self.cmp_lt(Self::zeroed())
  }

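  /// Horizontal wrapping sum of all eight lanes.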
  #[inline]
  #[must_use]
  pub fn reduce_add(self) -> i16 {
    pick! {
      if #[cfg(target_feature="sse2")] {
        let hi64 = shuffle_ai_f32_all_m128i::<0b01_00_11_10>(self.sse);
        let sum64 = add_i16_m128i(self.sse, hi64);
        let hi32 = shuffle_ai_f32_all_m128i::<0b11_10_00_01>(sum64);
        let sum32 = add_i16_m128i(sum64, hi32);
        let lo16 = shr_imm_u32_m128i::<16>(sum32);
        let sum16 = add_i16_m128i(sum32, lo16);
        extract_i16_as_i32_m128i::<0>(sum16) as i16
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { vaddvq_s16(self.neon) }
      } else {
        let arr: [i16; 8] = cast(self);

        let mut r = arr[0];
        r = r.wrapping_add(arr[1]);
        r = r.wrapping_add(arr[2]);
        r = r.wrapping_add(arr[3]);
        r = r.wrapping_add(arr[4]);
        r = r.wrapping_add(arr[5]);
        r = r.wrapping_add(arr[6]);
        r.wrapping_add(arr[7])
      }
    }
  }

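  /// Horizontal minimum of all eight lanes.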
  #[inline]
  #[must_use]
  pub fn reduce_min(self) -> i16 {
    pick! {
      if #[cfg(target_feature="sse2")] {
        let hi64 = shuffle_ai_f32_all_m128i::<0b01_00_11_10>(self.sse);
        let sum64 = min_i16_m128i(self.sse, hi64);
        let hi32 = shuffle_ai_f32_all_m128i::<0b11_10_00_01>(sum64);
        let sum32 = min_i16_m128i(sum64, hi32);
        let lo16 = shr_imm_u32_m128i::<16>(sum32);
        let sum16 = min_i16_m128i(sum32, lo16);
        extract_i16_as_i32_m128i::<0>(sum16) as i16
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { vminvq_s16(self.neon) }
      } else {
        let arr: [i16; 8] = cast(self);

        let mut r = arr[0];
        r = r.min(arr[1]);
        r = r.min(arr[2]);
        r = r.min(arr[3]);
        r = r.min(arr[4]);
        r = r.min(arr[5]);
        r = r.min(arr[6]);
        r.min(arr[7])
      }
    }
  }

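  /// Horizontal maximum of all eight lanes.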
  #[inline]
  #[must_use]
  pub fn reduce_max(self) -> i16 {
    pick! {
      if #[cfg(target_feature="sse2")] {
        let hi64 = shuffle_ai_f32_all_m128i::<0b01_00_11_10>(self.sse);
        let sum64 = max_i16_m128i(self.sse, hi64);
        let hi32 = shuffle_ai_f32_all_m128i::<0b11_10_00_01>(sum64);
        let sum32 = max_i16_m128i(sum64, hi32);
        let lo16 = shr_imm_u32_m128i::<16>(sum32);
        let sum16 = max_i16_m128i(sum32, lo16);
        extract_i16_as_i32_m128i::<0>(sum16) as i16
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { vmaxvq_s16(self.neon) }
      } else {
        let arr: [i16; 8] = cast(self);

        let mut r = arr[0];
        r = r.max(arr[1]);
        r = r.max(arr[2]);
        r = r.max(arr[3]);
        r = r.max(arr[4]);
        r = r.max(arr[5]);
        r = r.max(arr[6]);
        r.max(arr[7])
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn abs(self) -> Self {
    pick! {
      // check ssse3 before sse2: ssse3 implies sse2, so the sse2 branch would
      // otherwise shadow the single-instruction pabsw path.
      if #[cfg(target_feature="ssse3")] {
        Self { sse: abs_i16_m128i(self.sse) }
      } else if #[cfg(target_feature="sse2")] {
        let mask = shr_imm_i16_m128i::<15>(self.sse);
        Self { sse: bitxor_m128i(add_i16_m128i(self.sse, mask), mask) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i16x8_abs(self.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vabsq_s16(self.neon) } }
      } else {
        let arr: [i16; 8] = cast(self);
        cast([
          arr[0].wrapping_abs(),
          arr[1].wrapping_abs(),
          arr[2].wrapping_abs(),
          arr[3].wrapping_abs(),
          arr[4].wrapping_abs(),
          arr[5].wrapping_abs(),
          arr[6].wrapping_abs(),
          arr[7].wrapping_abs(),
        ])
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn unsigned_abs(self) -> u16x8 {
    pick! {
      // same ordering note as `abs` above.
      if #[cfg(target_feature="ssse3")] {
        u16x8 { sse: abs_i16_m128i(self.sse) }
      } else if #[cfg(target_feature="sse2")] {
        let mask = shr_imm_i16_m128i::<15>(self.sse);
        u16x8 { sse: bitxor_m128i(add_i16_m128i(self.sse, mask), mask) }
      } else if #[cfg(target_feature="simd128")] {
        u16x8 { simd: i16x8_abs(self.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { u16x8 { neon: vreinterpretq_u16_s16(vabsq_s16(self.neon)) } }
      } else {
        let arr: [i16; 8] = cast(self);
        cast([
          arr[0].unsigned_abs(),
          arr[1].unsigned_abs(),
          arr[2].unsigned_abs(),
          arr[3].unsigned_abs(),
          arr[4].unsigned_abs(),
          arr[5].unsigned_abs(),
          arr[6].unsigned_abs(),
          arr[7].unsigned_abs(),
        ])
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn max(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: max_i16_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i16x8_max(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vmaxq_s16(self.neon, rhs.neon) } }
      } else {
        self.cmp_lt(rhs).blend(rhs, self)
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn min(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: min_i16_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i16x8_min(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vminq_s16(self.neon, rhs.neon) } }
      } else {
        self.cmp_lt(rhs).blend(self, rhs)
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn saturating_add(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: add_saturating_i16_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i16x8_add_sat(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vqaddq_s16(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          self.arr[0].saturating_add(rhs.arr[0]),
          self.arr[1].saturating_add(rhs.arr[1]),
          self.arr[2].saturating_add(rhs.arr[2]),
          self.arr[3].saturating_add(rhs.arr[3]),
          self.arr[4].saturating_add(rhs.arr[4]),
          self.arr[5].saturating_add(rhs.arr[5]),
          self.arr[6].saturating_add(rhs.arr[6]),
          self.arr[7].saturating_add(rhs.arr[7]),
        ]}
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn saturating_sub(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: sub_saturating_i16_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i16x8_sub_sat(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vqsubq_s16(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          self.arr[0].saturating_sub(rhs.arr[0]),
          self.arr[1].saturating_sub(rhs.arr[1]),
          self.arr[2].saturating_sub(rhs.arr[2]),
          self.arr[3].saturating_sub(rhs.arr[3]),
          self.arr[4].saturating_sub(rhs.arr[4]),
          self.arr[5].saturating_sub(rhs.arr[5]),
          self.arr[6].saturating_sub(rhs.arr[6]),
          self.arr[7].saturating_sub(rhs.arr[7]),
        ]}
      }
    }
  }

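  /// Multiplies the matching lanes into 32-bit products and horizontally adds
  /// adjacent pairs, giving four `i32` results: lane 0 of the output is
  /// `self[0]*rhs[0] + self[1]*rhs[1]`, and so on (the SSE2 `pmaddwd`
  /// operation).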
  #[inline]
  #[must_use]
  pub fn dot(self, rhs: Self) -> i32x4 {
    pick! {
      if #[cfg(target_feature="sse2")] {
        i32x4 { sse: mul_i16_horizontal_add_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        i32x4 { simd: i32x4_dot_i16x8(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {
          let pl = vmull_s16(vget_low_s16(self.neon), vget_low_s16(rhs.neon));
          let ph = vmull_high_s16(self.neon, rhs.neon);
          i32x4 { neon: vpaddq_s32(pl, ph) }
        }
      } else {
        i32x4 { arr: [
          (i32::from(self.arr[0]) * i32::from(rhs.arr[0])) + (i32::from(self.arr[1]) * i32::from(rhs.arr[1])),
          (i32::from(self.arr[2]) * i32::from(rhs.arr[2])) + (i32::from(self.arr[3]) * i32::from(rhs.arr[3])),
          (i32::from(self.arr[4]) * i32::from(rhs.arr[4])) + (i32::from(self.arr[5]) * i32::from(rhs.arr[5])),
          (i32::from(self.arr[6]) * i32::from(rhs.arr[6])) + (i32::from(self.arr[7]) * i32::from(rhs.arr[7])),
        ] }
      }
    }
  }

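  /// Multiplies the lanes as Q15 fixed-point values with rounding: per lane,
  /// the result is `(i32::from(a) * i32::from(b) + 0x4000) >> 15`, i.e. a
  /// multiply by a signed 1.15 fixed-point value, rounded to nearest.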
  #[inline]
  #[must_use]
  pub fn mul_scale_round(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="ssse3")] {
        Self { sse: mul_i16_scale_round_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="sse2")] {
        let hi = mul_i16_keep_high_m128i(self.sse, rhs.sse);
        let lo = mul_i16_keep_low_m128i(self.sse, rhs.sse);
        let mut v1 = unpack_low_i16_m128i(lo, hi);
        let mut v2 = unpack_high_i16_m128i(lo, hi);
        let a = set_splat_i32_m128i(0x4000);
        v1 = shr_imm_i32_m128i::<15>(add_i32_m128i(v1, a));
        v2 = shr_imm_i32_m128i::<15>(add_i32_m128i(v2, a));
        let s = pack_i32_to_i16_m128i(v1, v2);
        Self { sse: s }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i16x8_q15mulr_sat(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vqrdmulhq_s16(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          ((i32::from(self.arr[0]) * i32::from(rhs.arr[0]) + 0x4000) >> 15) as i16,
          ((i32::from(self.arr[1]) * i32::from(rhs.arr[1]) + 0x4000) >> 15) as i16,
          ((i32::from(self.arr[2]) * i32::from(rhs.arr[2]) + 0x4000) >> 15) as i16,
          ((i32::from(self.arr[3]) * i32::from(rhs.arr[3]) + 0x4000) >> 15) as i16,
          ((i32::from(self.arr[4]) * i32::from(rhs.arr[4]) + 0x4000) >> 15) as i16,
          ((i32::from(self.arr[5]) * i32::from(rhs.arr[5]) + 0x4000) >> 15) as i16,
          ((i32::from(self.arr[6]) * i32::from(rhs.arr[6]) + 0x4000) >> 15) as i16,
          ((i32::from(self.arr[7]) * i32::from(rhs.arr[7]) + 0x4000) >> 15) as i16,
        ]}
      }
    }
  }

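  /// Multiplies the matching lanes of `lhs` and `rhs` into full 32-bit
  /// products and keeps only the high 16 bits of each product.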
  #[inline]
  #[must_use]
  pub fn mul_keep_high(lhs: Self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: mul_i16_keep_high_m128i(lhs.sse, rhs.sse) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
        let lhs_low = unsafe { vget_low_s16(lhs.neon) };
        let rhs_low = unsafe { vget_low_s16(rhs.neon) };

        let lhs_high = unsafe { vget_high_s16(lhs.neon) };
        let rhs_high = unsafe { vget_high_s16(rhs.neon) };

        let low = unsafe { vmull_s16(lhs_low, rhs_low) };
        let high = unsafe { vmull_s16(lhs_high, rhs_high) };

        i16x8 { neon: unsafe { vreinterpretq_s16_u16(vuzpq_u16(vreinterpretq_u16_s32(low), vreinterpretq_u16_s32(high)).1) } }
      } else if #[cfg(target_feature="simd128")] {
        let low = i32x4_extmul_low_i16x8(lhs.simd, rhs.simd);
        let high = i32x4_extmul_high_i16x8(lhs.simd, rhs.simd);

        Self { simd: i16x8_shuffle::<1, 3, 5, 7, 9, 11, 13, 15>(low, high) }
      } else {
        i16x8::new([
          ((i32::from(rhs.as_array_ref()[0]) * i32::from(lhs.as_array_ref()[0])) >> 16) as i16,
          ((i32::from(rhs.as_array_ref()[1]) * i32::from(lhs.as_array_ref()[1])) >> 16) as i16,
          ((i32::from(rhs.as_array_ref()[2]) * i32::from(lhs.as_array_ref()[2])) >> 16) as i16,
          ((i32::from(rhs.as_array_ref()[3]) * i32::from(lhs.as_array_ref()[3])) >> 16) as i16,
          ((i32::from(rhs.as_array_ref()[4]) * i32::from(lhs.as_array_ref()[4])) >> 16) as i16,
          ((i32::from(rhs.as_array_ref()[5]) * i32::from(lhs.as_array_ref()[5])) >> 16) as i16,
          ((i32::from(rhs.as_array_ref()[6]) * i32::from(lhs.as_array_ref()[6])) >> 16) as i16,
          ((i32::from(rhs.as_array_ref()[7]) * i32::from(lhs.as_array_ref()[7])) >> 16) as i16,
        ])
      }
    }
  }

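  /// Multiplies the matching lanes without losing precision: each product is
  /// widened to `i32` and returned as an `i32x8`.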
  #[inline]
  #[must_use]
  pub fn mul_widen(self, rhs: Self) -> i32x8 {
    pick! {
      if #[cfg(target_feature="avx2")] {
        let a = convert_to_i32_m256i_from_i16_m128i(self.sse);
        let b = convert_to_i32_m256i_from_i16_m128i(rhs.sse);
        i32x8 { avx2: mul_i32_keep_low_m256i(a, b) }
      } else if #[cfg(target_feature="sse2")] {
        let low = mul_i16_keep_low_m128i(self.sse, rhs.sse);
        let high = mul_i16_keep_high_m128i(self.sse, rhs.sse);
        i32x8 {
          a: i32x4 { sse: unpack_low_i16_m128i(low, high) },
          b: i32x4 { sse: unpack_high_i16_m128i(low, high) }
        }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
        let lhs_low = unsafe { vget_low_s16(self.neon) };
        let rhs_low = unsafe { vget_low_s16(rhs.neon) };

        let lhs_high = unsafe { vget_high_s16(self.neon) };
        let rhs_high = unsafe { vget_high_s16(rhs.neon) };

        let low = unsafe { vmull_s16(lhs_low, rhs_low) };
        let high = unsafe { vmull_s16(lhs_high, rhs_high) };

        i32x8 { a: i32x4 { neon: low }, b: i32x4 { neon: high } }
      } else {
        let a = self.as_array_ref();
        let b = rhs.as_array_ref();
        i32x8::new([
          i32::from(a[0]) * i32::from(b[0]),
          i32::from(a[1]) * i32::from(b[1]),
          i32::from(a[2]) * i32::from(b[2]),
          i32::from(a[3]) * i32::from(b[3]),
          i32::from(a[4]) * i32::from(b[4]),
          i32::from(a[5]) * i32::from(b[5]),
          i32::from(a[6]) * i32::from(b[6]),
          i32::from(a[7]) * i32::from(b[7]),
        ])
      }
    }
  }

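  /// Treats `data` as the rows of an 8x8 matrix of `i16` and returns the
  /// transposed matrix, so column `i` of the input becomes row `i` of the
  /// output.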
  #[must_use]
  #[inline]
  pub fn transpose(data: [i16x8; 8]) -> [i16x8; 8] {
    pick! {
      if #[cfg(target_feature="sse2")] {
        let a1 = unpack_low_i16_m128i(data[0].sse, data[1].sse);
        let a2 = unpack_high_i16_m128i(data[0].sse, data[1].sse);
        let a3 = unpack_low_i16_m128i(data[2].sse, data[3].sse);
        let a4 = unpack_high_i16_m128i(data[2].sse, data[3].sse);
        let a5 = unpack_low_i16_m128i(data[4].sse, data[5].sse);
        let a6 = unpack_high_i16_m128i(data[4].sse, data[5].sse);
        let a7 = unpack_low_i16_m128i(data[6].sse, data[7].sse);
        let a8 = unpack_high_i16_m128i(data[6].sse, data[7].sse);

        let b1 = unpack_low_i32_m128i(a1, a3);
        let b2 = unpack_high_i32_m128i(a1, a3);
        let b3 = unpack_low_i32_m128i(a2, a4);
        let b4 = unpack_high_i32_m128i(a2, a4);
        let b5 = unpack_low_i32_m128i(a5, a7);
        let b6 = unpack_high_i32_m128i(a5, a7);
        let b7 = unpack_low_i32_m128i(a6, a8);
        let b8 = unpack_high_i32_m128i(a6, a8);

        [
          i16x8 { sse: unpack_low_i64_m128i(b1, b5) },
          i16x8 { sse: unpack_high_i64_m128i(b1, b5) },
          i16x8 { sse: unpack_low_i64_m128i(b2, b6) },
          i16x8 { sse: unpack_high_i64_m128i(b2, b6) },
          i16x8 { sse: unpack_low_i64_m128i(b3, b7) },
          i16x8 { sse: unpack_high_i64_m128i(b3, b7) },
          i16x8 { sse: unpack_low_i64_m128i(b4, b8) },
          i16x8 { sse: unpack_high_i64_m128i(b4, b8) },
        ]
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{

        #[inline]
        fn vtrq32(a: int16x8_t, b: int16x8_t) -> (int16x8_t, int16x8_t) {
          unsafe {
            let r = vtrnq_s32(vreinterpretq_s32_s16(a), vreinterpretq_s32_s16(b));
            (vreinterpretq_s16_s32(r.0), vreinterpretq_s16_s32(r.1))
          }
        }

        unsafe {
          let (q0, q2) = vtrq32(data[0].neon, data[2].neon);
          let (q1, q3) = vtrq32(data[1].neon, data[3].neon);
          let (q4, q6) = vtrq32(data[4].neon, data[6].neon);
          let (q5, q7) = vtrq32(data[5].neon, data[7].neon);

          let b1 = vtrnq_s16(q0, q1);
          let b2 = vtrnq_s16(q2, q3);
          let b3 = vtrnq_s16(q4, q5);
          let b4 = vtrnq_s16(q6, q7);

          [
            i16x8 { neon: vcombine_s16(vget_low_s16(b1.0), vget_low_s16(b3.0)) },
            i16x8 { neon: vcombine_s16(vget_low_s16(b1.1), vget_low_s16(b3.1)) },
            i16x8 { neon: vcombine_s16(vget_low_s16(b2.0), vget_low_s16(b4.0)) },
            i16x8 { neon: vcombine_s16(vget_low_s16(b2.1), vget_low_s16(b4.1)) },
            i16x8 { neon: vcombine_s16(vget_high_s16(b1.0), vget_high_s16(b3.0)) },
            i16x8 { neon: vcombine_s16(vget_high_s16(b1.1), vget_high_s16(b3.1)) },
            i16x8 { neon: vcombine_s16(vget_high_s16(b2.0), vget_high_s16(b4.0)) },
            i16x8 { neon: vcombine_s16(vget_high_s16(b2.1), vget_high_s16(b4.1)) },
          ]
        }
      } else if #[cfg(target_feature="simd128")] {
        #[inline] fn lo_i16(a: v128, b: v128) -> v128 { i16x8_shuffle::<0, 8, 1, 9, 2, 10, 3, 11>(a, b) }
        #[inline] fn hi_i16(a: v128, b: v128) -> v128 { i16x8_shuffle::<4, 12, 5, 13, 6, 14, 7, 15>(a, b) }
        #[inline] fn lo_i32(a: v128, b: v128) -> v128 { i32x4_shuffle::<0, 4, 1, 5>(a, b) }
        #[inline] fn hi_i32(a: v128, b: v128) -> v128 { i32x4_shuffle::<2, 6, 3, 7>(a, b) }
        #[inline] fn lo_i64(a: v128, b: v128) -> v128 { i64x2_shuffle::<0, 2>(a, b) }
        #[inline] fn hi_i64(a: v128, b: v128) -> v128 { i64x2_shuffle::<1, 3>(a, b) }

        let a1 = lo_i16(data[0].simd, data[1].simd);
        let a2 = hi_i16(data[0].simd, data[1].simd);
        let a3 = lo_i16(data[2].simd, data[3].simd);
        let a4 = hi_i16(data[2].simd, data[3].simd);
        let a5 = lo_i16(data[4].simd, data[5].simd);
        let a6 = hi_i16(data[4].simd, data[5].simd);
        let a7 = lo_i16(data[6].simd, data[7].simd);
        let a8 = hi_i16(data[6].simd, data[7].simd);

        let b1 = lo_i32(a1, a3);
        let b2 = hi_i32(a1, a3);
        let b3 = lo_i32(a2, a4);
        let b4 = hi_i32(a2, a4);
        let b5 = lo_i32(a5, a7);
        let b6 = hi_i32(a5, a7);
        let b7 = lo_i32(a6, a8);
        let b8 = hi_i32(a6, a8);

        [
          i16x8 { simd: lo_i64(b1, b5) },
          i16x8 { simd: hi_i64(b1, b5) },
          i16x8 { simd: lo_i64(b2, b6) },
          i16x8 { simd: hi_i64(b2, b6) },
          i16x8 { simd: lo_i64(b3, b7) },
          i16x8 { simd: hi_i64(b3, b7) },
          i16x8 { simd: lo_i64(b4, b8) },
          i16x8 { simd: hi_i64(b4, b8) },
        ]

      } else {
        #[inline(always)]
        fn transpose_column(data: &[i16x8; 8], index: usize) -> i16x8 {
          i16x8::new([
            data[0].as_array_ref()[index],
            data[1].as_array_ref()[index],
            data[2].as_array_ref()[index],
            data[3].as_array_ref()[index],
            data[4].as_array_ref()[index],
            data[5].as_array_ref()[index],
            data[6].as_array_ref()[index],
            data[7].as_array_ref()[index],
          ])
        }

        [
          transpose_column(&data, 0),
          transpose_column(&data, 1),
          transpose_column(&data, 2),
          transpose_column(&data, 3),
          transpose_column(&data, 4),
          transpose_column(&data, 5),
          transpose_column(&data, 6),
          transpose_column(&data, 7),
        ]
      }
    }
  }

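  /// Same rounding Q15 multiply as `mul_scale_round`, except `rhs` is a single
  /// scalar that is broadcast to every lane.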
  #[inline]
  #[must_use]
  pub fn mul_scale_round_n(self, rhs: i16) -> Self {
    pick! {
      if #[cfg(target_feature="ssse3")] {
        Self { sse: mul_i16_scale_round_m128i(self.sse, set_splat_i16_m128i(rhs)) }
      } else if #[cfg(target_feature="sse2")] {
        let r = set_splat_i16_m128i(rhs);
        let hi = mul_i16_keep_high_m128i(self.sse, r);
        let lo = mul_i16_keep_low_m128i(self.sse, r);
        let mut v1 = unpack_low_i16_m128i(lo, hi);
        let mut v2 = unpack_high_i16_m128i(lo, hi);
        let a = set_splat_i32_m128i(0x4000);
        v1 = shr_imm_i32_m128i::<15>(add_i32_m128i(v1, a));
        v2 = shr_imm_i32_m128i::<15>(add_i32_m128i(v2, a));
        let s = pack_i32_to_i16_m128i(v1, v2);
        Self { sse: s }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i16x8_q15mulr_sat(self.simd, i16x8_splat(rhs)) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vqrdmulhq_n_s16(self.neon, rhs) } }
      } else {
        Self { arr: [
          ((i32::from(self.arr[0]) * i32::from(rhs) + 0x4000) >> 15) as i16,
          ((i32::from(self.arr[1]) * i32::from(rhs) + 0x4000) >> 15) as i16,
          ((i32::from(self.arr[2]) * i32::from(rhs) + 0x4000) >> 15) as i16,
          ((i32::from(self.arr[3]) * i32::from(rhs) + 0x4000) >> 15) as i16,
          ((i32::from(self.arr[4]) * i32::from(rhs) + 0x4000) >> 15) as i16,
          ((i32::from(self.arr[5]) * i32::from(rhs) + 0x4000) >> 15) as i16,
          ((i32::from(self.arr[6]) * i32::from(rhs) + 0x4000) >> 15) as i16,
          ((i32::from(self.arr[7]) * i32::from(rhs) + 0x4000) >> 15) as i16,
        ]}
      }
    }
  }

  #[inline]
  pub fn to_array(self) -> [i16; 8] {
    cast(self)
  }

  #[inline]
  pub fn as_array_ref(&self) -> &[i16; 8] {
    cast_ref(self)
  }

  #[inline]
  pub fn as_array_mut(&mut self) -> &mut [i16; 8] {
    cast_mut(self)
  }
}