1use super::*;
2
pick! {
  if #[cfg(target_feature="avx2")] {
    // With AVX2 the whole vector lives in a single 256-bit register.
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(32))]
    pub struct i32x8 { pub(crate) avx2: m256i }
  } else {
    // Without AVX2, fall back to two 128-bit halves that are operated on
    // pairwise by every method below.
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(32))]
    pub struct i32x8 { pub(crate) a : i32x4, pub(crate) b : i32x4}
  }
}
14
// Generates the shared integer constants/impl items for i32x8 (8 lanes of
// i32 in a 256-bit vector).
int_uint_consts!(i32, 8, i32x8, 256);

// SAFETY: both layouts of i32x8 (one avx2 register, or two i32x4 halves) are
// plain 256 bits of integer data: no padding, no invalid bit patterns.
unsafe impl Zeroable for i32x8 {}
unsafe impl Pod for i32x8 {}
19
impl Add for i32x8 {
  type Output = Self;
  /// Lanewise `i32` addition.
  #[inline]
  #[must_use]
  fn add(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        // One 256-bit add covers all eight lanes.
        Self { avx2: add_i32_m256i(self.avx2, rhs.avx2) }
      } else {
        // Add each 128-bit half independently.
        Self {
          a : self.a.add(rhs.a),
          b : self.b.add(rhs.b),
        }
      }
    }
  }
}
37
impl Sub for i32x8 {
  type Output = Self;
  /// Lanewise `i32` subtraction.
  #[inline]
  #[must_use]
  fn sub(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        // One 256-bit subtract covers all eight lanes.
        Self { avx2: sub_i32_m256i(self.avx2, rhs.avx2) }
      } else {
        // Subtract each 128-bit half independently.
        Self {
          a : self.a.sub(rhs.a),
          b : self.b.sub(rhs.b),
        }
      }
    }
  }
}
55
impl Mul for i32x8 {
  type Output = Self;
  /// Lanewise `i32` multiplication, keeping the low 32 bits of each product.
  #[inline]
  #[must_use]
  fn mul(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        // "keep_low" = only the low 32 bits of each 64-bit product survive.
        Self { avx2: mul_i32_keep_low_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.mul(rhs.a),
          b : self.b.mul(rhs.b),
        }
      }
    }
  }
}
73
74impl Add<i32> for i32x8 {
75 type Output = Self;
76 #[inline]
77 #[must_use]
78 fn add(self, rhs: i32) -> Self::Output {
79 self.add(Self::splat(rhs))
80 }
81}
82
83impl Sub<i32> for i32x8 {
84 type Output = Self;
85 #[inline]
86 #[must_use]
87 fn sub(self, rhs: i32) -> Self::Output {
88 self.sub(Self::splat(rhs))
89 }
90}
91
92impl Mul<i32> for i32x8 {
93 type Output = Self;
94 #[inline]
95 #[must_use]
96 fn mul(self, rhs: i32) -> Self::Output {
97 self.mul(Self::splat(rhs))
98 }
99}
100
101impl Add<i32x8> for i32 {
102 type Output = i32x8;
103 #[inline]
104 #[must_use]
105 fn add(self, rhs: i32x8) -> Self::Output {
106 i32x8::splat(self) + rhs
107 }
108}
109
110impl Sub<i32x8> for i32 {
111 type Output = i32x8;
112 #[inline]
113 #[must_use]
114 fn sub(self, rhs: i32x8) -> Self::Output {
115 i32x8::splat(self) - rhs
116 }
117}
118
119impl Mul<i32x8> for i32 {
120 type Output = i32x8;
121 #[inline]
122 #[must_use]
123 fn mul(self, rhs: i32x8) -> Self::Output {
124 i32x8::splat(self) * rhs
125 }
126}
127
impl BitAnd for i32x8 {
  type Output = Self;
  /// Lanewise bitwise AND.
  #[inline]
  #[must_use]
  fn bitand(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        // Bitwise ops have no lane width; one 256-bit AND does it all.
        Self { avx2: bitand_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.bitand(rhs.a),
          b : self.b.bitand(rhs.b),
        }
      }
    }
  }
}
145
impl BitOr for i32x8 {
  type Output = Self;
  /// Lanewise bitwise OR.
  #[inline]
  #[must_use]
  fn bitor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        // Bitwise ops have no lane width; one 256-bit OR does it all.
        Self { avx2: bitor_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.bitor(rhs.a),
          b : self.b.bitor(rhs.b),
        }
      } }
  }
}
162
impl BitXor for i32x8 {
  type Output = Self;
  /// Lanewise bitwise XOR.
  #[inline]
  #[must_use]
  fn bitxor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        // Bitwise ops have no lane width; one 256-bit XOR does it all.
        Self { avx2: bitxor_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.bitxor(rhs.a),
          b : self.b.bitxor(rhs.b),
        }
      }
    }
  }
}
180
/// Implements `Shl<$shift_type>` (shift every lane left by the same scalar
/// amount) for each listed integer type.
macro_rules! impl_shl_t_for_i32x8 {
  ($($shift_type:ty),+ $(,)?) => {
    $(impl Shl<$shift_type> for i32x8 {
      type Output = Self;
      #[inline]
      #[must_use]
      fn shl(self, rhs: $shift_type) -> Self::Output {
        pick! {
          if #[cfg(target_feature="avx2")] {
            // The AVX2 "shift all lanes" form takes the count in the low 64
            // bits of a vector, so widen the scalar and park it there.
            let shift = cast([rhs as u64, 0]);
            Self { avx2: shl_all_u32_m256i(self.avx2, shift) }
          } else {
            Self {
              a : self.a.shl(rhs),
              b : self.b.shl(rhs),
            }
          }
        }
      }
    })+
  };
}
impl_shl_t_for_i32x8!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);
205
/// Implements `Shr<$shift_type>` (arithmetic right shift of every lane by
/// the same scalar amount) for each listed integer type.
macro_rules! impl_shr_t_for_i32x8 {
  ($($shift_type:ty),+ $(,)?) => {
    $(impl Shr<$shift_type> for i32x8 {
      type Output = Self;
      #[inline]
      #[must_use]
      fn shr(self, rhs: $shift_type) -> Self::Output {
        pick! {
          if #[cfg(target_feature="avx2")] {
            // The AVX2 "shift all lanes" form takes the count in the low 64
            // bits of a vector, so widen the scalar and park it there.
            let shift = cast([rhs as u64, 0]);
            // i32 variant => sign-extending (arithmetic) shift.
            Self { avx2: shr_all_i32_m256i(self.avx2, shift) }
          } else {
            Self {
              a : self.a.shr(rhs),
              b : self.b.shr(rhs),
            }
          }
        }
      }
    })+
  };
}

impl_shr_t_for_i32x8!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);
231
impl Shr<i32x8> for i32x8 {
  type Output = Self;

  /// Lanewise arithmetic right shift: each lane of `self` is shifted by the
  /// matching lane of `rhs`.
  #[inline]
  #[must_use]
  fn shr(self, rhs: i32x8) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        // Mask each count to the low five bits (0..=31) so the per-lane
        // hardware shift behaves like `count % 32` rather than zero-filling
        // on out-of-range counts.
        let shift_by = bitand_m256i(rhs.avx2, set_splat_i32_m256i(31));
        Self { avx2: shr_each_i32_m256i(self.avx2, shift_by ) }
      } else {
        Self {
          a : self.a.shr(rhs.a),
          b : self.b.shr(rhs.b),
        }
      }
    }
  }
}
257
impl Shl<i32x8> for i32x8 {
  type Output = Self;

  /// Lanewise left shift: each lane of `self` is shifted by the matching
  /// lane of `rhs`.
  #[inline]
  #[must_use]
  fn shl(self, rhs: i32x8) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        // Mask each count to the low five bits (0..=31) so the per-lane
        // hardware shift behaves like `count % 32` rather than zero-filling
        // on out-of-range counts.
        let shift_by = bitand_m256i(rhs.avx2, set_splat_i32_m256i(31));
        Self { avx2: shl_each_u32_m256i(self.avx2, shift_by) }
      } else {
        Self {
          a : self.a.shl(rhs.a),
          b : self.b.shl(rhs.b),
        }
      }
    }
  }
}
284
impl CmpEq for i32x8 {
  type Output = Self;
  /// Lanewise `self == rhs`: equal lanes become all-ones (`-1`), others
  /// become all-zeros.
  #[inline]
  #[must_use]
  fn cmp_eq(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: cmp_eq_mask_i32_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.cmp_eq(rhs.a),
          b : self.b.cmp_eq(rhs.b),
        }
      }
    }
  }
}
302
impl CmpGt for i32x8 {
  type Output = Self;
  /// Lanewise signed `self > rhs`: true lanes become all-ones (`-1`), others
  /// become all-zeros.
  #[inline]
  #[must_use]
  fn cmp_gt(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: cmp_gt_mask_i32_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.cmp_gt(rhs.a),
          b : self.b.cmp_gt(rhs.b),
        }
      }
    }
  }
}
320
321impl CmpLt for i32x8 {
322 type Output = Self;
323 #[inline]
324 #[must_use]
325 fn cmp_lt(self, rhs: Self) -> Self::Output {
326 pick! {
327 if #[cfg(target_feature="avx2")] {
328 Self { avx2: !cmp_gt_mask_i32_m256i(self.avx2, rhs.avx2) ^ cmp_eq_mask_i32_m256i(self.avx2,rhs.avx2) }
329 } else {
330 Self {
331 a : self.a.cmp_lt(rhs.a),
332 b : self.b.cmp_lt(rhs.b),
333 }
334 }
335 }
336 }
337}
338
339impl From<i16x8> for i32x8 {
340 #[inline]
341 #[must_use]
342 fn from(value: i16x8) -> Self {
343 i32x8::from_i16x8(value)
344 }
345}
346
347impl i32x8 {
348 #[inline]
349 #[must_use]
350 pub const fn new(array: [i32; 8]) -> Self {
351 unsafe { core::intrinsics::transmute(array) }
352 }
353
  /// Widens each of the eight `i16` lanes of `v` to `i32`, sign-extending.
  #[inline]
  #[must_use]
  pub fn from_i16x8(v: i16x8) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        // Direct sign-extending conversion instruction.
        i32x8 { avx2:convert_to_i32_m256i_from_i16_m128i(v.sse) }
      } else if #[cfg(target_feature="sse2")] {
        // Interleave each halfword with itself so it sits in the high half
        // of a 32-bit lane, then arithmetic-shift right by 16 to
        // sign-extend it into place.
        i32x8 {
          a: i32x4 { sse: shr_imm_i32_m128i::<16>( unpack_low_i16_m128i(v.sse, v.sse)) },
          b: i32x4 { sse: shr_imm_i32_m128i::<16>( unpack_high_i16_m128i(v.sse, v.sse)) },
        }
      } else {
        // Scalar fallback: widen lane by lane.
        i32x8::new([
          i32::from(v.as_array_ref()[0]),
          i32::from(v.as_array_ref()[1]),
          i32::from(v.as_array_ref()[2]),
          i32::from(v.as_array_ref()[3]),
          i32::from(v.as_array_ref()[4]),
          i32::from(v.as_array_ref()[5]),
          i32::from(v.as_array_ref()[6]),
          i32::from(v.as_array_ref()[7]),
        ])
      }
    }
  }
380
  /// Widens each of the eight `u16` lanes of `v` to `i32`, zero-extending.
  #[inline]
  #[must_use]
  pub fn from_u16x8(v: u16x8) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        // Direct zero-extending conversion instruction.
        i32x8 { avx2:convert_to_i32_m256i_from_u16_m128i(v.sse) }
      } else if #[cfg(target_feature="sse2")] {
        // Interleave each halfword with itself so it sits in the high half
        // of a 32-bit lane, then logical-shift right by 16 to zero-extend
        // it into place.
        i32x8 {
          a: i32x4 { sse: shr_imm_u32_m128i::<16>( unpack_low_i16_m128i(v.sse, v.sse)) },
          b: i32x4 { sse: shr_imm_u32_m128i::<16>( unpack_high_i16_m128i(v.sse, v.sse)) },
        }
      } else {
        // Scalar fallback: widen lane by lane.
        i32x8::new([
          i32::from(v.as_array_ref()[0]),
          i32::from(v.as_array_ref()[1]),
          i32::from(v.as_array_ref()[2]),
          i32::from(v.as_array_ref()[3]),
          i32::from(v.as_array_ref()[4]),
          i32::from(v.as_array_ref()[5]),
          i32::from(v.as_array_ref()[6]),
          i32::from(v.as_array_ref()[7]),
        ])
      }
    }
  }
407
  /// Lanewise select using `self` as the mask: lanes where the mask is
  /// all-ones take the lane from `t`, lanes where it is all-zeros take the
  /// lane from `f`.
  ///
  /// NOTE(review): the AVX2 path blends at byte granularity, so a mask lane
  /// that is neither all-ones nor all-zeros (i.e. not produced by the
  /// `cmp_*` methods) would mix bytes from `t` and `f` — confirm callers
  /// only pass proper masks.
  #[inline]
  #[must_use]
  pub fn blend(self, t: Self, f: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: blend_varying_i8_m256i(f.avx2, t.avx2, self.avx2) }
      } else {
        Self {
          a : self.a.blend(t.a, f.a),
          b : self.b.blend(t.b, f.b)
        }
      }
    }
  }
422
423 #[inline]
425 #[must_use]
426 pub fn reduce_add(self) -> i32 {
427 let arr: [i32x4; 2] = cast(self);
428 (arr[0] + arr[1]).reduce_add()
429 }
430
431 #[inline]
433 #[must_use]
434 pub fn reduce_max(self) -> i32 {
435 let arr: [i32x4; 2] = cast(self);
436 arr[0].max(arr[1]).reduce_max()
437 }
438
439 #[inline]
441 #[must_use]
442 pub fn reduce_min(self) -> i32 {
443 let arr: [i32x4; 2] = cast(self);
444 arr[0].min(arr[1]).reduce_min()
445 }
446
  /// Lanewise absolute value.
  ///
  /// NOTE(review): for `i32::MIN` the hardware abs wraps back to
  /// `i32::MIN`; see `unsigned_abs` for the lossless variant.
  #[inline]
  #[must_use]
  pub fn abs(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: abs_i32_m256i(self.avx2) }
      } else {
        Self {
          a : self.a.abs(),
          b : self.b.abs(),
        }
      }
    }
  }
461
  /// Lanewise absolute value, returned as `u32x8` so that `i32::MIN`
  /// (whose magnitude doesn't fit in `i32`) is representable.
  #[inline]
  #[must_use]
  pub fn unsigned_abs(self) -> u32x8 {
    pick! {
      if #[cfg(target_feature="avx2")] {
        // The signed abs wraps i32::MIN to 0x8000_0000, which is exactly
        // its correct unsigned magnitude once reinterpreted as u32.
        u32x8 { avx2: abs_i32_m256i(self.avx2) }
      } else {
        u32x8 {
          a : self.a.unsigned_abs(),
          b : self.b.unsigned_abs(),
        }
      }
    }
  }
476
  /// Lanewise signed maximum of `self` and `rhs`.
  #[inline]
  #[must_use]
  pub fn max(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: max_i32_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.max(rhs.a),
          b : self.b.max(rhs.b),
        }
      }
    }
  }
  /// Lanewise signed minimum of `self` and `rhs`.
  #[inline]
  #[must_use]
  pub fn min(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: min_i32_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.min(rhs.a),
          b : self.b.min(rhs.b),
        }
      }
    }
  }
  /// Converts each `i32` lane to `f32`, yielding an `f32x8`.
  #[inline]
  #[must_use]
  pub fn round_float(self) -> f32x8 {
    pick! {
      if #[cfg(target_feature="avx2")] {
        cast(convert_to_m256_from_i32_m256i(self.avx2))
      } else {
        // Convert each half and glue the results back together.
        cast([
          self.a.round_float(),
          self.b.round_float(),
        ])
      }
    }
  }
519
  /// Packs the high (sign) bit of each lane into the low eight bits of the
  /// result, lane 0 ending up in bit 0.
  #[inline]
  #[must_use]
  pub fn move_mask(self) -> i32 {
    pick! {
      if #[cfg(target_feature="avx2")] {
        // Reinterpreting as f32 lanes lets the float movemask collect all
        // eight 32-bit sign bits in one go.
        move_mask_m256(cast(self.avx2))
      } else {
        // Low half supplies bits 0..=3, high half bits 4..=7.
        self.a.move_mask() | (self.b.move_mask() << 4)
      }
    }
  }
532
  /// Returns `true` if any lane has its sign bit set (i.e. any mask lane is
  /// "true").
  #[inline]
  #[must_use]
  pub fn any(self) -> bool {
    pick! {
      if #[cfg(target_feature="avx2")] {
        // Any set bit in the packed sign-bit mask means at least one lane.
        move_mask_m256(cast(self.avx2)) != 0
      } else {
        // OR-ing the halves preserves every set sign bit.
        (self.a | self.b).any()
      }
    }
  }
  /// Returns `true` if every lane has its sign bit set (i.e. all mask lanes
  /// are "true").
  #[inline]
  #[must_use]
  pub fn all(self) -> bool {
    pick! {
      if #[cfg(target_feature="avx2")] {
        // All eight packed sign bits must be set.
        move_mask_m256(cast(self.avx2)) == 0b11111111
      } else {
        // AND-ing the halves keeps a sign bit only where both halves had it.
        (self.a & self.b).all()
      }
    }
  }
  /// Returns `true` if no lane has its sign bit set — the complement of
  /// [`any`](Self::any).
  #[inline]
  #[must_use]
  pub fn none(self) -> bool {
    !self.any()
  }
560
  /// Transposes an 8x8 matrix of lanes: output row `i` holds lane `i` of
  /// every input row.
  #[must_use]
  #[inline]
  pub fn transpose(data: [i32x8; 8]) -> [i32x8; 8] {
    pick! {
      if #[cfg(target_feature="avx2")] {
        // Stage 1: interleave adjacent row pairs at 32-bit granularity.
        // Each unpack works within 128-bit sub-lanes of the 256-bit vector.
        let a0 = unpack_low_i32_m256i(data[0].avx2, data[1].avx2);
        let a1 = unpack_high_i32_m256i(data[0].avx2, data[1].avx2);
        let a2 = unpack_low_i32_m256i(data[2].avx2, data[3].avx2);
        let a3 = unpack_high_i32_m256i(data[2].avx2, data[3].avx2);
        let a4 = unpack_low_i32_m256i(data[4].avx2, data[5].avx2);
        let a5 = unpack_high_i32_m256i(data[4].avx2, data[5].avx2);
        let a6 = unpack_low_i32_m256i(data[6].avx2, data[7].avx2);
        let a7 = unpack_high_i32_m256i(data[6].avx2, data[7].avx2);

        // Builds the 8-bit immediate for a 4-lane shuffle, like _MM_SHUFFLE.
        pub const fn mm_shuffle(z: i32, y: i32, x: i32, w: i32) -> i32 {
          (z << 6) | (y << 4) | (x << 2) | w
        }

        const SHUFF_LO : i32 = mm_shuffle(1,0,1,0);
        const SHUFF_HI : i32 = mm_shuffle(3,2,3,2);

        // Stage 2: combine 64-bit pairs across the interleaved vectors.
        // The float shuffle is used for its two-source 4-lane pattern; the
        // casts only reinterpret bits, no conversion happens.
        let b0 = cast::<m256,m256i>(shuffle_m256::<SHUFF_LO>(cast(a0),cast(a2)));
        let b1 = cast::<m256,m256i>(shuffle_m256::<SHUFF_HI>(cast(a0),cast(a2)));
        let b2 = cast::<m256,m256i>(shuffle_m256::<SHUFF_LO>(cast(a1),cast(a3)));
        let b3 = cast::<m256,m256i>(shuffle_m256::<SHUFF_HI>(cast(a1),cast(a3)));
        let b4 = cast::<m256,m256i>(shuffle_m256::<SHUFF_LO>(cast(a4),cast(a6)));
        let b5 = cast::<m256,m256i>(shuffle_m256::<SHUFF_HI>(cast(a4),cast(a6)));
        let b6 = cast::<m256,m256i>(shuffle_m256::<SHUFF_LO>(cast(a5),cast(a7)));
        let b7 = cast::<m256,m256i>(shuffle_m256::<SHUFF_HI>(cast(a5),cast(a7)));

        // Stage 3: stitch 128-bit halves across vectors — 0x20 takes the
        // low half of each source, 0x31 the high half of each.
        [
          i32x8 { avx2: permute2z_m256i::<0x20>(b0, b4) },
          i32x8 { avx2: permute2z_m256i::<0x20>(b1, b5) },
          i32x8 { avx2: permute2z_m256i::<0x20>(b2, b6) },
          i32x8 { avx2: permute2z_m256i::<0x20>(b3, b7) },
          i32x8 { avx2: permute2z_m256i::<0x31>(b0, b4) },
          i32x8 { avx2: permute2z_m256i::<0x31>(b1, b5) },
          i32x8 { avx2: permute2z_m256i::<0x31>(b2, b6) },
          i32x8 { avx2: permute2z_m256i::<0x31>(b3, b7) }
        ]
      } else {
        // Scalar fallback: gather column `index` across all eight rows.
        #[inline(always)]
        fn transpose_column(data: &[i32x8; 8], index: usize) -> i32x8 {
          i32x8::new([
            data[0].as_array_ref()[index],
            data[1].as_array_ref()[index],
            data[2].as_array_ref()[index],
            data[3].as_array_ref()[index],
            data[4].as_array_ref()[index],
            data[5].as_array_ref()[index],
            data[6].as_array_ref()[index],
            data[7].as_array_ref()[index],
          ])
        }

        [
          transpose_column(&data, 0),
          transpose_column(&data, 1),
          transpose_column(&data, 2),
          transpose_column(&data, 3),
          transpose_column(&data, 4),
          transpose_column(&data, 5),
          transpose_column(&data, 6),
          transpose_column(&data, 7),
        ]
      }
    }
  }
634
  /// Returns the eight lanes as a plain array (bitwise reinterpretation).
  #[inline]
  pub fn to_array(self) -> [i32; 8] {
    cast(self)
  }
639
  /// Views the vector as a borrowed array of its eight lanes.
  #[inline]
  pub fn as_array_ref(&self) -> &[i32; 8] {
    cast_ref(self)
  }
644
  /// Views the vector as a mutable array of its eight lanes.
  #[inline]
  pub fn as_array_mut(&mut self) -> &mut [i32; 8] {
    cast_mut(self)
  }
649}
650
impl Not for i32x8 {
  type Output = Self;
  /// Lanewise bitwise NOT (flips every bit of every lane).
  #[inline]
  fn not(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: self.avx2.not() }
      } else {
        Self {
          a : self.a.not(),
          b : self.b.not(),
        }
      }
    }
  }
}