use super::*;

pick! {
  if #[cfg(target_feature="sse2")] {
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(16))]
    pub struct u32x4 { pub(crate) sse: m128i }
  } else if #[cfg(target_feature="simd128")] {
    use core::arch::wasm32::*;

    #[derive(Clone, Copy)]
    #[repr(transparent)]
    pub struct u32x4 { pub(crate) simd: v128 }

    impl Default for u32x4 {
      fn default() -> Self {
        Self::splat(0)
      }
    }

    impl PartialEq for u32x4 {
      fn eq(&self, other: &Self) -> bool {
        u32x4_all_true(u32x4_eq(self.simd, other.simd))
      }
    }

    impl Eq for u32x4 { }
  } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
    use core::arch::aarch64::*;
    #[repr(C)]
    #[derive(Copy, Clone)]
    pub struct u32x4 { pub(crate) neon : uint32x4_t }

    impl Default for u32x4 {
      #[inline]
      #[must_use]
      fn default() -> Self {
        Self::splat(0)
      }
    }

    impl PartialEq for u32x4 {
      #[inline]
      #[must_use]
      fn eq(&self, other: &Self) -> bool {
        unsafe { vminvq_u32(vceqq_u32(self.neon, other.neon)) == u32::MAX }
      }
    }

    impl Eq for u32x4 { }
  } else {
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(16))]
    pub struct u32x4 { arr: [u32;4] }
  }
}

int_uint_consts!(u32, 4, u32x4, 128);

unsafe impl Zeroable for u32x4 {}
unsafe impl Pod for u32x4 {}

impl Add for u32x4 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn add(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: add_i32_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: u32x4_add(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vaddq_u32(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          self.arr[0].wrapping_add(rhs.arr[0]),
          self.arr[1].wrapping_add(rhs.arr[1]),
          self.arr[2].wrapping_add(rhs.arr[2]),
          self.arr[3].wrapping_add(rhs.arr[3]),
        ]}
      }
    }
  }
}

impl Sub for u32x4 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn sub(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: sub_i32_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: u32x4_sub(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vsubq_u32(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          self.arr[0].wrapping_sub(rhs.arr[0]),
          self.arr[1].wrapping_sub(rhs.arr[1]),
          self.arr[2].wrapping_sub(rhs.arr[2]),
          self.arr[3].wrapping_sub(rhs.arr[3]),
        ]}
      }
    }
  }
}

impl Mul for u32x4 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn mul(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse4.1")] {
        Self { sse: mul_32_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: u32x4_mul(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vmulq_u32(self.neon, rhs.neon) } }
      } else {
        let arr1: [u32; 4] = cast(self);
        let arr2: [u32; 4] = cast(rhs);
        cast([
          arr1[0].wrapping_mul(arr2[0]),
          arr1[1].wrapping_mul(arr2[1]),
          arr1[2].wrapping_mul(arr2[2]),
          arr1[3].wrapping_mul(arr2[3]),
        ])
      }
    }
  }
}

impl Add<u32> for u32x4 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn add(self, rhs: u32) -> Self::Output {
    self.add(Self::splat(rhs))
  }
}

impl Sub<u32> for u32x4 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn sub(self, rhs: u32) -> Self::Output {
    self.sub(Self::splat(rhs))
  }
}

impl Mul<u32> for u32x4 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn mul(self, rhs: u32) -> Self::Output {
    self.mul(Self::splat(rhs))
  }
}

impl Add<u32x4> for u32 {
  type Output = u32x4;
  #[inline]
  #[must_use]
  fn add(self, rhs: u32x4) -> Self::Output {
    u32x4::splat(self).add(rhs)
  }
}

impl Sub<u32x4> for u32 {
  type Output = u32x4;
  #[inline]
  #[must_use]
  fn sub(self, rhs: u32x4) -> Self::Output {
    u32x4::splat(self).sub(rhs)
  }
}

impl Mul<u32x4> for u32 {
  type Output = u32x4;
  #[inline]
  #[must_use]
  fn mul(self, rhs: u32x4) -> Self::Output {
    u32x4::splat(self).mul(rhs)
  }
}

impl BitAnd for u32x4 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn bitand(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: bitand_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_and(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vandq_u32(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          self.arr[0].bitand(rhs.arr[0]),
          self.arr[1].bitand(rhs.arr[1]),
          self.arr[2].bitand(rhs.arr[2]),
          self.arr[3].bitand(rhs.arr[3]),
        ]}
      }
    }
  }
}

impl BitOr for u32x4 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn bitor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: bitor_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_or(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vorrq_u32(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          self.arr[0].bitor(rhs.arr[0]),
          self.arr[1].bitor(rhs.arr[1]),
          self.arr[2].bitor(rhs.arr[2]),
          self.arr[3].bitor(rhs.arr[3]),
        ]}
      }
    }
  }
}

impl BitXor for u32x4 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn bitxor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: bitxor_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_xor(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: veorq_u32(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          self.arr[0].bitxor(rhs.arr[0]),
          self.arr[1].bitxor(rhs.arr[1]),
          self.arr[2].bitxor(rhs.arr[2]),
          self.arr[3].bitxor(rhs.arr[3]),
        ]}
      }
    }
  }
}

macro_rules! impl_shl_t_for_u32x4 {
  ($($shift_type:ty),+ $(,)?) => {
    $(impl Shl<$shift_type> for u32x4 {
      type Output = Self;
      #[inline]
      #[must_use]
      fn shl(self, rhs: $shift_type) -> Self::Output {
        pick! {
          if #[cfg(target_feature="sse2")] {
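            // the SSE shift-all intrinsic reads the count from the low 64 bits of an m128i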
            let shift = cast([rhs as u64, 0]);
            Self { sse: shl_all_u32_m128i(self.sse, shift) }
          } else if #[cfg(target_feature="simd128")] {
            Self { simd: u32x4_shl(self.simd, rhs as u32) }
          } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
            unsafe { Self { neon: vshlq_u32(self.neon, vmovq_n_s32(rhs as i32)) } }
          } else {
            let u = rhs as u64;
            Self { arr: [
              self.arr[0] << u,
              self.arr[1] << u,
              self.arr[2] << u,
              self.arr[3] << u,
            ]}
          }
        }
      }
    })+
  };
}
impl_shl_t_for_u32x4!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);

macro_rules! impl_shr_t_for_u32x4 {
  ($($shift_type:ty),+ $(,)?) => {
    $(impl Shr<$shift_type> for u32x4 {
      type Output = Self;
      #[inline]
      #[must_use]
      fn shr(self, rhs: $shift_type) -> Self::Output {
        pick! {
          if #[cfg(target_feature="sse2")] {
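            // the SSE shift-all intrinsic reads the count from the low 64 bits of an m128i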
            let shift = cast([rhs as u64, 0]);
            Self { sse: shr_all_u32_m128i(self.sse, shift) }
          } else if #[cfg(target_feature="simd128")] {
            Self { simd: u32x4_shr(self.simd, rhs as u32) }
          } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
            unsafe { Self { neon: vshlq_u32(self.neon, vmovq_n_s32(-(rhs as i32))) } }
          } else {
            let u = rhs as u64;
            Self { arr: [
              self.arr[0] >> u,
              self.arr[1] >> u,
              self.arr[2] >> u,
              self.arr[3] >> u,
            ]}
          }
        }
      }
    })+
  };
}
impl_shr_t_for_u32x4!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);

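/// Shifts each lane right by the amount in the corresponding lane of `rhs`.
///
/// The shift amount is masked to the lane width (0..=31), so this matches the
/// scalar `wrapping_shr` behavior used in the fallback.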
impl Shr<u32x4> for u32x4 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn shr(self, rhs: u32x4) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
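        // mask the per-lane shift count to 0..=31 so it matches wrapping_shr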
        let shift_by = bitand_m128i(rhs.sse, set_splat_i32_m128i(31));
        Self { sse: shr_each_u32_m128i(self.sse, shift_by) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {
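          // NEON only shifts left by a signed per-lane count, so negate the
          // masked count to get a right shift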
          let shift_by = vnegq_s32(vreinterpretq_s32_u32(vandq_u32(rhs.neon, vmovq_n_u32(31))));
          Self { neon: vshlq_u32(self.neon, shift_by) }
        }
      } else {
        let arr: [u32; 4] = cast(self);
        let rhs: [u32; 4] = cast(rhs);
        cast([
          arr[0].wrapping_shr(rhs[0]),
          arr[1].wrapping_shr(rhs[1]),
          arr[2].wrapping_shr(rhs[2]),
          arr[3].wrapping_shr(rhs[3]),
        ])
      }
    }
  }
}

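/// Shifts each lane left by the amount in the corresponding lane of `rhs`.
///
/// The shift amount is masked to the lane width (0..=31), so this matches the
/// scalar `wrapping_shl` behavior used in the fallback.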
impl Shl<u32x4> for u32x4 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn shl(self, rhs: u32x4) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
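        // mask the per-lane shift count to 0..=31 so it matches wrapping_shl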
        let shift_by = bitand_m128i(rhs.sse, set_splat_i32_m128i(31));
        Self { sse: shl_each_u32_m128i(self.sse, shift_by) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {
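          // vshlq_u32 takes the per-lane shift count as a signed vector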
          let shift_by = vreinterpretq_s32_u32(vandq_u32(rhs.neon, vmovq_n_u32(31)));
          Self { neon: vshlq_u32(self.neon, shift_by) }
        }
      } else {
        let arr: [u32; 4] = cast(self);
        let rhs: [u32; 4] = cast(rhs);
        cast([
          arr[0].wrapping_shl(rhs[0]),
          arr[1].wrapping_shl(rhs[1]),
          arr[2].wrapping_shl(rhs[2]),
          arr[3].wrapping_shl(rhs[3]),
        ])
      }
    }
  }
}

impl u32x4 {
  #[inline]
  #[must_use]
  pub const fn new(array: [u32; 4]) -> Self {
    unsafe { core::mem::transmute(array) }
  }
  #[inline]
  #[must_use]
  pub fn cmp_eq(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: cmp_eq_mask_i32_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: u32x4_eq(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vceqq_u32(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          if self.arr[0] == rhs.arr[0] { u32::MAX } else { 0 },
          if self.arr[1] == rhs.arr[1] { u32::MAX } else { 0 },
          if self.arr[2] == rhs.arr[2] { u32::MAX } else { 0 },
          if self.arr[3] == rhs.arr[3] { u32::MAX } else { 0 },
        ]}
      }
    }
  }
  #[inline]
  #[must_use]
  pub fn cmp_gt(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse2")] {
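        // SSE2 has no unsigned compare, so flip the sign bit of both sides
        // and use the signed compare to get the unsigned ordering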
        let h = u32x4::splat(1 << 31);
        Self { sse: cmp_gt_mask_i32_m128i((self ^ h).sse, (rhs ^ h).sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: u32x4_gt(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
        unsafe { Self { neon: vcgtq_u32(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          if self.arr[0] > rhs.arr[0] { u32::MAX } else { 0 },
          if self.arr[1] > rhs.arr[1] { u32::MAX } else { 0 },
          if self.arr[2] > rhs.arr[2] { u32::MAX } else { 0 },
          if self.arr[3] > rhs.arr[3] { u32::MAX } else { 0 },
        ]}
      }
    }
  }
  #[inline]
  #[must_use]
  pub fn cmp_lt(self, rhs: Self) -> Self {
    rhs.cmp_gt(self)
  }

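  /// Multiplies corresponding 32 bit lanes and returns the high 32 bits of
  /// each 64 bit product.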
  #[inline]
  #[must_use]
  pub fn mul_keep_high(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        let a = convert_to_i64_m256i_from_u32_m128i(self.sse);
        let b = convert_to_i64_m256i_from_u32_m128i(rhs.sse);
        let r = mul_u64_low_bits_m256i(a, b);

        let b : [u32;8] = cast(r);
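        // each 64 bit product is [low, high] as u32 lanes, so the odd lanes
        // hold the high halves we want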
        cast([b[1],b[3],b[5],b[7]])
      } else if #[cfg(target_feature="sse2")] {
        let evenp = mul_widen_u32_odd_m128i(self.sse, rhs.sse);

        let oddp = mul_widen_u32_odd_m128i(
          shr_imm_u64_m128i::<32>(self.sse),
          shr_imm_u64_m128i::<32>(rhs.sse));

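        // keep the high 32 bits of each product and restore lane order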
        let a : [u32;4] = cast(evenp);
        let b : [u32;4] = cast(oddp);
        cast([a[1],b[1],a[3],b[3]])

      } else if #[cfg(target_feature="simd128")] {
        let low = u64x2_extmul_low_u32x4(self.simd, rhs.simd);
        let high = u64x2_extmul_high_u32x4(self.simd, rhs.simd);

        Self { simd: u32x4_shuffle::<1, 3, 5, 7>(low, high) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
        unsafe {
          let l = vmull_u32(vget_low_u32(self.neon), vget_low_u32(rhs.neon));
          let h = vmull_u32(vget_high_u32(self.neon), vget_high_u32(rhs.neon));
          u32x4 { neon: vcombine_u32(vshrn_n_u64(l, 32), vshrn_n_u64(h, 32)) }
        }
      } else {
        let a: [u32; 4] = cast(self);
        let b: [u32; 4] = cast(rhs);
        cast([
          ((u64::from(a[0]) * u64::from(b[0])) >> 32) as u32,
          ((u64::from(a[1]) * u64::from(b[1])) >> 32) as u32,
          ((u64::from(a[2]) * u64::from(b[2])) >> 32) as u32,
          ((u64::from(a[3]) * u64::from(b[3])) >> 32) as u32,
        ])
      }
    }
  }

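  /// Multiplies corresponding 32 bit lanes, widening each product to 64 bits,
  /// and returns all four products as a `u64x4`.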
  #[inline]
  #[must_use]
  pub fn mul_widen(self, rhs: Self) -> u64x4 {
    pick! {
      if #[cfg(target_feature="avx2")] {
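        // sign extension is fine here: mul_u64_low_bits_m256i only reads the
        // low 32 bits of each 64 bit lane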
        let a = convert_to_i64_m256i_from_i32_m128i(self.sse);
        let b = convert_to_i64_m256i_from_i32_m128i(rhs.sse);
        cast(mul_u64_low_bits_m256i(a, b))
      } else if #[cfg(target_feature="sse2")] {
        let evenp = mul_widen_u32_odd_m128i(self.sse, rhs.sse);

        let oddp = mul_widen_u32_odd_m128i(
          shr_imm_u64_m128i::<32>(self.sse),
          shr_imm_u64_m128i::<32>(rhs.sse));

        u64x4 {
          a: u64x2 { sse: unpack_low_i64_m128i(evenp, oddp) },
          b: u64x2 { sse: unpack_high_i64_m128i(evenp, oddp) },
        }
      } else if #[cfg(target_feature="simd128")] {
        u64x4 {
          a: u64x2 { simd: u64x2_extmul_low_u32x4(self.simd, rhs.simd) },
          b: u64x2 { simd: u64x2_extmul_high_u32x4(self.simd, rhs.simd) },
        }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
        unsafe {
          u64x4 {
            a: u64x2 { neon: vmull_u32(vget_low_u32(self.neon), vget_low_u32(rhs.neon)) },
            b: u64x2 { neon: vmull_u32(vget_high_u32(self.neon), vget_high_u32(rhs.neon)) },
          }
        }
      } else {
        let a: [u32; 4] = cast(self);
        let b: [u32; 4] = cast(rhs);
        cast([
          u64::from(a[0]) * u64::from(b[0]),
          u64::from(a[1]) * u64::from(b[1]),
          u64::from(a[2]) * u64::from(b[2]),
          u64::from(a[3]) * u64::from(b[3]),
        ])
      }
    }
  }

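  /// Uses `self` as a lanewise boolean mask (all ones or all zeros per lane,
  /// e.g. the output of a `cmp_*` method) to pick lanes from `t` (where the
  /// mask is set) or `f` (where it is clear).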
  #[inline]
  #[must_use]
  pub fn blend(self, t: Self, f: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse4.1")] {
        Self { sse: blend_varying_i8_m128i(f.sse, t.sse, self.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_bitselect(t.simd, f.simd, self.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vbslq_u32(self.neon, t.neon, f.neon) } }
      } else {
        generic_bit_blend(self, t, f)
      }
    }
  }
  #[inline]
  #[must_use]
  pub fn max(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse4.1")] {
        Self { sse: max_u32_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: u32x4_max(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vmaxq_u32(self.neon, rhs.neon) } }
      } else {
        let arr: [u32; 4] = cast(self);
        let rhs: [u32; 4] = cast(rhs);
        cast([
          arr[0].max(rhs[0]),
          arr[1].max(rhs[1]),
          arr[2].max(rhs[2]),
          arr[3].max(rhs[3]),
        ])
      }
    }
  }
  #[inline]
  #[must_use]
  pub fn min(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse4.1")] {
        Self { sse: min_u32_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: u32x4_min(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vminq_u32(self.neon, rhs.neon) } }
      } else {
        let arr: [u32; 4] = cast(self);
        let rhs: [u32; 4] = cast(rhs);
        cast([
          arr[0].min(rhs[0]),
          arr[1].min(rhs[1]),
          arr[2].min(rhs[2]),
          arr[3].min(rhs[3]),
        ])
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn any(self) -> bool {
    pick! {
      if #[cfg(target_feature="sse2")] {
        (move_mask_i8_m128i(self.sse) & 0b1000100010001000) != 0
      } else if #[cfg(target_feature="simd128")] {
        u32x4_bitmask(self.simd) != 0
      } else {
        let v: [u64; 2] = cast(self);
        ((v[0] | v[1]) & 0x8000000080000000) != 0
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn all(self) -> bool {
    pick! {
      if #[cfg(target_feature="sse2")] {
        (move_mask_i8_m128i(self.sse) & 0b1000100010001000) == 0b1000100010001000
      } else if #[cfg(target_feature="simd128")] {
        u32x4_bitmask(self.simd) == 0b1111
      } else {
        let v: [u64; 2] = cast(self);
        (v[0] & v[1] & 0x8000000080000000) == 0x8000000080000000
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn none(self) -> bool {
    !self.any()
  }

  #[inline]
  pub fn to_array(self) -> [u32; 4] {
    cast(self)
  }

  #[inline]
  pub fn as_array_ref(&self) -> &[u32; 4] {
    cast_ref(self)
  }

  #[inline]
  pub fn as_array_mut(&mut self) -> &mut [u32; 4] {
    cast_mut(self)
  }
}