safe_arch/x86_x64/sse2.rs
1#![cfg(target_feature = "sse2")]
2
3use super::*;
4
5/// Lanewise `a + b` with lanes as `i8`.
6/// ```
7/// # use safe_arch::*;
8/// let a = m128i::from([0_i8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
9/// let b = m128i::from([0_i8, 11, 2, 13, 4, 15, 6, 17, 8, 19, -20, 21, 22, -23, 24, 127]);
10/// let c: [i8; 16] = add_i8_m128i(a, b).into();
11/// assert_eq!(c, [0, 12, 4, 16, 8, 20, 12, 24, 16, 28, -10, 32, 34, -10, 38, -114]);
12/// ```
13#[must_use]
14#[inline(always)]
15#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
16pub fn add_i8_m128i(a: m128i, b: m128i) -> m128i {
17 m128i(unsafe { _mm_add_epi8(a.0, b.0) })
18}
19
20/// Lanewise `a + b` with lanes as `i16`.
21/// ```
22/// # use safe_arch::*;
23/// let a = m128i::from([1_i16, 2, 3, 4, -1, -2, -3, -4]);
24/// let b = m128i::from([5_i16, 6, 7, 8, -15, -26, -37, 48]);
25/// let c: [i16; 8] = add_i16_m128i(a, b).into();
26/// assert_eq!(c, [6, 8, 10, 12, -16, -28, -40, 44]);
27/// ```
28#[must_use]
29#[inline(always)]
30#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
31pub fn add_i16_m128i(a: m128i, b: m128i) -> m128i {
32 m128i(unsafe { _mm_add_epi16(a.0, b.0) })
33}
34
35/// Lanewise `a + b` with lanes as `i32`.
36/// ```
37/// # use safe_arch::*;
38/// let a = m128i::from([1, 2, 3, 4]);
39/// let b = m128i::from([5, 6, 7, 8]);
40/// let c: [i32; 4] = add_i32_m128i(a, b).into();
41/// assert_eq!(c, [6, 8, 10, 12]);
42/// ```
43#[must_use]
44#[inline(always)]
45#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
46pub fn add_i32_m128i(a: m128i, b: m128i) -> m128i {
47 m128i(unsafe { _mm_add_epi32(a.0, b.0) })
48}
49
50/// Lanewise `a + b` with lanes as `i64`.
51/// ```
52/// # use safe_arch::*;
53/// let a = m128i::from([92_i64, 87]);
54/// let b = m128i::from([-9001_i64, 1]);
55/// let c: [i64; 2] = add_i64_m128i(a, b).into();
56/// assert_eq!(c, [-8909, 88]);
57/// ```
58#[must_use]
59#[inline(always)]
60#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
61pub fn add_i64_m128i(a: m128i, b: m128i) -> m128i {
62 m128i(unsafe { _mm_add_epi64(a.0, b.0) })
63}
64
65/// Lanewise `a + b`.
66/// ```
67/// # use safe_arch::*;
68/// let a = m128d::from_array([92.0, 87.5]);
69/// let b = m128d::from_array([100.0, -6.0]);
70/// let c = add_m128d(a, b).to_array();
71/// assert_eq!(c, [192.0, 81.5]);
72/// ```
73#[must_use]
74#[inline(always)]
75#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
76pub fn add_m128d(a: m128d, b: m128d) -> m128d {
77 m128d(unsafe { _mm_add_pd(a.0, b.0) })
78}
79
80/// Lowest lane `a + b`, high lane unchanged.
81/// ```
82/// # use safe_arch::*;
83/// let a = m128d::from_array([92.0, 87.5]);
84/// let b = m128d::from_array([100.0, -600.0]);
85/// let c = add_m128d_s(a, b).to_array();
86/// assert_eq!(c, [192.0, 87.5]);
87/// ```
88#[must_use]
89#[inline(always)]
90#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
91pub fn add_m128d_s(a: m128d, b: m128d) -> m128d {
92 m128d(unsafe { _mm_add_sd(a.0, b.0) })
93}
94
95/// Lanewise saturating `a + b` with lanes as `i8`.
96/// ```
97/// # use safe_arch::*;
98/// let a = m128i::from([
99/// i8::MAX, i8::MIN, 3, 4, -1, -2, -3, -4,
100/// 3, 4, -1, -2, -1, -2, -3, -4,
101/// ]);
102/// let b = m128i::from([
103/// i8::MAX, i8::MIN, 7, 8, -15, -26, -37, 48,
104/// 7, 8, -15, -26, -15, -26, -37, 48,
105/// ]);
106/// let c: [i8; 16] = add_saturating_i8_m128i(a, b).into();
107/// assert_eq!(
108/// c,
109/// [
110/// i8::MAX, i8::MIN, 10, 12, -16, -28, -40, 44,
111/// 10, 12, -16, -28, -16, -28, -40, 44
112/// ]
113/// );
114/// ```
115#[must_use]
116#[inline(always)]
117#[rustfmt::skip]
118#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
119pub fn add_saturating_i8_m128i(a: m128i, b: m128i) -> m128i {
120 m128i(unsafe { _mm_adds_epi8(a.0, b.0) })
121}
122
123/// Lanewise saturating `a + b` with lanes as `i16`.
124/// ```
125/// # use safe_arch::*;
126/// let a = m128i::from([i16::MAX, i16::MIN, 3, 4, -1, -2, -3, -4]);
127/// let b = m128i::from([i16::MAX, i16::MIN, 7, 8, -15, -26, -37, 48]);
128/// let c: [i16; 8] = add_saturating_i16_m128i(a, b).into();
129/// assert_eq!(c, [i16::MAX, i16::MIN, 10, 12, -16, -28, -40, 44]);
130/// ```
131#[must_use]
132#[inline(always)]
133#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
134pub fn add_saturating_i16_m128i(a: m128i, b: m128i) -> m128i {
135 m128i(unsafe { _mm_adds_epi16(a.0, b.0) })
136}
137
138/// Lanewise saturating `a + b` with lanes as `u8`.
139/// ```
140/// # use safe_arch::*;
141/// let a = m128i::from([
142/// u8::MAX, 0, 3, 4, 254, 2, 3, 4,
143/// 3, 4, 1, 2, 1, 2, 128, 4,
144/// ]);
145/// let b = m128i::from([
146/// u8::MAX, 0, 7, 8, 15, 26, 37, 48,
147/// 7, 8, 15, 26, 15, 26, 37, 48,
148/// ]);
149/// let c: [u8; 16] = add_saturating_u8_m128i(a, b).into();
150/// assert_eq!(
151/// c,
152/// [
153/// u8::MAX, 0, 10, 12, 255, 28, 40, 52,
154/// 10, 12, 16, 28, 16, 28, 165, 52
155/// ]
156/// );
157/// ```
158#[must_use]
159#[inline(always)]
160#[rustfmt::skip]
161#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
162pub fn add_saturating_u8_m128i(a: m128i, b: m128i) -> m128i {
163 m128i(unsafe { _mm_adds_epu8(a.0, b.0) })
164}
165
166/// Lanewise saturating `a + b` with lanes as `u16`.
167/// ```
168/// # use safe_arch::*;
169/// let a = m128i::from([u16::MAX, 0, 3, 4, 1, 2, 3, 4]);
170/// let b = m128i::from([u16::MAX, 0, 7, 8, 15, 26, 37, 48]);
171/// let c: [u16; 8] = add_saturating_u16_m128i(a, b).into();
172/// assert_eq!(c, [u16::MAX, 0, 10, 12, 16, 28, 40, 52]);
173/// ```
174#[must_use]
175#[inline(always)]
176#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
177pub fn add_saturating_u16_m128i(a: m128i, b: m128i) -> m128i {
178 m128i(unsafe { _mm_adds_epu16(a.0, b.0) })
179}
180
181/// Bitwise `a & b`.
182/// ```
183/// # use safe_arch::*;
184/// let a = m128d::from_array([1.0, 0.0]);
185/// let b = m128d::from_array([1.0, 1.0]);
186/// let c = bitand_m128d(a, b).to_array();
187/// assert_eq!(c, [1.0, 0.0]);
188/// ```
189#[must_use]
190#[inline(always)]
191#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
192pub fn bitand_m128d(a: m128d, b: m128d) -> m128d {
193 m128d(unsafe { _mm_and_pd(a.0, b.0) })
194}
195
196/// Bitwise `a & b`.
197/// ```
198/// # use safe_arch::*;
199/// let a = m128i::from([1, 0, 1, 0]);
200/// let b = m128i::from([1, 1, 0, 0]);
201/// let c: [i32; 4] = bitand_m128i(a, b).into();
202/// assert_eq!(c, [1, 0, 0, 0]);
203/// ```
204#[must_use]
205#[inline(always)]
206#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
207pub fn bitand_m128i(a: m128i, b: m128i) -> m128i {
208 m128i(unsafe { _mm_and_si128(a.0, b.0) })
209}
210
211/// Bitwise `(!a) & b`.
212/// ```
213/// # use safe_arch::*;
214/// let a = m128d::from_array([1.0, 0.0]);
215/// let b = m128d::from_array([1.0, 1.0]);
216/// let c = bitandnot_m128d(a, b).to_array();
217/// assert_eq!(c, [0.0, 1.0]);
218/// ```
219#[must_use]
220#[inline(always)]
221#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
222pub fn bitandnot_m128d(a: m128d, b: m128d) -> m128d {
223 m128d(unsafe { _mm_andnot_pd(a.0, b.0) })
224}
225
226/// Bitwise `(!a) & b`.
227/// ```
228/// # use safe_arch::*;
229/// let a = m128i::from([1, 0, 1, 0]);
230/// let b = m128i::from([1, 1, 0, 0]);
231/// let c: [i32; 4] = bitandnot_m128i(a, b).into();
232/// assert_eq!(c, [0, 1, 0, 0]);
233/// ```
234#[must_use]
235#[inline(always)]
236#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
237pub fn bitandnot_m128i(a: m128i, b: m128i) -> m128i {
238 m128i(unsafe { _mm_andnot_si128(a.0, b.0) })
239}
240
241/// Lanewise average of the `u8` values.
242/// ```
243/// # use safe_arch::*;
244/// let a = m128i::from([
245/// u8::MAX, 0, 3, 4, 254, 2, 3, 4,
246/// 3, 4, 1, 2, 1, 2, 128, 4,
247/// ]);
248/// let b = m128i::from([
249/// u8::MAX, 0, 7, 8, 15, 26, 37, 48,
250/// 7, 8, 15, 26, 15, 26, 37, 48,
251/// ]);
252/// let c: [u8; 16] = average_u8_m128i(a, b).into();
253/// assert_eq!(
254/// c,
255/// [
256/// u8::MAX, 0, 5, 6, 135, 14, 20, 26,
257/// 5, 6, 8, 14, 8, 14, 83, 26
258/// ]
259/// );
260/// ```
261#[must_use]
262#[inline(always)]
263#[rustfmt::skip]
264#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
265pub fn average_u8_m128i(a: m128i, b: m128i) -> m128i {
266 m128i(unsafe { _mm_avg_epu8(a.0, b.0) })
267}
268
269/// Lanewise average of the `u16` values.
270/// ```
271/// # use safe_arch::*;
272/// let a = m128i::from([u16::MAX, 0, 3, 4, 1, 2, 3, 4]);
273/// let b = m128i::from([u16::MAX, 0, 7, 8, 15, 26, 37, 48]);
274/// let c: [u16; 8] = average_u16_m128i(a, b).into();
275/// assert_eq!(c, [u16::MAX, 0, 5, 6, 8, 14, 20, 26]);
276/// ```
277#[must_use]
278#[inline(always)]
279#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
280pub fn average_u16_m128i(a: m128i, b: m128i) -> m128i {
281 m128i(unsafe { _mm_avg_epu16(a.0, b.0) })
282}
283
284/// Shifts all bits in the entire register left by a number of **bytes**.
285///
286/// ```
287/// # use safe_arch::*;
288/// let a = m128i::from(0x0000000B_0000000A_0000000F_11111111_u128);
289/// //
290/// let b: u128 = byte_shl_imm_u128_m128i::<1>(a).into();
291/// assert_eq!(b, 0x00000B00_00000A00_00000F11_11111100);
292/// ```
293#[must_use]
294#[inline(always)]
295#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
296pub fn byte_shl_imm_u128_m128i<const IMM: i32>(a: m128i) -> m128i {
297 m128i(unsafe { _mm_bslli_si128(a.0, IMM) })
298}
299
300/// Shifts all bits in the entire register right by a number of **bytes**.
301///
302/// ```
303/// # use safe_arch::*;
304/// let a = m128i::from(0x0000000B_0000000A_0000000F_11111111_u128);
305/// //
306/// let c: u128 = byte_shr_imm_u128_m128i::<1>(a).into();
307/// assert_eq!(c, 0x00000000_0B000000_0A000000_0F111111);
308/// ```
309#[must_use]
310#[inline(always)]
311#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
312pub fn byte_shr_imm_u128_m128i<const IMM: i32>(a: m128i) -> m128i {
313 m128i(unsafe { _mm_bsrli_si128(a.0, IMM) })
314}
315
316/// Bit-preserving cast to `m128` from `m128d`
317/// ```
318/// # use safe_arch::*;
319/// let a = m128d::from_array([1.0, 2.0]);
320/// let c: [u32; 4] = cast_to_m128_from_m128d(a).to_bits();
321/// assert_eq!(c, [0, 0x3FF00000, 0, 0x40000000]);
322/// ```
323#[must_use]
324#[inline(always)]
325#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
326pub fn cast_to_m128_from_m128d(a: m128d) -> m128 {
327 m128(unsafe { _mm_castpd_ps(a.0) })
328}
329
330/// Bit-preserving cast to `m128i` from `m128d`
331/// ```
332/// # use safe_arch::*;
333/// let a = m128d::from_array([1.0, 2.0]);
334/// let c: [u32; 4] = cast_to_m128i_from_m128d(a).into();
335/// assert_eq!(c, [0, 0x3FF00000, 0, 0x40000000]);
336/// ```
337#[must_use]
338#[inline(always)]
339#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
340pub fn cast_to_m128i_from_m128d(a: m128d) -> m128i {
341 m128i(unsafe { _mm_castpd_si128(a.0) })
342}
343
344/// Bit-preserving cast to `m128d` from `m128`
345/// ```
346/// # use safe_arch::*;
347/// let a = m128::from_array([1.0, 2.0, 3.0, 4.0]);
348/// let c: [u64; 2] = cast_to_m128d_from_m128(a).to_bits();
349/// assert_eq!(c, [0x400000003F800000, 0x4080000040400000]);
350/// ```
351#[must_use]
352#[inline(always)]
353#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
354pub fn cast_to_m128d_from_m128(a: m128) -> m128d {
355 m128d(unsafe { _mm_castps_pd(a.0) })
356}
357
358/// Bit-preserving cast to `m128i` from `m128`
359/// ```
360/// # use safe_arch::*;
361/// let a = m128::from_array([1.0, 2.0, 3.0, 4.0]);
362/// let c: [u32; 4] = cast_to_m128i_from_m128(a).into();
363/// assert_eq!(c, [0x3F800000, 0x40000000, 0x40400000, 0x40800000]);
364/// ```
365#[must_use]
366#[inline(always)]
367#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
368pub fn cast_to_m128i_from_m128(a: m128) -> m128i {
369 m128i(unsafe { _mm_castps_si128(a.0) })
370}
371
372/// Bit-preserving cast to `m128d` from `m128i`
373/// ```
374/// # use safe_arch::*;
375/// let a = m128i::from([1, 2, 3, 4]);
376/// let c: [u64; 2] = cast_to_m128d_from_m128i(a).to_bits();
377/// assert_eq!(c, [0x200000001, 0x400000003]);
378/// ```
379#[must_use]
380#[inline(always)]
381#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
382pub fn cast_to_m128d_from_m128i(a: m128i) -> m128d {
383 m128d(unsafe { _mm_castsi128_pd(a.0) })
384}
385
386/// Bit-preserving cast to `m128` from `m128i`
387/// ```
388/// # use safe_arch::*;
389/// let a = m128i::from([1, 2, 3, 4]);
390/// let c: [u32; 4] = cast_to_m128_from_m128i(a).to_bits();
391/// assert_eq!(c, [1, 2, 3, 4]);
392/// ```
393#[must_use]
394#[inline(always)]
395#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
396pub fn cast_to_m128_from_m128i(a: m128i) -> m128 {
397 m128(unsafe { _mm_castsi128_ps(a.0) })
398}
399
400/// Lanewise `a == b` with lanes as `i8`.
401///
402/// All bits 1 for true (`-1`), all bit 0 for false (`0`).
403/// ```
404/// # use safe_arch::*;
405/// let a = m128i::from([0_i8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 127]);
406/// let b = m128i::from([0_i8, 11, 2, 13, 4, 15, 6, 17, 8, 19, -20, 21, 22, -23, 24, 127]);
407/// let c: [i8; 16] = cmp_eq_mask_i8_m128i(a, b).into();
408/// assert_eq!(c, [-1, 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, 0, 0, 0, 0, -1]);
409/// ```
410#[must_use]
411#[inline(always)]
412#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
413pub fn cmp_eq_mask_i8_m128i(a: m128i, b: m128i) -> m128i {
414 m128i(unsafe { _mm_cmpeq_epi8(a.0, b.0) })
415}
416
417/// Lanewise `a == b` with lanes as `i16`.
418///
419/// All bits 1 for true (`-1`), all bit 0 for false (`0`).
420/// ```
421/// # use safe_arch::*;
422/// let a = m128i::from([1_i16, 2, 3, 4, -1, -2, -3, -4]);
423/// let b = m128i::from([5_i16, 2, 7, 4, -15, -26, -37, -4]);
424/// let c: [i16; 8] = cmp_eq_mask_i16_m128i(a, b).into();
425/// assert_eq!(c, [0, -1, 0, -1, 0, 0, 0, -1]);
426/// ```
427#[must_use]
428#[inline(always)]
429#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
430pub fn cmp_eq_mask_i16_m128i(a: m128i, b: m128i) -> m128i {
431 m128i(unsafe { _mm_cmpeq_epi16(a.0, b.0) })
432}
433
434/// Lanewise `a == b` with lanes as `i32`.
435///
436/// All bits 1 for true (`-1`), all bit 0 for false (`0`).
437/// ```
438/// # use safe_arch::*;
439/// let a = m128i::from([1, 2, 3, 4]);
440/// let b = m128i::from([5, 2, 7, 4]);
441/// let c: [i32; 4] = cmp_eq_mask_i32_m128i(a, b).into();
442/// assert_eq!(c, [0, -1, 0, -1]);
443/// ```
444#[must_use]
445#[inline(always)]
446#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
447pub fn cmp_eq_mask_i32_m128i(a: m128i, b: m128i) -> m128i {
448 m128i(unsafe { _mm_cmpeq_epi32(a.0, b.0) })
449}
450
451/// Lanewise `a == b`, mask output.
452///
453/// Mask output.
454/// ```
455/// # use safe_arch::*;
456/// let a = m128d::from_array([1.0, 0.0]);
457/// let b = m128d::from_array([1.0, 1.0]);
458/// let c = cmp_eq_mask_m128d(a, b).to_bits();
459/// assert_eq!(c, [u64::MAX, 0]);
460/// ```
461#[must_use]
462#[inline(always)]
463#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
464pub fn cmp_eq_mask_m128d(a: m128d, b: m128d) -> m128d {
465 m128d(unsafe { _mm_cmpeq_pd(a.0, b.0) })
466}
467
468/// Low lane `a == b`, other lanes unchanged.
469///
470/// Mask output.
471/// ```
472/// # use safe_arch::*;
473/// let a = m128d::from_array([1.0, 5.0]);
474/// let b = m128d::from_array([1.0, 1.0]);
475/// let c = cmp_eq_mask_m128d_s(a, b).to_bits();
476/// assert_eq!(c, [u64::MAX, 5_f64.to_bits()]);
477/// ```
478#[must_use]
479#[inline(always)]
480#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
481pub fn cmp_eq_mask_m128d_s(a: m128d, b: m128d) -> m128d {
482 m128d(unsafe { _mm_cmpeq_sd(a.0, b.0) })
483}
484
485/// Lanewise `a >= b`.
486///
487/// Mask output.
488/// ```
489/// # use safe_arch::*;
490/// let a = m128d::from_array([3.0, 1.0]);
491/// let b = m128d::from_array([1.0, 1.0]);
492/// let c = cmp_ge_mask_m128d(a, b).to_bits();
493/// assert_eq!(c, [u64::MAX, u64::MAX]);
494/// ```
495#[must_use]
496#[inline(always)]
497#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
498pub fn cmp_ge_mask_m128d(a: m128d, b: m128d) -> m128d {
499 m128d(unsafe { _mm_cmpge_pd(a.0, b.0) })
500}
501
502/// Low lane `a >= b`, other lanes unchanged.
503///
504/// Mask output.
505/// ```
506/// # use safe_arch::*;
507/// let a = m128d::from_array([1.0, 5.0]);
508/// let b = m128d::from_array([1.0, 1.0]);
509/// let c = cmp_ge_mask_m128d_s(a, b).to_bits();
510/// assert_eq!(c, [u64::MAX, 5_f64.to_bits()]);
511/// ```
512#[must_use]
513#[inline(always)]
514#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
515pub fn cmp_ge_mask_m128d_s(a: m128d, b: m128d) -> m128d {
516 m128d(unsafe { _mm_cmpge_sd(a.0, b.0) })
517}
518
519/// Lanewise `a > b` with lanes as `i8`.
520///
521/// All bits 1 for true (`-1`), all bit 0 for false (`0`).
522/// ```
523/// # use safe_arch::*;
524/// let a = m128i::from([1_i8, 1, 20, 3, 40, 5, 60, 7, 80, 9, 10, 11, 12, 13, 14, 127]);
525/// let b = m128i::from([0_i8, 11, 2, 13, 4, 15, 6, 17, 8, 19, -20, 21, 22, -23, 24, 120]);
526/// let c: [i8; 16] = cmp_gt_mask_i8_m128i(a, b).into();
527/// assert_eq!(c, [-1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1]);
528/// ```
529#[must_use]
530#[inline(always)]
531#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
532pub fn cmp_gt_mask_i8_m128i(a: m128i, b: m128i) -> m128i {
533 m128i(unsafe { _mm_cmpgt_epi8(a.0, b.0) })
534}
535
536/// Lanewise `a > b` with lanes as `i16`.
537///
538/// All bits 1 for true (`-1`), all bit 0 for false (`0`).
539/// ```
540/// # use safe_arch::*;
541/// let a = m128i::from([1_i16, 20, 3, 40, -1, -2, -3, 0]);
542/// let b = m128i::from([5_i16, 2, 7, 4, -15, -26, -37, -4]);
543/// let c: [i16; 8] = cmp_gt_mask_i16_m128i(a, b).into();
544/// assert_eq!(c, [0, -1, 0, -1, -1, -1, -1, -1]);
545/// ```
546#[must_use]
547#[inline(always)]
548#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
549pub fn cmp_gt_mask_i16_m128i(a: m128i, b: m128i) -> m128i {
550 m128i(unsafe { _mm_cmpgt_epi16(a.0, b.0) })
551}
552
553/// Lanewise `a > b` with lanes as `i32`.
554///
555/// All bits 1 for true (`-1`), all bit 0 for false (`0`).
556/// ```
557/// # use safe_arch::*;
558/// let a = m128i::from([1, 20, 7, 40]);
559/// let b = m128i::from([5, 2, 7, 4]);
560/// let c: [i32; 4] = cmp_gt_mask_i32_m128i(a, b).into();
561/// assert_eq!(c, [0, -1, 0, -1]);
562/// ```
563#[must_use]
564#[inline(always)]
565#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
566pub fn cmp_gt_mask_i32_m128i(a: m128i, b: m128i) -> m128i {
567 m128i(unsafe { _mm_cmpgt_epi32(a.0, b.0) })
568}
569
570/// Lanewise `a > b`.
571///
572/// Mask output.
573/// ```
574/// # use safe_arch::*;
575/// let a = m128d::from_array([2.0, 0.0]);
576/// let b = m128d::from_array([1.0, 1.0]);
577/// let c = cmp_gt_mask_m128d(a, b).to_bits();
578/// assert_eq!(c, [u64::MAX, 0]);
579/// ```
580#[must_use]
581#[inline(always)]
582#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
583pub fn cmp_gt_mask_m128d(a: m128d, b: m128d) -> m128d {
584 m128d(unsafe { _mm_cmpgt_pd(a.0, b.0) })
585}
586
587/// Low lane `a > b`, other lanes unchanged.
588///
589/// Mask output.
590/// ```
591/// # use safe_arch::*;
592/// let a = m128d::from_array([2.0, 5.0]);
593/// let b = m128d::from_array([1.0, 1.0]);
594/// let c = cmp_gt_mask_m128d_s(a, b).to_bits();
595/// assert_eq!(c, [u64::MAX, 5_f64.to_bits()]);
596/// ```
597#[must_use]
598#[inline(always)]
599#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
600pub fn cmp_gt_mask_m128d_s(a: m128d, b: m128d) -> m128d {
601 m128d(unsafe { _mm_cmpgt_sd(a.0, b.0) })
602}
603
604/// Lanewise `a <= b`.
605///
606/// Mask output.
607/// ```
608/// # use safe_arch::*;
609/// let a = m128d::from_array([0.0, 1.0]);
610/// let b = m128d::from_array([1.0, 1.0]);
611/// let c = cmp_le_mask_m128d(a, b).to_bits();
612/// assert_eq!(c, [u64::MAX, u64::MAX]);
613/// ```
614#[must_use]
615#[inline(always)]
616#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
617pub fn cmp_le_mask_m128d(a: m128d, b: m128d) -> m128d {
618 m128d(unsafe { _mm_cmple_pd(a.0, b.0) })
619}
620
621/// Low lane `a <= b`, other lanes unchanged.
622///
623/// Mask output.
624/// ```
625/// # use safe_arch::*;
626/// let a = m128d::from_array([0.0, 5.0]);
627/// let b = m128d::from_array([1.0, 1.0]);
628/// let c = cmp_le_mask_m128d_s(a, b).to_bits();
629/// assert_eq!(c, [u64::MAX, 5_f64.to_bits()]);
630/// ```
631#[must_use]
632#[inline(always)]
633#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
634pub fn cmp_le_mask_m128d_s(a: m128d, b: m128d) -> m128d {
635 m128d(unsafe { _mm_cmple_sd(a.0, b.0) })
636}
637
638/// Lanewise `a < b` with lanes as `i8`.
639///
640/// All bits 1 for true (`-1`), all bit 0 for false (`0`).
641/// ```
642/// # use safe_arch::*;
643/// let a = m128i::from([1_i8, 1, 20, 3, 40, 5, 60, 7, 80, 9, 10, 11, 12, 13, 14, 127]);
644/// let b = m128i::from([0_i8, 11, 2, 13, 4, 15, 6, 17, 8, 19, -20, 21, 22, -23, 24, 120]);
645/// let c: [i8; 16] = cmp_lt_mask_i8_m128i(a, b).into();
646/// assert_eq!(c, [0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, -1, 0, -1, 0]);
647/// ```
648#[must_use]
649#[inline(always)]
650#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
651pub fn cmp_lt_mask_i8_m128i(a: m128i, b: m128i) -> m128i {
652 m128i(unsafe { _mm_cmplt_epi8(a.0, b.0) })
653}
654
655/// Lanewise `a < b` with lanes as `i16`.
656///
657/// All bits 1 for true (`-1`), all bit 0 for false (`0`).
658/// ```
659/// # use safe_arch::*;
660/// let a = m128i::from([1_i16, 20, 3, 40, -1, -2, -3, 0]);
661/// let b = m128i::from([5_i16, 2, 7, 4, -15, -26, -37, -4]);
662/// let c: [i16; 8] = cmp_lt_mask_i16_m128i(a, b).into();
663/// assert_eq!(c, [-1, 0, -1, 0, 0, 0, 0, 0]);
664/// ```
665#[must_use]
666#[inline(always)]
667#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
668pub fn cmp_lt_mask_i16_m128i(a: m128i, b: m128i) -> m128i {
669 m128i(unsafe { _mm_cmplt_epi16(a.0, b.0) })
670}
671
672/// Lanewise `a < b` with lanes as `i32`.
673///
674/// All bits 1 for true (`-1`), all bit 0 for false (`0`).
675/// ```
676/// # use safe_arch::*;
677/// let a = m128i::from([1, 20, 7, 40]);
678/// let b = m128i::from([5, 2, 7, 4]);
679/// let c: [i32; 4] = cmp_lt_mask_i32_m128i(a, b).into();
680/// assert_eq!(c, [-1, 0, 0, 0]);
681/// ```
682#[must_use]
683#[inline(always)]
684#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
685pub fn cmp_lt_mask_i32_m128i(a: m128i, b: m128i) -> m128i {
686 m128i(unsafe { _mm_cmplt_epi32(a.0, b.0) })
687}
688
689/// Lanewise `a < b`.
690///
691/// Mask output.
692/// ```
693/// # use safe_arch::*;
694/// let a = m128d::from_array([0.0, 7.0]);
695/// let b = m128d::from_array([1.0, 1.0]);
696/// let c = cmp_lt_mask_m128d(a, b).to_bits();
697/// assert_eq!(c, [u64::MAX, 0]);
698/// ```
699#[must_use]
700#[inline(always)]
701#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
702pub fn cmp_lt_mask_m128d(a: m128d, b: m128d) -> m128d {
703 m128d(unsafe { _mm_cmplt_pd(a.0, b.0) })
704}
705
706/// Low lane `a < b`, other lane unchanged.
707///
708/// Mask output.
709/// ```
710/// # use safe_arch::*;
711/// let a = m128d::from_array([0.0, 5.0]);
712/// let b = m128d::from_array([1.0, 1.0]);
713/// let c = cmp_lt_mask_m128d_s(a, b).to_bits();
714/// assert_eq!(c, [u64::MAX, 5_f64.to_bits()]);
715/// ```
716#[must_use]
717#[inline(always)]
718#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
719pub fn cmp_lt_mask_m128d_s(a: m128d, b: m128d) -> m128d {
720 m128d(unsafe { _mm_cmplt_sd(a.0, b.0) })
721}
722
723/// Lanewise `a != b`.
724///
725/// Mask output.
726/// ```
727/// # use safe_arch::*;
728/// let a = m128d::from_array([3.0, 1.0]);
729/// let b = m128d::from_array([1.0, 1.0]);
730/// let c = cmp_neq_mask_m128d(a, b).to_bits();
731/// assert_eq!(c, [u64::MAX, 0]);
732/// ```
733#[must_use]
734#[inline(always)]
735#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
736pub fn cmp_neq_mask_m128d(a: m128d, b: m128d) -> m128d {
737 m128d(unsafe { _mm_cmpneq_pd(a.0, b.0) })
738}
739
740/// Low lane `a != b`, other lane unchanged.
741///
742/// Mask output.
743/// ```
744/// # use safe_arch::*;
745/// let a = m128d::from_array([2.0, 5.0]);
746/// let b = m128d::from_array([1.0, 1.0]);
747/// let c = cmp_neq_mask_m128d_s(a, b).to_bits();
748/// assert_eq!(c, [u64::MAX, 5_f64.to_bits()]);
749/// ```
750#[must_use]
751#[inline(always)]
752#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
753pub fn cmp_neq_mask_m128d_s(a: m128d, b: m128d) -> m128d {
754 m128d(unsafe { _mm_cmpneq_sd(a.0, b.0) })
755}
756
757/// Lanewise `!(a >= b)`.
758///
759/// Mask output.
760/// ```
761/// # use safe_arch::*;
762/// let a = m128d::from_array([3.0, 0.0]);
763/// let b = m128d::from_array([1.0, 1.0]);
764/// let c = cmp_nge_mask_m128d(a, b).to_bits();
765/// assert_eq!(c, [0, u64::MAX]);
766/// ```
767#[must_use]
768#[inline(always)]
769#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
770pub fn cmp_nge_mask_m128d(a: m128d, b: m128d) -> m128d {
771 m128d(unsafe { _mm_cmpnge_pd(a.0, b.0) })
772}
773
774/// Low lane `!(a >= b)`, other lane unchanged.
775///
776/// Mask output.
777/// ```
778/// # use safe_arch::*;
779/// let a = m128d::from_array([2.0, 5.0]);
780/// let b = m128d::from_array([1.0, 1.0]);
781/// let c = cmp_nge_mask_m128d_s(a, b).to_bits();
782/// assert_eq!(c, [0, 5_f64.to_bits()]);
783/// ```
784#[must_use]
785#[inline(always)]
786#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
787pub fn cmp_nge_mask_m128d_s(a: m128d, b: m128d) -> m128d {
788 m128d(unsafe { _mm_cmpnge_sd(a.0, b.0) })
789}
790
791/// Lanewise `!(a > b)`.
792///
793/// Mask output.
794/// ```
795/// # use safe_arch::*;
796/// let a = m128d::from_array([3.0, 0.0]);
797/// let b = m128d::from_array([1.0, 1.0]);
798/// let c = cmp_ngt_mask_m128d(a, b).to_bits();
799/// assert_eq!(c, [0, u64::MAX]);
800/// ```
801#[must_use]
802#[inline(always)]
803#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
804pub fn cmp_ngt_mask_m128d(a: m128d, b: m128d) -> m128d {
805 m128d(unsafe { _mm_cmpngt_pd(a.0, b.0) })
806}
807
808/// Low lane `!(a > b)`, other lane unchanged.
809///
810/// Mask output.
811/// ```
812/// # use safe_arch::*;
813/// let a = m128d::from_array([2.0, 5.0]);
814/// let b = m128d::from_array([1.0, 1.0]);
815/// let c = cmp_ngt_mask_m128d_s(a, b).to_bits();
816/// assert_eq!(c, [0, 5_f64.to_bits()]);
817/// ```
818#[must_use]
819#[inline(always)]
820#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
821pub fn cmp_ngt_mask_m128d_s(a: m128d, b: m128d) -> m128d {
822 m128d(unsafe { _mm_cmpngt_sd(a.0, b.0) })
823}
824
825/// Lanewise `!(a <= b)`.
826///
827/// Mask output.
828/// ```
829/// # use safe_arch::*;
830/// let a = m128d::from_array([3.0, 0.0]);
831/// let b = m128d::from_array([1.0, 1.0]);
832/// let c = cmp_nle_mask_m128d(a, b).to_bits();
833/// assert_eq!(c, [u64::MAX, 0]);
834/// ```
835#[must_use]
836#[inline(always)]
837#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
838pub fn cmp_nle_mask_m128d(a: m128d, b: m128d) -> m128d {
839 m128d(unsafe { _mm_cmpnle_pd(a.0, b.0) })
840}
841
842/// Low lane `!(a <= b)`, other lane unchanged.
843///
844/// Mask output.
845/// ```
846/// # use safe_arch::*;
847/// let a = m128d::from_array([2.0, 5.0]);
848/// let b = m128d::from_array([1.0, 1.0]);
849/// let c = cmp_nle_mask_m128d_s(a, b).to_bits();
850/// assert_eq!(c, [u64::MAX, 5_f64.to_bits()]);
851/// ```
852#[must_use]
853#[inline(always)]
854#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
855pub fn cmp_nle_mask_m128d_s(a: m128d, b: m128d) -> m128d {
856 m128d(unsafe { _mm_cmpnle_sd(a.0, b.0) })
857}
858
859/// Lanewise `!(a < b)`.
860///
861/// Mask output.
862/// ```
863/// # use safe_arch::*;
864/// let a = m128d::from_array([3.0, 0.0]);
865/// let b = m128d::from_array([1.0, 1.0]);
866/// let c = cmp_nlt_mask_m128d(a, b).to_bits();
867/// assert_eq!(c, [u64::MAX, 0]);
868/// ```
869#[must_use]
870#[inline(always)]
871#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
872pub fn cmp_nlt_mask_m128d(a: m128d, b: m128d) -> m128d {
873 m128d(unsafe { _mm_cmpnlt_pd(a.0, b.0) })
874}
875
876/// Low lane `!(a < b)`, other lane unchanged.
877///
878/// Mask output.
879/// ```
880/// # use safe_arch::*;
881/// let a = m128d::from_array([2.0, 5.0]);
882/// let b = m128d::from_array([1.0, 1.0]);
883/// let c = cmp_nlt_mask_m128d_s(a, b).to_bits();
884/// assert_eq!(c, [u64::MAX, 5_f64.to_bits()]);
885/// ```
886#[must_use]
887#[inline(always)]
888#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
889pub fn cmp_nlt_mask_m128d_s(a: m128d, b: m128d) -> m128d {
890 m128d(unsafe { _mm_cmpnlt_sd(a.0, b.0) })
891}
892
893/// Lanewise `(!a.is_nan()) & (!b.is_nan())`.
894///
895/// Mask output.
896/// ```
897/// # use safe_arch::*;
898/// let a = m128d::from_array([3.0, f64::NAN]);
899/// let b = m128d::from_array([1.0, 1.0]);
900/// let c = cmp_ordered_mask_m128d(a, b).to_bits();
901/// assert_eq!(c, [u64::MAX, 0]);
902/// ```
903#[must_use]
904#[inline(always)]
905#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
906pub fn cmp_ordered_mask_m128d(a: m128d, b: m128d) -> m128d {
907 m128d(unsafe { _mm_cmpord_pd(a.0, b.0) })
908}
909
910/// Low lane `(!a.is_nan()) & (!b.is_nan())`, other lane unchanged.
911///
912/// Mask output.
913/// ```
914/// # use safe_arch::*;
915/// let a = m128d::from_array([2.0, 5.0]);
916/// let b = m128d::from_array([1.0, 1.0]);
917/// let c = cmp_ordered_mask_m128d_s(a, b).to_bits();
918/// assert_eq!(c, [u64::MAX, 5_f64.to_bits()]);
919/// ```
920#[must_use]
921#[inline(always)]
922#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
923pub fn cmp_ordered_mask_m128d_s(a: m128d, b: m128d) -> m128d {
924 m128d(unsafe { _mm_cmpord_sd(a.0, b.0) })
925}
926
927/// Lanewise `a.is_nan() | b.is_nan()`.
928///
929/// Mask output.
930/// ```
931/// # use safe_arch::*;
932/// let a = m128d::from_array([f64::NAN, 0.0]);
933/// let b = m128d::from_array([1.0, 1.0]);
934/// let c = cmp_unord_mask_m128d(a, b).to_bits();
935/// assert_eq!(c, [u64::MAX, 0]);
936/// ```
937#[must_use]
938#[inline(always)]
939#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
940pub fn cmp_unord_mask_m128d(a: m128d, b: m128d) -> m128d {
941 m128d(unsafe { _mm_cmpunord_pd(a.0, b.0) })
942}
943
944/// Low lane `a.is_nan() | b.is_nan()`, other lane unchanged.
945///
946/// Mask output.
947/// ```
948/// # use safe_arch::*;
949/// let a = m128d::from_array([f64::NAN, 5.0]);
950/// let b = m128d::from_array([1.0, 1.0]);
951/// let c = cmp_unord_mask_m128d_s(a, b).to_bits();
952/// assert_eq!(c, [u64::MAX, 5_f64.to_bits()]);
953/// ```
954#[must_use]
955#[inline(always)]
956#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
957pub fn cmp_unord_mask_m128d_s(a: m128d, b: m128d) -> m128d {
958 m128d(unsafe { _mm_cmpunord_sd(a.0, b.0) })
959}
960
961/// Low lane `f64` equal to.
962///
963/// `i32` output.
964/// ```
965/// # use safe_arch::*;
966/// let a = m128d::from_array([1.0, 5.0]);
967/// let b = m128d::from_array([1.0, 1.0]);
968/// assert_eq!(1_i32, cmp_eq_i32_m128d_s(a, b));
969/// ```
970#[must_use]
971#[inline(always)]
972#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
973pub fn cmp_eq_i32_m128d_s(a: m128d, b: m128d) -> i32 {
974 unsafe { _mm_comieq_sd(a.0, b.0) }
975}
976
977/// Low lane `f64` greater than or equal to.
978///
979/// `i32` output.
980/// ```
981/// # use safe_arch::*;
982/// let a = m128d::from_array([1.0, 5.0]);
983/// let b = m128d::from_array([1.0, 1.0]);
984/// assert_eq!(1_i32, cmp_ge_i32_m128d_s(a, b));
985/// ```
986#[must_use]
987#[inline(always)]
988#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
989pub fn cmp_ge_i32_m128d_s(a: m128d, b: m128d) -> i32 {
990 unsafe { _mm_comige_sd(a.0, b.0) }
991}
992
993/// Low lane `f64` greater than.
994///
995/// `i32` output: `1` when the low lane of `a` is strictly greater than the
/// low lane of `b`.
996/// ```
997/// # use safe_arch::*;
998/// let a = m128d::from_array([2.0, 5.0]);
999/// let b = m128d::from_array([1.0, 1.0]);
1000/// assert_eq!(1_i32, cmp_gt_i32_m128d_s(a, b));
1001/// ```
/// * **Intrinsic:** [`_mm_comigt_sd`]
1002#[must_use]
1003#[inline(always)]
1004#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1005pub fn cmp_gt_i32_m128d_s(a: m128d, b: m128d) -> i32 {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1006 unsafe { _mm_comigt_sd(a.0, b.0) }
1007}
1008
1009/// Low lane `f64` less than or equal to.
1010///
1011/// `i32` output: `1` when the low lane of `a` is `<=` the low lane of `b`, as
/// in the example where both are `1.0`.
1012/// ```
1013/// # use safe_arch::*;
1014/// let a = m128d::from_array([1.0, 5.0]);
1015/// let b = m128d::from_array([1.0, 1.0]);
1016/// assert_eq!(1_i32, cmp_le_i32_m128d_s(a, b));
1017/// ```
/// * **Intrinsic:** [`_mm_comile_sd`]
1018#[must_use]
1019#[inline(always)]
1020#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1021pub fn cmp_le_i32_m128d_s(a: m128d, b: m128d) -> i32 {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1022 unsafe { _mm_comile_sd(a.0, b.0) }
1023}
1024
1025/// Low lane `f64` less than.
1026///
1027/// `i32` output: `1` when the low lane of `a` is strictly less than the low
/// lane of `b` (`0.0 < 1.0` in the example).
1028/// ```
1029/// # use safe_arch::*;
1030/// let a = m128d::from_array([0.0, 5.0]);
1031/// let b = m128d::from_array([1.0, 1.0]);
1032/// assert_eq!(1_i32, cmp_lt_i32_m128d_s(a, b));
1033/// ```
/// * **Intrinsic:** [`_mm_comilt_sd`]
1034#[must_use]
1035#[inline(always)]
1036#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1037pub fn cmp_lt_i32_m128d_s(a: m128d, b: m128d) -> i32 {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1038 unsafe { _mm_comilt_sd(a.0, b.0) }
1039}
1040
1041/// Low lane `f64` not equal to.
1042///
1043/// `i32` output: `1` when the low lanes differ (`0.0` vs `1.0` in the example).
1044/// ```
1045/// # use safe_arch::*;
1046/// let a = m128d::from_array([0.0, 5.0]);
1047/// let b = m128d::from_array([1.0, 1.0]);
1048/// assert_eq!(1_i32, cmp_neq_i32_m128d_s(a, b));
1049/// ```
/// * **Intrinsic:** [`_mm_comineq_sd`]
1050#[must_use]
1051#[inline(always)]
1052#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1053pub fn cmp_neq_i32_m128d_s(a: m128d, b: m128d) -> i32 {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1054 unsafe { _mm_comineq_sd(a.0, b.0) }
1055}
1056
1057/// Converts the lower two `i32` lanes to two `f64` lanes.
///
/// The upper two `i32` lanes of the input (`3` and `4` in the example) do not
/// appear in the output.
1058/// ```
1059/// # use safe_arch::*;
1060/// let a = m128i::from([1, 2, 3, 4]);
1061/// let b = convert_to_m128d_from_lower2_i32_m128i(a);
1062/// let c = m128d::from_array([1.0, 2.0]);
1063/// assert_eq!(b.to_bits(), c.to_bits());
1064/// ```
1065/// * **Intrinsic:** [`_mm_cvtepi32_pd`]
1066/// * **Assembly:** `cvtdq2pd xmm, xmm`
1067#[must_use]
1068#[inline(always)]
1069#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1070pub fn convert_to_m128d_from_lower2_i32_m128i(a: m128i) -> m128d {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1071 m128d(unsafe { _mm_cvtepi32_pd(a.0) })
1072}
1073
1074/// Rounds the four `i32` lanes to four `f32` lanes.
///
/// Each output lane holds the converted value of the input lane at the same
/// position, as the example shows.
1075/// ```
1076/// # use safe_arch::*;
1077/// let a = m128i::from([1, 2, 3, 4]);
1078/// let b = convert_to_m128_from_i32_m128i(a);
1079/// let c = m128::from_array([1.0, 2.0, 3.0, 4.0]);
1080/// assert_eq!(b.to_bits(), c.to_bits());
1081/// ```
1082/// * **Intrinsic:** [`_mm_cvtepi32_ps`]
1083/// * **Assembly:** `cvtdq2ps xmm, xmm`
1084#[must_use]
1085#[inline(always)]
1086#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1087pub fn convert_to_m128_from_i32_m128i(a: m128i) -> m128 {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1088 m128(unsafe { _mm_cvtepi32_ps(a.0) })
1089}
1090
1091/// Rounds the two `f64` lanes to the low two `i32` lanes.
///
/// The upper two `i32` lanes of the output are zero. In the example `2.5`
/// rounds to `2`. NOTE(review): the rounding presumably follows the current
/// MXCSR rounding mode (round-to-nearest-even by default) — confirm against
/// the intrinsic reference.
1092/// ```
1093/// # use safe_arch::*;
1094/// let a = m128d::from_array([1.0, 2.5]);
1095/// let b = convert_to_i32_m128i_from_m128d(a);
1096/// let c: [i32; 4] = b.into();
1097/// assert_eq!(c, [1, 2, 0, 0]);
1098/// ```
1099/// * **Intrinsic:** [`_mm_cvtpd_epi32`]
1100/// * **Assembly:** `cvtpd2dq xmm, xmm`
1101#[must_use]
1102#[inline(always)]
1103#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1104pub fn convert_to_i32_m128i_from_m128d(a: m128d) -> m128i {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1105 m128i(unsafe { _mm_cvtpd_epi32(a.0) })
1106}
1107
1108/// Rounds the two `f64` lanes to the low two `f32` lanes.
///
/// The upper two `f32` lanes of the output are zero, as the example shows.
1109/// ```
1110/// # use safe_arch::*;
1111/// let a = m128d::from_array([1.0, 2.5]);
1112/// let b = convert_to_m128_from_m128d(a);
1113/// assert_eq!(b.to_bits(), [1_f32.to_bits(), 2.5_f32.to_bits(), 0, 0]);
1114/// ```
1115/// * **Intrinsic:** [`_mm_cvtpd_ps`]
1116/// * **Assembly:** `cvtpd2ps xmm, xmm`
1117#[must_use]
1118#[inline(always)]
1119#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1120pub fn convert_to_m128_from_m128d(a: m128d) -> m128 {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1121 m128(unsafe { _mm_cvtpd_ps(a.0) })
1122}
1123
1124/// Rounds the `f32` lanes to `i32` lanes.
///
/// In the example `2.5` rounds to `2`. NOTE(review): the rounding presumably
/// follows the current MXCSR rounding mode (round-to-nearest-even by default)
/// — confirm against the intrinsic reference.
1125/// ```
1126/// # use safe_arch::*;
1127/// let a = m128::from_array([1.0, 2.5, 3.0, 4.0]);
1128/// let b = convert_to_i32_m128i_from_m128(a);
1129/// let c: [i32; 4] = b.into();
1130/// assert_eq!(c, [1, 2, 3, 4]);
1131/// ```
1132/// * **Intrinsic:** [`_mm_cvtps_epi32`]
1133/// * **Assembly:** `cvtps2dq xmm, xmm`
1134#[must_use]
1135#[inline(always)]
1136#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1137pub fn convert_to_i32_m128i_from_m128(a: m128) -> m128i {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1138 m128i(unsafe { _mm_cvtps_epi32(a.0) })
1139}
1140
1141/// Converts the lower two `f32` lanes to two `f64` lanes.
///
/// The upper two `f32` lanes of the input (`3.6` and `4.7` in the example) do
/// not appear in the output.
1142/// ```
1143/// # use safe_arch::*;
1144/// let a = m128::from_array([1.0, 2.5, 3.6, 4.7]);
1145/// let b = convert_to_m128d_from_lower2_m128(a);
1146/// assert_eq!(b.to_bits(), [1_f64.to_bits(), 2.5_f64.to_bits()]);
1147/// ```
1148/// * **Intrinsic:** [`_mm_cvtps_pd`]
1149/// * **Assembly:** `cvtps2pd xmm, xmm`
1150#[must_use]
1151#[inline(always)]
1152#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1153pub fn convert_to_m128d_from_lower2_m128(a: m128) -> m128d {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1154 m128d(unsafe { _mm_cvtps_pd(a.0) })
1155}
1156
1157/// Gets the lower lane as an `f64` value.
///
/// The high lane (`2.5` in the example) is ignored.
1158/// ```
1159/// # use safe_arch::*;
1160/// let a = m128d::from_array([1.0, 2.5]);
1161/// let b = get_f64_from_m128d_s(a);
1162/// assert_eq!(b, 1.0_f64);
1163/// ```
/// * **Intrinsic:** [`_mm_cvtsd_f64`]
1164#[must_use]
1165#[inline(always)]
1166#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1167pub fn get_f64_from_m128d_s(a: m128d) -> f64 {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1168 unsafe { _mm_cvtsd_f64(a.0) }
1169}
1170
1171/// Converts the lower `f64` lane to an `i32` value.
///
/// The high lane is ignored. For the truncating counterpart see
/// [`truncate_to_i32_m128d_s`].
1172/// ```
1173/// # use safe_arch::*;
1174/// let a = m128d::from_array([1.0, 2.5]);
1175/// let b = get_i32_from_m128d_s(a);
1176/// assert_eq!(b, 1_i32);
1177/// ```
/// * **Intrinsic:** [`_mm_cvtsd_si32`]
1178#[must_use]
1179#[inline(always)]
1180#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1181pub fn get_i32_from_m128d_s(a: m128d) -> i32 {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1182 unsafe { _mm_cvtsd_si32(a.0) }
1183}
1184
1185/// Converts the lower `f64` lane to an `i64` value.
///
/// The high lane is ignored. Only compiled for `x86_64` targets (see the
/// `cfg` attribute below).
1186/// ```
1187/// # use safe_arch::*;
1188/// let a = m128d::from_array([1.0, 2.5]);
1189/// let b = get_i64_from_m128d_s(a);
1190/// assert_eq!(b, 1_i64);
1191/// ```
/// * **Intrinsic:** [`_mm_cvtsd_si64`]
1192#[must_use]
1193#[inline(always)]
1194#[cfg(target_arch = "x86_64")]
1195#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1196pub fn get_i64_from_m128d_s(a: m128d) -> i64 {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1197 unsafe { _mm_cvtsd_si64(a.0) }
1198}
1199
1200/// Converts the low `f64` of `b` to `f32` and replaces the low lane of `a`.
///
/// The other three `f32` lanes of the output are copied from `a`, as the
/// example shows.
1201/// ```
1202/// # use safe_arch::*;
1203/// let a = m128::from_array([3.0, 4.0, 5.0, 6.0]);
1204/// let b = m128d::from_array([1.0, 2.5]);
1205/// let c = convert_m128d_s_replace_m128_s(a, b);
1206/// assert_eq!(c.to_array(), [1.0, 4.0, 5.0, 6.0]);
1207/// ```
1208/// * **Intrinsic:** [`_mm_cvtsd_ss`]
1209/// * **Assembly:** `cvtsd2ss xmm, xmm`
1210#[must_use]
1211#[inline(always)]
// NOTE(review): gated on `x86_64`, while the mirror-image
// `convert_m128_s_replace_m128d_s` in this file is not — confirm the
// restriction is intended.
1212#[cfg(target_arch = "x86_64")]
1213#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1214pub fn convert_m128d_s_replace_m128_s(a: m128, b: m128d) -> m128 {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1215 m128(unsafe { _mm_cvtsd_ss(a.0, b.0) })
1216}
1217
1218/// Gets the lowest `i32` lane as an `i32` value.
///
/// The other three lanes (`3`, `5`, `7` in the example) are ignored.
1219/// ```
1220/// # use safe_arch::*;
1221/// let a = m128i::from([1, 3, 5, 7]);
1222/// let b = get_i32_from_m128i_s(a);
1223/// assert_eq!(b, 1_i32);
1224/// ```
/// * **Intrinsic:** [`_mm_cvtsi128_si32`]
1225#[must_use]
1226#[inline(always)]
1227#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1228pub fn get_i32_from_m128i_s(a: m128i) -> i32 {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1229 unsafe { _mm_cvtsi128_si32(a.0) }
1230}
1231
1232/// Gets the lower `i64` lane as an `i64` value.
///
/// The high lane is ignored. Only compiled for `x86_64` targets (see the
/// `cfg` attribute below).
1233/// ```
1234/// # use safe_arch::*;
1235/// let a = m128i::from([1_i64, 3]);
1236/// let b = get_i64_from_m128i_s(a);
1237/// assert_eq!(b, 1_i64);
1238/// ```
/// * **Intrinsic:** [`_mm_cvtsi128_si64`]
1239#[must_use]
1240#[inline(always)]
1241#[cfg(target_arch = "x86_64")]
1242#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1243pub fn get_i64_from_m128i_s(a: m128i) -> i64 {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1244 unsafe { _mm_cvtsi128_si64(a.0) }
1245}
1246
1247/// Convert `i32` to `f64` and replace the low lane of the input.
///
/// The high lane of the output is copied from `a`, as the example shows.
1248/// ```
1249/// # use safe_arch::*;
1250/// let a = m128d::from_array([1.0, 2.0]);
1251/// let b = convert_i32_replace_m128d_s(a, 5_i32);
1252/// assert_eq!(b.to_array(), [5.0, 2.0]);
1253/// ```
1254/// * **Intrinsic:** [`_mm_cvtsi32_sd`]
1255/// * **Assembly:** `cvtsi2sd xmm, r32`
1256#[must_use]
1257#[inline(always)]
1258#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1259pub fn convert_i32_replace_m128d_s(a: m128d, i: i32) -> m128d {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1260 m128d(unsafe { _mm_cvtsi32_sd(a.0, i) })
1261}
1262
1263/// Set an `i32` as the low 32-bit lane of an `m128i`, other lanes zero.
1264/// ```
1265/// # use safe_arch::*;
1266/// let a: [i32; 4] = set_i32_m128i_s(1_i32).into();
1267/// let b: [i32; 4] = m128i::from([1, 0, 0, 0]).into();
1268/// assert_eq!(a, b);
1269/// ```
/// * **Intrinsic:** [`_mm_cvtsi32_si128`]
1270#[must_use]
1271#[inline(always)]
1272#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1273pub fn set_i32_m128i_s(i: i32) -> m128i {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1274 m128i(unsafe { _mm_cvtsi32_si128(i) })
1275}
1276
1277/// Convert `i64` to `f64` and replace the low lane of the input.
///
/// The high lane of the output is copied from `a`, as the example shows.
/// Only compiled for `x86_64` targets (see the `cfg` attribute below).
1278/// ```
1279/// # use safe_arch::*;
1280/// let a = m128d::from_array([1.0, 2.0]);
1281/// let b = convert_i64_replace_m128d_s(a, 5_i64);
1282/// assert_eq!(b.to_array(), [5.0, 2.0]);
1283/// ```
1284/// * **Intrinsic:** [`_mm_cvtsi64_sd`]
1285/// * **Assembly:** `cvtsi2sd xmm, r64`
1286#[must_use]
1287#[inline(always)]
1288#[cfg(target_arch = "x86_64")]
1289#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1290pub fn convert_i64_replace_m128d_s(a: m128d, i: i64) -> m128d {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1291 m128d(unsafe { _mm_cvtsi64_sd(a.0, i) })
1292}
1293
1294/// Set an `i64` as the low 64-bit lane of an `m128i`, other lane zero.
///
/// Only compiled for `x86_64` targets (see the `cfg` attribute below).
1295/// ```
1296/// # use safe_arch::*;
1297/// let a: [i64; 2] = set_i64_m128i_s(1_i64).into();
1298/// let b: [i64; 2] = m128i::from([1_i64, 0]).into();
1299/// assert_eq!(a, b);
1300/// ```
/// * **Intrinsic:** [`_mm_cvtsi64_si128`]
1301#[must_use]
1302#[inline(always)]
1303#[cfg(target_arch = "x86_64")]
1304#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1305pub fn set_i64_m128i_s(i: i64) -> m128i {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1306 m128i(unsafe { _mm_cvtsi64_si128(i) })
1307}
1308
1309/// Converts the lowest `f32` of `b` to `f64` and replaces the low lane of `a`.
///
/// The high lane of the output is copied from `a`, as the example shows.
1310/// ```
1311/// # use safe_arch::*;
1312/// let a = m128d::from_array([1.0, 2.5]);
1313/// let b = m128::from_array([3.0, 4.0, 5.0, 6.0]);
1314/// let c = convert_m128_s_replace_m128d_s(a, b);
1315/// assert_eq!(c.to_array(), [3.0, 2.5]);
1316/// ```
1317/// * **Intrinsic:** [`_mm_cvtss_sd`]
1318/// * **Assembly:** `cvtss2sd xmm, xmm`
1319#[must_use]
1320#[inline(always)]
1321#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1322pub fn convert_m128_s_replace_m128d_s(a: m128d, b: m128) -> m128d {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1323 m128d(unsafe { _mm_cvtss_sd(a.0, b.0) })
1324}
1325
1326/// Truncate the `f64` lanes to the lower `i32` lanes (upper `i32` lanes 0).
///
/// The fractional part is dropped: `2.6` becomes `2` in the example.
1327/// ```
1328/// # use safe_arch::*;
1329/// let a = m128d::from_array([1.1, 2.6]);
1330/// let b = truncate_m128d_to_m128i(a);
1331/// assert_eq!(<[i32; 4]>::from(b), [1, 2, 0, 0]);
1332/// ```
/// * **Intrinsic:** [`_mm_cvttpd_epi32`]
1333#[must_use]
1334#[inline(always)]
1335#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1336pub fn truncate_m128d_to_m128i(a: m128d) -> m128i {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1337 m128i(unsafe { _mm_cvttpd_epi32(a.0) })
1338}
1339
1340/// Truncate the `f32` lanes to `i32` lanes.
///
/// The fractional part is dropped: `2.6` becomes `2` and `3.5` becomes `3` in
/// the example.
1341/// ```
1342/// # use safe_arch::*;
1343/// let a = m128::from_array([1.1, 2.6, 3.5, 4.0]);
1344/// let b = truncate_m128_to_m128i(a);
1345/// assert_eq!(<[i32; 4]>::from(b), [1, 2, 3, 4]);
1346/// ```
/// * **Intrinsic:** [`_mm_cvttps_epi32`]
1347#[must_use]
1348#[inline(always)]
1349#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1350pub fn truncate_m128_to_m128i(a: m128) -> m128i {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1351 m128i(unsafe { _mm_cvttps_epi32(a.0) })
1352}
1353
1354/// Truncate the lower `f64` lane into an `i32`.
///
/// The fractional part is dropped (`1.7` becomes `1` in the example) and the
/// high lane is ignored.
1355/// ```
1356/// # use safe_arch::*;
1357/// let a = m128d::from_array([1.7, 2.6]);
1358/// assert_eq!(truncate_to_i32_m128d_s(a), 1_i32);
1359/// ```
/// * **Intrinsic:** [`_mm_cvttsd_si32`]
1360#[must_use]
1361#[inline(always)]
1362#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1363pub fn truncate_to_i32_m128d_s(a: m128d) -> i32 {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1364 unsafe { _mm_cvttsd_si32(a.0) }
1365}
1366
1367/// Truncate the lower `f64` lane into an `i64`.
///
/// The fractional part is dropped (`1.7` becomes `1` in the example) and the
/// high lane is ignored. Only compiled for `x86_64` targets (see the `cfg`
/// attribute below).
1368/// ```
1369/// # use safe_arch::*;
1370/// let a = m128d::from_array([1.7, 2.6]);
1371/// assert_eq!(truncate_to_i64_m128d_s(a), 1_i64);
1372/// ```
/// * **Intrinsic:** [`_mm_cvttsd_si64`]
1373#[must_use]
1374#[inline(always)]
1375#[cfg(target_arch = "x86_64")]
1376#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1377pub fn truncate_to_i64_m128d_s(a: m128d) -> i64 {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1378 unsafe { _mm_cvttsd_si64(a.0) }
1379}
1380
1381/// Lanewise `a / b` with lanes as `f64`.
1382/// ```
1383/// # use safe_arch::*;
1384/// let a = m128d::from_array([92.0, 42.0]);
1385/// let b = m128d::from_array([100.0, -6.0]);
1386/// let c = div_m128d(a, b).to_array();
1387/// assert_eq!(c, [0.92, -7.0]);
1388/// ```
/// * **Intrinsic:** [`_mm_div_pd`]
1389#[must_use]
1390#[inline(always)]
1391#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1392pub fn div_m128d(a: m128d, b: m128d) -> m128d {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1393 m128d(unsafe { _mm_div_pd(a.0, b.0) })
1394}
1395
1396/// Lowest lane `a / b`, high lane unchanged.
///
/// "Unchanged" means the high lane is copied from `a` (`87.5` in the example);
/// the high lane of `b` is not used.
1397/// ```
1398/// # use safe_arch::*;
1399/// let a = m128d::from_array([92.0, 87.5]);
1400/// let b = m128d::from_array([100.0, -600.0]);
1401/// let c = div_m128d_s(a, b).to_array();
1402/// assert_eq!(c, [0.92, 87.5]);
1403/// ```
/// * **Intrinsic:** [`_mm_div_sd`]
1404#[must_use]
1405#[inline(always)]
1406#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1407pub fn div_m128d_s(a: m128d, b: m128d) -> m128d {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1408 m128d(unsafe { _mm_div_sd(a.0, b.0) })
1409}
1410
1411/// Gets an `i16` value out of an `m128i`, returns as `i32`.
1412///
1413/// The lane to get must be a constant in `0..8`; it is supplied through the
/// `LANE` const generic parameter.
1414///
1415/// ```
1416/// # use safe_arch::*;
1417/// let a = m128i::from([0xA_i16, 0xB, 0xC, 0xD, 0, 0, 0, 0]);
1418/// //
1419/// assert_eq!(extract_i16_as_i32_m128i::<0>(a), 0xA);
1420/// assert_eq!(extract_i16_as_i32_m128i::<1>(a), 0xB);
1421/// ```
/// * **Intrinsic:** [`_mm_extract_epi16`]
1422#[must_use]
1423#[inline(always)]
1424#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1425pub fn extract_i16_as_i32_m128i<const LANE: i32>(a: m128i) -> i32 {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1426 unsafe { _mm_extract_epi16(a.0, LANE) }
1427}
1428
1429/// Inserts the low 16 bits of an `i32` value into an `m128i`.
1430///
1431/// The lane to set must be a constant in `0..8`; it is supplied through the
/// `LANE` const generic parameter. All other lanes are copied from `a`, as
/// the example shows.
1432///
1433/// ```
1434/// # use safe_arch::*;
1435/// let a = m128i::from([0xA_i16, 0xB, 0xC, 0xD, 0, 0, 0, 0]);
1436/// //
1437/// let b = insert_i16_from_i32_m128i::<0>(a, -1);
1438/// assert_eq!(<[i16; 8]>::from(b), [-1, 0xB, 0xC, 0xD, 0, 0, 0, 0]);
1439/// ```
/// * **Intrinsic:** [`_mm_insert_epi16`]
1440#[must_use]
1441#[inline(always)]
1442#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1443pub fn insert_i16_from_i32_m128i<const LANE: i32>(a: m128i, i: i32) -> m128i {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1444 m128i(unsafe { _mm_insert_epi16(a.0, i, LANE) })
1445}
1446
1447/// Loads the `m128d` reference into a register.
1448/// ```
1449/// # use safe_arch::*;
1450/// let a = m128d::from_array([10.0, 12.0]);
1451/// let b = load_m128d(&a);
1452/// assert_eq!(a.to_bits(), b.to_bits());
1453/// ```
/// * **Intrinsic:** [`_mm_load_pd`]
1454#[must_use]
1455#[inline(always)]
1456#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1457pub fn load_m128d(a: &m128d) -> m128d {
  // SAFETY: sse2 is enabled for this file (file-level `cfg`), and the pointer
  // is derived from a live reference. NOTE(review): the aligned load
  // presumably relies on `m128d` being 16-byte aligned — confirm on the type.
1458 m128d(unsafe { _mm_load_pd(a as *const m128d as *const f64) })
1459}
1460
1461/// Loads the `f64` reference into all lanes of a register.
///
/// Both lanes of the output hold the referenced value, as the example shows.
1462/// ```
1463/// # use safe_arch::*;
1464/// let a = 1.0;
1465/// let b = load_f64_splat_m128d(&a);
1466/// assert_eq!(m128d::from_array([1.0, 1.0]).to_bits(), b.to_bits());
1467/// ```
/// * **Intrinsic:** [`_mm_load1_pd`]
1468#[must_use]
1469#[inline(always)]
// The by-reference parameter is deliberate: the intrinsic reads through a
// pointer.
1470#[allow(clippy::trivially_copy_pass_by_ref)]
1471#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1472pub fn load_f64_splat_m128d(a: &f64) -> m128d {
  // SAFETY: sse2 is enabled for this file (file-level `cfg`), and the pointer
  // is derived from a live `&f64`.
1473 m128d(unsafe { _mm_load1_pd(a) })
1474}
1475
1476/// Loads the `f64` reference into the low lane of the register.
///
/// The high lane of the output is zero, as the example shows.
1477/// ```
1478/// # use safe_arch::*;
1479/// let a = 1.0;
1480/// let b = load_f64_m128d_s(&a);
1481/// assert_eq!(m128d::from_array([1.0, 0.0]).to_bits(), b.to_bits());
1482/// ```
/// * **Intrinsic:** [`_mm_load_sd`]
1483#[must_use]
1484#[inline(always)]
// The by-reference parameter is deliberate: the intrinsic reads through a
// pointer.
1485#[allow(clippy::trivially_copy_pass_by_ref)]
1486#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1487pub fn load_f64_m128d_s(a: &f64) -> m128d {
  // SAFETY: sse2 is enabled for this file (file-level `cfg`), and the pointer
  // is derived from a live `&f64`.
1488 m128d(unsafe { _mm_load_sd(a) })
1489}
1490
1491/// Loads the `m128i` reference into a register.
1492/// ```
1493/// # use safe_arch::*;
1494/// let a = m128i::from([1, 2, 3, 4]);
1495/// let b = load_m128i(&a);
1496/// assert_eq!(<[i32; 4]>::from(a), <[i32; 4]>::from(b));
1497/// ```
/// * **Intrinsic:** [`_mm_load_si128`]
1498#[must_use]
1499#[inline(always)]
1500#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1501pub fn load_m128i(a: &m128i) -> m128i {
  // SAFETY: sse2 is enabled for this file (file-level `cfg`), and the pointer
  // is derived from a live reference. NOTE(review): the aligned load
  // presumably relies on `m128i` being 16-byte aligned — confirm on the type.
1502 m128i(unsafe { _mm_load_si128(a as *const m128i as *const __m128i) })
1503}
1504
1505/// Loads the `f64` reference into a register, replacing the high lane.
///
/// The low lane of the output is copied from `a`; the high lane is the value
/// behind `b`.
1506/// ```
1507/// # use safe_arch::*;
1508/// let a = m128d::from([1.0, 2.0]);
1509/// let double = 7.0;
1510/// let b = load_replace_high_m128d(a, &double);
1511/// assert_eq!(b.to_array(), [1.0, 7.0]);
1512/// ```
/// * **Intrinsic:** [`_mm_loadh_pd`]
1513#[must_use]
1514#[inline(always)]
1515#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1516pub fn load_replace_high_m128d(a: m128d, b: &f64) -> m128d {
  // SAFETY: sse2 is enabled for this file (file-level `cfg`), and the pointer
  // is derived from a live `&f64`.
1517 m128d(unsafe { _mm_loadh_pd(a.0, b) })
1518}
1519
1520/// Loads the low `i64` of the reference into a register.
///
/// The high `i64` lane of the output is zero, as the example shows.
1521/// ```
1522/// # use safe_arch::*;
1523/// let a = m128i::from([1_i64, 2]);
1524/// let b = load_i64_m128i_s(&a);
1525/// assert_eq!([1_i64, 0], <[i64; 2]>::from(b));
1526/// ```
/// * **Intrinsic:** [`_mm_loadl_epi64`]
1527#[must_use]
1528#[inline(always)]
1529#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1530pub fn load_i64_m128i_s(a: &m128i) -> m128i {
  // SAFETY: sse2 is enabled for this file (file-level `cfg`), and the pointer
  // is derived from a live reference to a full `m128i`.
1531 m128i(unsafe { _mm_loadl_epi64(a as *const m128i as *const __m128i) })
1532}
1533
1534/// Loads the `f64` reference into a register, replacing the low lane.
///
/// The high lane of the output is copied from `a`; the low lane is the value
/// behind `b`.
1535/// ```
1536/// # use safe_arch::*;
1537/// let a = m128d::from([1.0, 2.0]);
1538/// let double = 7.0;
1539/// let b = load_replace_low_m128d(a, &double);
1540/// assert_eq!(b.to_array(), [7.0, 2.0]);
1541/// ```
/// * **Intrinsic:** [`_mm_loadl_pd`]
1542#[must_use]
1543#[inline(always)]
1544#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1545pub fn load_replace_low_m128d(a: m128d, b: &f64) -> m128d {
  // SAFETY: sse2 is enabled for this file (file-level `cfg`), and the pointer
  // is derived from a live `&f64`.
1546 m128d(unsafe { _mm_loadl_pd(a.0, b) })
1547}
1548
1549/// Loads the reference into a register with reversed lane order.
///
/// The low lane of the input becomes the high lane of the output and vice
/// versa, as the example shows.
1550/// ```
1551/// # use safe_arch::*;
1552/// let a = m128d::from_array([10.0, 12.0]);
1553/// let b = load_reverse_m128d(&a);
1554/// assert_eq!(m128d::from_array([12.0, 10.0]).to_bits(), b.to_bits());
1555/// ```
/// * **Intrinsic:** [`_mm_loadr_pd`]
1556#[must_use]
1557#[inline(always)]
1558#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1559pub fn load_reverse_m128d(a: &m128d) -> m128d {
  // SAFETY: sse2 is enabled for this file (file-level `cfg`), and the pointer
  // is derived from a live reference. NOTE(review): the aligned load
  // presumably relies on `m128d` being 16-byte aligned — confirm on the type.
1560 m128d(unsafe { _mm_loadr_pd(a as *const m128d as *const f64) })
1561}
1562
1563/// Loads the `[f64; 2]` reference into a register.
1564///
1565/// This generally has no speed penalty if the reference happens to be 16-byte
1566/// aligned, but there is a slight speed penalty if the reference is only 8-byte
1567/// aligned.
1568/// ```
1569/// # use safe_arch::*;
1570/// let a = [10.0, 12.0];
1571/// let b = load_unaligned_m128d(&a);
1572/// assert_eq!(m128d::from_array(a).to_bits(), b.to_bits());
1573/// ```
/// * **Intrinsic:** [`_mm_loadu_pd`]
1574#[must_use]
1575#[inline(always)]
1576#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1577pub fn load_unaligned_m128d(a: &[f64; 2]) -> m128d {
  // SAFETY: sse2 is enabled for this file (file-level `cfg`); the reference
  // covers both `f64` values that are read, and the unaligned-load intrinsic
  // is used so no extra alignment is relied on.
1578 m128d(unsafe { _mm_loadu_pd(a as *const [f64; 2] as *const f64) })
1579}
1580
1581/// Loads the `[u8; 16]` reference into a register.
1582///
1583/// This generally has no speed penalty if the reference happens to be 16-byte
1584/// aligned, but there is a slight speed penalty if the reference is less
1585/// aligned.
1586/// ```
1587/// # use safe_arch::*;
1588/// let a = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
1589/// let b = load_unaligned_m128i(&a);
1590/// assert_eq!(a, <[u8; 16]>::from(b));
1591/// ```
/// * **Intrinsic:** [`_mm_loadu_si128`]
1592#[must_use]
1593#[inline(always)]
// The pointer cast raises the nominal alignment, which is acceptable here
// because the pointer is only handed to the unaligned-load intrinsic.
1594#[allow(clippy::cast_ptr_alignment)]
1595#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1596pub fn load_unaligned_m128i(a: &[u8; 16]) -> m128i {
  // SAFETY: sse2 is enabled for this file (file-level `cfg`); the reference
  // covers all 16 bytes that are read, and the unaligned-load intrinsic is
  // used so no extra alignment is relied on.
1597 m128i(unsafe { _mm_loadu_si128(a as *const [u8; 16] as *const __m128i) })
1598}
1599
1600/// Multiply `i16` lanes producing `i32` values, horizontal add pairs of `i32`
1601/// values to produce the final output.
///
/// Output lane `n` is `a[2n] * b[2n] + a[2n + 1] * b[2n + 1]`; for instance
/// the first output lane in the example is `1 * 5 + 2 * 6 = 17`.
1602/// ```
1603/// # use safe_arch::*;
1604/// let a = m128i::from([1_i16, 2, 3, 4, -1, -2, -3, -4]);
1605/// let b = m128i::from([5_i16, 6, 7, 8, -15, -26, -37, 48]);
1606/// let c: [i32; 4] = mul_i16_horizontal_add_m128i(a, b).into();
1607/// assert_eq!(c, [17, 53, 67, -81]);
1608/// ```
/// * **Intrinsic:** [`_mm_madd_epi16`]
1609#[must_use]
1610#[inline(always)]
1611#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1612pub fn mul_i16_horizontal_add_m128i(a: m128i, b: m128i) -> m128i {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1613 m128i(unsafe { _mm_madd_epi16(a.0, b.0) })
1614}
1615
1616/// Lanewise `max(a, b)` with lanes as `u8`.
///
/// Each of the sixteen output lanes holds the larger of the two input lanes
/// at that position.
1617/// ```
1618/// # use safe_arch::*;
1619/// let a = m128i::from([0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
1620/// let b = m128i::from([0_u8, 11, 2, 13, 4, 15, 6, 17, 8, 19, 20, 21, 22, 23, 24, 127]);
1621/// let c: [u8; 16] = max_u8_m128i(a, b).into();
1622/// assert_eq!(c, [0, 11, 2, 13, 4, 15, 6, 17, 8, 19, 20, 21, 22, 23, 24, 127]);
1623/// ```
/// * **Intrinsic:** [`_mm_max_epu8`]
1624#[must_use]
1625#[inline(always)]
1626#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1627pub fn max_u8_m128i(a: m128i, b: m128i) -> m128i {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1628 m128i(unsafe { _mm_max_epu8(a.0, b.0) })
1629}
1630
1631/// Lanewise `max(a, b)` with lanes as `i16`.
///
/// The comparison is signed: in the example `-1` is chosen over `-15`.
1632/// ```
1633/// # use safe_arch::*;
1634/// let a = m128i::from([1_i16, 2, 3, 4, -1, -2, -3, -4]);
1635/// let b = m128i::from([5_i16, 6, 7, 8, -15, -26, -37, 48]);
1636/// let c: [i16; 8] = max_i16_m128i(a, b).into();
1637/// assert_eq!(c, [5_i16, 6, 7, 8, -1, -2, -3, 48]);
1638/// ```
/// * **Intrinsic:** [`_mm_max_epi16`]
1639#[must_use]
1640#[inline(always)]
1641#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1642pub fn max_i16_m128i(a: m128i, b: m128i) -> m128i {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1643 m128i(unsafe { _mm_max_epi16(a.0, b.0) })
1644}
1645
1646/// Lanewise `max(a, b)` with lanes as `f64`.
1647/// ```
1648/// # use safe_arch::*;
1649/// let a = m128d::from_array([5.0, 2.0]);
1650/// let b = m128d::from_array([1.0, 6.0]);
1651/// let c = max_m128d(a, b).to_array();
1652/// assert_eq!(c, [5.0, 6.0]);
1653/// ```
/// * **Intrinsic:** [`_mm_max_pd`]
1654#[must_use]
1655#[inline(always)]
1656#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1657pub fn max_m128d(a: m128d, b: m128d) -> m128d {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1658 m128d(unsafe { _mm_max_pd(a.0, b.0) })
1659}
1660
1661/// Low lane `max(a, b)`, other lanes unchanged.
///
/// "Unchanged" means the high lane is copied from `a` (`12.0` in the
/// example); the high lane of `b` is not used.
1662/// ```
1663/// # use safe_arch::*;
1664/// let a = m128d::from_array([1.0, 12.0]);
1665/// let b = m128d::from_array([5.0, 6.0]);
1666/// let c = max_m128d_s(a, b).to_array();
1667/// assert_eq!(c, [5.0, 12.0]);
1668/// ```
/// * **Intrinsic:** [`_mm_max_sd`]
1669#[must_use]
1670#[inline(always)]
1671#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1672pub fn max_m128d_s(a: m128d, b: m128d) -> m128d {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1673 m128d(unsafe { _mm_max_sd(a.0, b.0) })
1674}
1675
1676/// Lanewise `min(a, b)` with lanes as `u8`.
///
/// Each of the sixteen output lanes holds the smaller of the two input lanes
/// at that position.
1677/// ```
1678/// # use safe_arch::*;
1679/// let a = m128i::from([0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
1680/// let b = m128i::from([0_u8, 11, 2, 13, 4, 15, 6, 17, 8, 0, 20, 0, 22, 0, 24, 0]);
1681/// let c: [u8; 16] = min_u8_m128i(a, b).into();
1682/// assert_eq!(c, [0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 0, 10, 0, 12, 0, 14, 0]);
1683/// ```
/// * **Intrinsic:** [`_mm_min_epu8`]
1684#[must_use]
1685#[inline(always)]
1686#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1687pub fn min_u8_m128i(a: m128i, b: m128i) -> m128i {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1688 m128i(unsafe { _mm_min_epu8(a.0, b.0) })
1689}
1690
1691/// Lanewise `min(a, b)` with lanes as `i16`.
///
/// The comparison is signed: in the example `-15` is chosen over `-1`.
1692/// ```
1693/// # use safe_arch::*;
1694/// let a = m128i::from([1_i16, 2, 3, 4, -1, -2, -3, -4]);
1695/// let b = m128i::from([5_i16, 6, 7, 8, -15, -26, -37, 48]);
1696/// let c: [i16; 8] = min_i16_m128i(a, b).into();
1697/// assert_eq!(c, [1_i16, 2, 3, 4, -15, -26, -37, -4]);
1698/// ```
/// * **Intrinsic:** [`_mm_min_epi16`]
1699#[must_use]
1700#[inline(always)]
1701#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1702pub fn min_i16_m128i(a: m128i, b: m128i) -> m128i {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1703 m128i(unsafe { _mm_min_epi16(a.0, b.0) })
1704}
1705
1706/// Lanewise `min(a, b)` with lanes as `f64`.
1707/// ```
1708/// # use safe_arch::*;
1709/// let a = m128d::from_array([1.0, 12.0]);
1710/// let b = m128d::from_array([5.0, 6.0]);
1711/// let c = min_m128d(a, b).to_array();
1712/// assert_eq!(c, [1.0, 6.0]);
1713/// ```
/// * **Intrinsic:** [`_mm_min_pd`]
1714#[must_use]
1715#[inline(always)]
1716#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1717pub fn min_m128d(a: m128d, b: m128d) -> m128d {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1718 m128d(unsafe { _mm_min_pd(a.0, b.0) })
1719}
1720
1721/// Low lane `min(a, b)`, other lanes unchanged.
///
/// "Unchanged" means the high lane is copied from `a` (`12.0` in the
/// example); the high lane of `b` is not used.
1722/// ```
1723/// # use safe_arch::*;
1724/// let a = m128d::from_array([1.0, 12.0]);
1725/// let b = m128d::from_array([0.0, 6.0]);
1726/// let c = min_m128d_s(a, b).to_array();
1727/// assert_eq!(c, [0.0, 12.0]);
1728/// ```
/// * **Intrinsic:** [`_mm_min_sd`]
1729#[must_use]
1730#[inline(always)]
1731#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1732pub fn min_m128d_s(a: m128d, b: m128d) -> m128d {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1733 m128d(unsafe { _mm_min_sd(a.0, b.0) })
1734}
1735
1736/// Copy the low `i64` lane to a new register, upper bits 0.
///
/// The high `i64` lane of the input (`2` in the example) is dropped.
1737/// ```
1738/// # use safe_arch::*;
1739/// let a = m128i::from([1_i64, 2]);
1740/// let b = copy_i64_m128i_s(a);
1741/// assert_eq!(<[i64; 2]>::from(b), [1, 0]);
1742/// ```
/// * **Intrinsic:** [`_mm_move_epi64`]
1743#[must_use]
1744#[inline(always)]
1745#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1746pub fn copy_i64_m128i_s(a: m128i) -> m128i {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1747 m128i(unsafe { _mm_move_epi64(a.0) })
1748}
1749
1750/// Copies the `a` value and replaces the low lane with the low `b` value.
///
/// The high lane of the output comes from `a`; the high lane of `b` is not
/// used.
1751/// ```
1752/// # use safe_arch::*;
1753/// let a = m128d::from([1.0, 2.0]);
1754/// let b = m128d::from([3.0, 4.0]);
1755/// let c = copy_replace_low_f64_m128d(a, b);
1756/// assert_eq!(c.to_array(), [3.0, 2.0]);
1757/// ```
/// * **Intrinsic:** [`_mm_move_sd`]
1758#[must_use]
1759#[inline(always)]
1760#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1761pub fn copy_replace_low_f64_m128d(a: m128d, b: m128d) -> m128d {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1762 m128d(unsafe { _mm_move_sd(a.0, b.0) })
1763}
1764
1765/// Gathers the `i8` sign bit of each lane.
1766///
1767/// The output has lane 0 as bit 0, lane 1 as bit 1, and so on. In the example
/// the negative lanes are 1, 2, 6, 10 and 14, so exactly those bits are set.
1768/// ```
1769/// # use safe_arch::*;
1770/// let a = m128i::from([0_i8, -11, -2, 13, 4, 15, -6, 17, 8, 19, -20, 21, 22, 23, -24, 127]);
1771/// let i = move_mask_i8_m128i(a);
1772/// assert_eq!(i, 0b0100010001000110);
1773/// ```
/// * **Intrinsic:** [`_mm_movemask_epi8`]
1774#[must_use]
1775#[inline(always)]
1776#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1777pub fn move_mask_i8_m128i(a: m128i) -> i32 {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1778 unsafe { _mm_movemask_epi8(a.0) }
1779}
1780
1781/// Gathers the sign bit of each `f64` lane.
1782///
1783/// The output has lane 0 as bit 0, lane 1 as bit 1. In the example only lane
/// 0 is negative, so only bit 0 is set.
1784/// ```
1785/// # use safe_arch::*;
1786/// let a = m128d::from_array([-1.0, 12.0]);
1787/// let i = move_mask_m128d(a);
1788/// assert_eq!(i, 0b01);
1789/// ```
/// * **Intrinsic:** [`_mm_movemask_pd`]
1790#[must_use]
1791#[inline(always)]
1792#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1793pub fn move_mask_m128d(a: m128d) -> i32 {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1794 unsafe { _mm_movemask_pd(a.0) }
1795}
1796
1797/// Multiplies the odd `u32` lanes and gives the widened (`u64`) results.
1798///
/// "Odd" counts lanes from one: the lanes multiplied are the first and third,
/// i.e. indices 0 and 2. In the example the `7`s at indices 1 and 3 do not
/// contribute to the output.
1799/// ```
1800/// # use safe_arch::*;
1801/// let a = m128i::from([1, 7, u32::MAX, 7]);
1802/// let b = m128i::from([5, 7, u32::MAX, 7]);
1803/// let c: [u64; 2] = mul_widen_u32_odd_m128i(a, b).into();
1804/// assert_eq!(c, [(1 * 5), (u32::MAX as u64 * u32::MAX as u64)]);
1805/// ```
/// * **Intrinsic:** [`_mm_mul_epu32`]
1806#[must_use]
1807#[inline(always)]
1808#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1809pub fn mul_widen_u32_odd_m128i(a: m128i, b: m128i) -> m128i {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1810 m128i(unsafe { _mm_mul_epu32(a.0, b.0) })
1811}
1812
1813/// Lanewise `a * b` with lanes as `f64`.
1814/// ```
1815/// # use safe_arch::*;
1816/// let a = m128d::from_array([92.0, 87.5]);
1817/// let b = m128d::from_array([100.0, -6.0]);
1818/// let c = mul_m128d(a, b).to_array();
1819/// assert_eq!(c, [9200.0, -525.0]);
1820/// ```
/// * **Intrinsic:** [`_mm_mul_pd`]
1821#[must_use]
1822#[inline(always)]
1823#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1824pub fn mul_m128d(a: m128d, b: m128d) -> m128d {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1825 m128d(unsafe { _mm_mul_pd(a.0, b.0) })
1826}
1827
1828/// Lowest lane `a * b`, high lane unchanged.
///
/// "Unchanged" means the high lane is copied from `a` (`87.5` in the example);
/// the high lane of `b` is not used.
1829/// ```
1830/// # use safe_arch::*;
1831/// let a = m128d::from_array([92.0, 87.5]);
1832/// let b = m128d::from_array([100.0, -600.0]);
1833/// let c = mul_m128d_s(a, b).to_array();
1834/// assert_eq!(c, [9200.0, 87.5]);
1835/// ```
/// * **Intrinsic:** [`_mm_mul_sd`]
1836#[must_use]
1837#[inline(always)]
1838#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1839pub fn mul_m128d_s(a: m128d, b: m128d) -> m128d {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1840 m128d(unsafe { _mm_mul_sd(a.0, b.0) })
1841}
1842
1843/// Lanewise `a * b` with lanes as `i16`, keep the high bits of the `i32`
1844/// intermediates.
///
/// For instance `200 * 600 = 120_000`, whose upper 16 bits are `1`, which is
/// the second output lane in the example.
1845/// ```
1846/// # use safe_arch::*;
1847/// let a = m128i::from([1_i16, 200, 300, 4568, -1, -2, -3, -4]);
1848/// let b = m128i::from([5_i16, 600, 700, 8910, -15, -26, -37, 48]);
1849/// let c: [i16; 8] = mul_i16_keep_high_m128i(a, b).into();
1850/// assert_eq!(c, [0, 1, 3, 621, 0, 0, 0, -1]);
1851/// ```
/// * **Intrinsic:** [`_mm_mulhi_epi16`]
1852#[must_use]
1853#[inline(always)]
1854#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1855pub fn mul_i16_keep_high_m128i(a: m128i, b: m128i) -> m128i {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1856 m128i(unsafe { _mm_mulhi_epi16(a.0, b.0) })
1857}
1858
1859/// Lanewise `a * b` with lanes as `u16`, keep the high bits of the `u32`
1860/// intermediates.
///
/// For instance `2003 * 6004 = 12_026_012`, whose upper 16 bits are `183`,
/// which is the second output lane in the example.
1861/// ```
1862/// # use safe_arch::*;
1863/// let a = m128i::from([1_u16, 2003, 3005, 45687, 1, 2, 3, 4]);
1864/// let b = m128i::from([5_u16, 6004, 7006, 8910, 15, 26, 37, 48]);
1865/// let c: [u16; 8] = mul_u16_keep_high_m128i(a, b).into();
1866/// assert_eq!(c, [0, 183, 321, 6211, 0, 0, 0, 0]);
1867/// ```
/// * **Intrinsic:** [`_mm_mulhi_epu16`]
1868#[must_use]
1869#[inline(always)]
1870#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1871pub fn mul_u16_keep_high_m128i(a: m128i, b: m128i) -> m128i {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1872 m128i(unsafe { _mm_mulhi_epu16(a.0, b.0) })
1873}
1874
1875/// Lanewise `a * b` with lanes as `i16`, keep the low bits of the `i32`
1876/// intermediates.
///
/// For instance `200 * 600 = 120_000`, whose low 16 bits read as an `i16` are
/// `-11072`, which is the second output lane in the example.
1877/// ```
1878/// # use safe_arch::*;
1879/// let a = m128i::from([1_i16, 200, 300, 4568, -1, -2, -3, -4]);
1880/// let b = m128i::from([5_i16, 600, 700, 8910, -15, -26, -37, 48]);
1881/// let c: [i16; 8] = mul_i16_keep_low_m128i(a, b).into();
1882/// assert_eq!(c, [5, -11072, 13392, 3024, 15, 52, 111, -192]);
1883/// ```
/// * **Intrinsic:** [`_mm_mullo_epi16`]
1884#[must_use]
1885#[inline(always)]
1886#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1887pub fn mul_i16_keep_low_m128i(a: m128i, b: m128i) -> m128i {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1888 m128i(unsafe { _mm_mullo_epi16(a.0, b.0) })
1889}
1890
1891/// Bitwise `a | b` over the bits of the `f64` lanes.
///
/// The operation acts on the raw bit patterns, not on the numeric values: in
/// the example `1.0 | 1.0` is `1.0` and `0.0 | 1.0` is `1.0`.
1892/// ```
1893/// # use safe_arch::*;
1894/// let a = m128d::from_array([1.0, 0.0]);
1895/// let b = m128d::from_array([1.0, 1.0]);
1896/// let c = bitor_m128d(a, b).to_array();
1897/// assert_eq!(c, [1.0, 1.0]);
1898/// ```
/// * **Intrinsic:** [`_mm_or_pd`]
1899#[must_use]
1900#[inline(always)]
1901#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1902pub fn bitor_m128d(a: m128d, b: m128d) -> m128d {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1903 m128d(unsafe { _mm_or_pd(a.0, b.0) })
1904}
1905
1906/// Bitwise `a | b` over all 128 bits of the registers.
1907/// ```
1908/// # use safe_arch::*;
1909/// let a = m128i::from([1, 0, 1, 0]);
1910/// let b = m128i::from([1, 1, 0, 0]);
1911/// let c: [i32; 4] = bitor_m128i(a, b).into();
1912/// assert_eq!(c, [1, 1, 1, 0]);
1913/// ```
/// * **Intrinsic:** [`_mm_or_si128`]
1914#[must_use]
1915#[inline(always)]
1916#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1917pub fn bitor_m128i(a: m128i, b: m128i) -> m128i {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1918 m128i(unsafe { _mm_or_si128(a.0, b.0) })
1919}
1920
1921/// Saturating convert `i16` to `i8`, and pack the values.
///
/// The eight lanes of `a` fill the low half of the output and the eight lanes
/// of `b` fill the high half, as the example shows.
1922/// ```
1923/// # use safe_arch::*;
1924/// let a = m128i::from([1_i16, 2, 3, 4, 5, 6, 7, 8]);
1925/// let b = m128i::from([9_i16, 10, 11, 12, 13, 14, 15, 16]);
1926/// let c: [i8; 16] = pack_i16_to_i8_m128i(a, b).into();
1927/// assert_eq!(c, [1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);
1928/// ```
/// * **Intrinsic:** [`_mm_packs_epi16`]
1929#[must_use]
1930#[inline(always)]
1931#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1932pub fn pack_i16_to_i8_m128i(a: m128i, b: m128i) -> m128i {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1933 m128i(unsafe { _mm_packs_epi16(a.0, b.0) })
1934}
1935
1936/// Saturating convert `i32` to `i16`, and pack the values.
///
/// The four lanes of `a` fill the low half of the output and the four lanes
/// of `b` fill the high half, as the example shows.
1937/// ```
1938/// # use safe_arch::*;
1939/// let a = m128i::from([1_i32, 2, 3, 4]);
1940/// let b = m128i::from([5_i32, 6, 7, 8]);
1941/// let c: [i16; 8] = pack_i32_to_i16_m128i(a, b).into();
1942/// assert_eq!(c, [1_i16, 2, 3, 4, 5, 6, 7, 8]);
1943/// ```
/// * **Intrinsic:** [`_mm_packs_epi32`]
1944#[must_use]
1945#[inline(always)]
1946#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1947pub fn pack_i32_to_i16_m128i(a: m128i, b: m128i) -> m128i {
  // SAFETY: the file-level `#![cfg(target_feature = "sse2")]` guarantees the
  // intrinsic is available.
1948 m128i(unsafe { _mm_packs_epi32(a.0, b.0) })
1949}
1950
1951/// Saturating convert `i16` to `u8`, and pack the values.
1952/// ```
1953/// # use safe_arch::*;
1954/// let a = m128i::from([-1_i16, 2, -3, 4, -5, 6, -7, 8]);
1955/// let b = m128i::from([9_i16, 10, 11, 12, 13, -14, 15, -16]);
1956/// let c: [u8; 16] = pack_i16_to_u8_m128i(a, b).into();
1957/// assert_eq!(c, [0, 2, 0, 4, 0, 6, 0, 8, 9, 10, 11, 12, 13, 0, 15, 0]);
1958/// ```
1959#[must_use]
1960#[inline(always)]
1961#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1962pub fn pack_i16_to_u8_m128i(a: m128i, b: m128i) -> m128i {
1963 m128i(unsafe { _mm_packus_epi16(a.0, b.0) })
1964}
1965
1966/// Compute "sum of `u8` absolute differences".
1967///
1968/// * `u8` lanewise `abs(a - b)`, producing `u8` intermediate values.
1969/// * Sum the first eight and second eight values.
1970/// * Place into the low 16 bits of two `u64` lanes.
1971/// ```
1972/// # use safe_arch::*;
1973/// let a = m128i::from([0_u8, 11, 2, 13, 4, 15, 6, 17, 8, 19, 20, 21, 22, 23, 24, 127]);
1974/// let b = m128i::from([20_u8, 110, 250, 103, 34, 105, 60, 217, 8, 19, 210, 201, 202, 203, 204, 127]);
1975/// let c: [u64; 2] = sum_of_u8_abs_diff_m128i(a, b).into();
1976/// assert_eq!(c, [831_u64, 910]);
1977/// ```
1978#[must_use]
1979#[inline(always)]
1980#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1981pub fn sum_of_u8_abs_diff_m128i(a: m128i, b: m128i) -> m128i {
1982 m128i(unsafe { _mm_sad_epu8(a.0, b.0) })
1983}
1984
1985/// Sets the args into an `m128i`, first arg is the high lane.
1986/// ```
1987/// # use safe_arch::*;
1988/// let a = m128i::from([15_i8, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
1989/// let b = set_i8_m128i(0_i8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
1990/// assert_eq!(<[i8; 16]>::from(a), <[i8; 16]>::from(b));
1991/// ```
1992#[must_use]
1993#[inline(always)]
1994#[allow(clippy::too_many_arguments)]
1995#[allow(clippy::many_single_char_names)]
1996#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
1997pub fn set_i8_m128i(a: i8, b: i8, c: i8, d: i8, e: i8, f: i8, g: i8, h: i8, i: i8, j: i8, k: i8, l: i8, m: i8, n: i8, o: i8, p: i8) -> m128i {
1998 m128i(unsafe { _mm_set_epi8(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) })
1999}
2000
2001/// Sets the args into an `m128i`, first arg is the high lane.
2002/// ```
2003/// # use safe_arch::*;
2004/// let a = m128i::from([7_i16, 6, 5, 4, 3, 2, 1, 0]);
2005/// let b = set_i16_m128i(0_i16, 1, 2, 3, 4, 5, 6, 7);
2006/// assert_eq!(<[i16; 8]>::from(a), <[i16; 8]>::from(b));
2007/// ```
2008#[must_use]
2009#[inline(always)]
2010#[allow(clippy::too_many_arguments)]
2011#[allow(clippy::many_single_char_names)]
2012#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2013pub fn set_i16_m128i(a: i16, b: i16, c: i16, d: i16, e: i16, f: i16, g: i16, h: i16) -> m128i {
2014 m128i(unsafe { _mm_set_epi16(a, b, c, d, e, f, g, h) })
2015}
2016
2017/// Sets the args into an `m128i`, first arg is the high lane.
2018/// ```
2019/// # use safe_arch::*;
2020/// let a = m128i::from([3, 2, 1, 0]);
2021/// let b = set_i32_m128i(0, 1, 2, 3);
2022/// assert_eq!(<[i32; 4]>::from(a), <[i32; 4]>::from(b));
2023/// ```
2024#[must_use]
2025#[inline(always)]
2026#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2027pub fn set_i32_m128i(a: i32, b: i32, c: i32, d: i32) -> m128i {
2028 m128i(unsafe { _mm_set_epi32(a, b, c, d) })
2029}
2030
2031/// Sets the args into an `m128i`, first arg is the high lane.
2032/// ```
2033/// # use safe_arch::*;
2034/// let a = m128i::from([1_i64, 0]);
2035/// let b = set_i64_m128i(0, 1);
2036/// assert_eq!(<[i64; 2]>::from(a), <[i64; 2]>::from(b));
2037/// ```
2038#[must_use]
2039#[inline(always)]
2040#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2041pub fn set_i64_m128i(a: i64, b: i64) -> m128i {
2042 m128i(unsafe { _mm_set_epi64x(a, b) })
2043}
2044
2045/// Sets the args into an `m128d`, first arg is the high lane.
2046/// ```
2047/// # use safe_arch::*;
2048/// let a = m128d::from_array([1.0, 0.0]);
2049/// let b = set_m128d(0.0, 1.0);
2050/// assert_eq!(a.to_array(), b.to_array());
2051/// ```
2052#[must_use]
2053#[inline(always)]
2054#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2055pub fn set_m128d(a: f64, b: f64) -> m128d {
2056 m128d(unsafe { _mm_set_pd(a, b) })
2057}
2058
2059/// Sets the args into the low lane of a `m128d`.
2060/// ```
2061/// # use safe_arch::*;
2062/// let a = m128d::from_array([1.0, 0.0]);
2063/// let b = set_m128d_s(1.0);
2064/// assert_eq!(a.to_array(), b.to_array());
2065/// ```
2066#[must_use]
2067#[inline(always)]
2068#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2069pub fn set_m128d_s(a: f64) -> m128d {
2070 m128d(unsafe { _mm_set_sd(a) })
2071}
2072
2073/// Splats the args into both lanes of the `m128d`.
2074/// ```
2075/// # use safe_arch::*;
2076/// let a = m128d::from_array([1.0, 1.0]);
2077/// let b = set_splat_m128d(1.0);
2078/// assert_eq!(a.to_array(), b.to_array());
2079/// ```
2080#[must_use]
2081#[inline(always)]
2082#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2083pub fn set_splat_m128d(a: f64) -> m128d {
2084 m128d(unsafe { _mm_set1_pd(a) })
2085}
2086
2087/// Splats the `i8` to all lanes of the `m128i`.
2088/// ```
2089/// # use safe_arch::*;
2090/// let a = m128i::from([1_i8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]);
2091/// let b = set_splat_i8_m128i(1);
2092/// assert_eq!(<[i8; 16]>::from(a), <[i8; 16]>::from(a));
2093/// ```
2094#[must_use]
2095#[inline(always)]
2096#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2097pub fn set_splat_i8_m128i(i: i8) -> m128i {
2098 m128i(unsafe { _mm_set1_epi8(i) })
2099}
2100
2101/// Splats the `i16` to all lanes of the `m128i`.
2102/// ```
2103/// # use safe_arch::*;
2104/// let a = m128i::from([1_i16, 1, 1, 1, 1, 1, 1, 1]);
2105/// let b = set_splat_i16_m128i(1);
2106/// assert_eq!(<[i16; 8]>::from(a), <[i16; 8]>::from(a));
2107/// ```
2108#[must_use]
2109#[inline(always)]
2110#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2111pub fn set_splat_i16_m128i(i: i16) -> m128i {
2112 m128i(unsafe { _mm_set1_epi16(i) })
2113}
2114
2115/// Splats the `i32` to all lanes of the `m128i`.
2116/// ```
2117/// # use safe_arch::*;
2118/// let a = m128i::from([1, 1, 1, 1]);
2119/// let b = set_splat_i32_m128i(1);
2120/// assert_eq!(<[i32; 4]>::from(a), <[i32; 4]>::from(a));
2121/// ```
2122#[must_use]
2123#[inline(always)]
2124#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2125pub fn set_splat_i32_m128i(i: i32) -> m128i {
2126 m128i(unsafe { _mm_set1_epi32(i) })
2127}
2128
2129/// Splats the `i64` to both lanes of the `m128i`.
2130/// ```
2131/// # use safe_arch::*;
2132/// let a = m128i::from([1_i64, 1]);
2133/// let b = set_splat_i64_m128i(1);
2134/// assert_eq!(<[i64; 2]>::from(a), <[i64; 2]>::from(a));
2135/// ```
2136#[must_use]
2137#[inline(always)]
2138#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2139pub fn set_splat_i64_m128i(i: i64) -> m128i {
2140 m128i(unsafe { _mm_set1_epi64x(i) })
2141}
2142
2143/// Sets the args into an `m128i`, first arg is the low lane.
2144/// ```
2145/// # use safe_arch::*;
2146/// let a = m128i::from([0_i8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
2147/// let b = set_reversed_i8_m128i(0_i8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2148/// assert_eq!(<[i8; 16]>::from(a), <[i8; 16]>::from(b));
2149/// ```
2150#[must_use]
2151#[inline(always)]
2152#[allow(clippy::too_many_arguments)]
2153#[allow(clippy::many_single_char_names)]
2154#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2155pub fn set_reversed_i8_m128i(a: i8, b: i8, c: i8, d: i8, e: i8, f: i8, g: i8, h: i8, i: i8, j: i8, k: i8, l: i8, m: i8, n: i8, o: i8, p: i8) -> m128i {
2156 m128i(unsafe { _mm_setr_epi8(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) })
2157}
2158
2159/// Sets the args into an `m128i`, first arg is the low lane.
2160/// ```
2161/// # use safe_arch::*;
2162/// let a = m128i::from([0_i16, 1, 2, 3, 4, 5, 6, 7]);
2163/// let b = set_reversed_i16_m128i(0_i16, 1, 2, 3, 4, 5, 6, 7);
2164/// assert_eq!(<[i16; 8]>::from(a), <[i16; 8]>::from(b));
2165/// ```
2166#[must_use]
2167#[inline(always)]
2168#[allow(clippy::too_many_arguments)]
2169#[allow(clippy::many_single_char_names)]
2170#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2171pub fn set_reversed_i16_m128i(a: i16, b: i16, c: i16, d: i16, e: i16, f: i16, g: i16, h: i16) -> m128i {
2172 m128i(unsafe { _mm_setr_epi16(a, b, c, d, e, f, g, h) })
2173}
2174
2175/// Sets the args into an `m128i`, first arg is the low lane.
2176/// ```
2177/// # use safe_arch::*;
2178/// let a = m128i::from([0, 1, 2, 3]);
2179/// let b = set_reversed_i32_m128i(0, 1, 2, 3);
2180/// assert_eq!(<[i32; 4]>::from(a), <[i32; 4]>::from(b));
2181/// ```
2182#[must_use]
2183#[inline(always)]
2184#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2185pub fn set_reversed_i32_m128i(a: i32, b: i32, c: i32, d: i32) -> m128i {
2186 m128i(unsafe { _mm_setr_epi32(a, b, c, d) })
2187}
2188
2189/// Sets the args into an `m128d`, first arg is the low lane.
2190/// ```
2191/// # use safe_arch::*;
2192/// let a = m128d::from_array([0.0, 1.0]);
2193/// let b = set_reversed_m128d(0.0, 1.0);
2194/// assert_eq!(a.to_array(), b.to_array());
2195/// ```
2196#[must_use]
2197#[inline(always)]
2198#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2199pub fn set_reversed_m128d(a: f64, b: f64) -> m128d {
2200 m128d(unsafe { _mm_setr_pd(a, b) })
2201}
2202
2203/// All lanes zero.
2204/// ```
2205/// # use safe_arch::*;
2206/// let a = zeroed_m128i();
2207/// assert_eq!(u128::from(a), 0);
2208/// ```
2209#[must_use]
2210#[inline(always)]
2211#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2212pub fn zeroed_m128i() -> m128i {
2213 m128i(unsafe { _mm_setzero_si128() })
2214}
2215
2216/// Both lanes zero.
2217/// ```
2218/// # use safe_arch::*;
2219/// let a = zeroed_m128d();
2220/// assert_eq!(a.to_array(), [0.0, 0.0]);
2221/// ```
2222#[must_use]
2223#[inline(always)]
2224#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2225pub fn zeroed_m128d() -> m128d {
2226 m128d(unsafe { _mm_setzero_pd() })
2227}
2228
2229/// Shuffle the `i32` lanes in `$a` using an immediate
2230/// control value.
2231///
2232/// ```
2233/// # use safe_arch::*;
2234/// let a = m128i::from([6, 7, 8, 9]);
2235/// //
2236/// let c = shuffle_ai_f32_all_m128i::<0b01_10_10_00>(a);
2237/// assert_eq!(<[i32; 4]>::from(c), [6, 8, 8, 7]);
2238/// ```
2239/// * **Intrinsic:** [`_mm_shuffle_epi32`]
2240/// * **Assembly:** `pshufd xmm, xmm, imm8`
2241#[must_use]
2242#[inline(always)]
2243#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2244pub fn shuffle_ai_f32_all_m128i<const MASK: i32>(a: m128i) -> m128i {
2245 m128i(unsafe { _mm_shuffle_epi32(a.0, MASK) })
2246}
2247
2248/// Shuffle the `f64` lanes from `$a` and `$b` together using an immediate
2249/// control value.
2250///
2251/// The `a:` and `b:` prefixes on the index selection values are literal tokens
2252/// that you type. It helps keep clear what value comes from where. The first
2253/// two output lanes come from `$a`, the second two output lanes come from `$b`.
2254///
2255/// You can pass the same value as both arguments, but if you want to swizzle
2256/// within only a single register and you have `avx` available consider using
2257/// [`shuffle_ai_f64_all_m128d`] instead. You'll get much better performance.
2258/// ```
2259/// # use safe_arch::*;
2260/// let a = m128d::from_array([1.0, 2.0]);
2261/// let b = m128d::from_array([3.0, 4.0]);
2262/// //
2263/// let c = shuffle_abi_f64_all_m128d::<0b00>(a, b).to_array();
2264/// assert_eq!(c, [1.0, 3.0]);
2265/// //
2266/// let c = shuffle_abi_f64_all_m128d::<0b10>(a, b).to_array();
2267/// assert_eq!(c, [1.0, 4.0]);
2268/// ```
2269#[must_use]
2270#[inline(always)]
2271#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2272pub fn shuffle_abi_f64_all_m128d<const MASK: i32>(a: m128d, b: m128d) -> m128d {
2273 m128d(unsafe { _mm_shuffle_pd(a.0, b.0, MASK) })
2274}
2275
2276/// Shuffle the high `i16` lanes in `$a` using an immediate control value.
2277/// ```
2278/// # use safe_arch::*;
2279/// let a = m128i::from([1_i16, 2, 3, 4, 5, 6, 7, 8]);
2280/// let c = shuffle_ai_i16_h64all_m128i::<0b01_00_10_11>(a);
2281/// assert_eq!(<[i16; 8]>::from(c), [1_i16, 2, 3, 4, 8, 7, 5, 6]);
2282/// ```
2283/// * **Intrinsic:** [`_mm_shufflehi_epi16`]
2284/// * **Assembly:** `pshufhw xmm, xmm, imm8`
2285#[must_use]
2286#[inline(always)]
2287#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2288pub fn shuffle_ai_i16_h64all_m128i<const MASK: i32>(a: m128i) -> m128i {
2289 m128i(unsafe { _mm_shufflehi_epi16(a.0, MASK) })
2290}
2291
2292/// Shuffle the low `i16` lanes in `$a` using an immediate control value.
2293/// ```
2294/// # use safe_arch::*;
2295/// let a = m128i::from([1_i16, 2, 3, 4, 5, 6, 7, 8]);
2296/// //
2297/// let c = shuffle_ai_i16_l64all_m128i::<0b01_11_10_00>(a);
2298/// assert_eq!(<[i16; 8]>::from(c), [1_i16, 3, 4, 2, 5, 6, 7, 8]);
2299/// ```
2300/// * **Intrinsic:** [`_mm_shufflelo_epi16`]
2301/// * **Assembly:** `pshuflw xmm, xmm, imm8`
2302#[must_use]
2303#[inline(always)]
2304#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2305pub fn shuffle_ai_i16_l64all_m128i<const MASK: i32>(a: m128i) -> m128i {
2306 m128i(unsafe { _mm_shufflelo_epi16(a.0, MASK) })
2307}
2308
2309/// Shift all `u16` lanes to the left by the `count` in the lower `u64` lane.
2310///
2311/// New bits are 0s.
2312/// ```
2313/// # use safe_arch::*;
2314/// let a = m128i::from([1_u16, 2, 3, 4, 1, 2, 3, 4]);
2315/// let b = m128i::from([3_u64, 0]);
2316/// let c: [u16; 8] = shl_all_u16_m128i(a, b).into();
2317/// assert_eq!(c, [1_u16 << 3, 2 << 3, 3 << 3, 4 << 3, 1 << 3, 2 << 3, 3 << 3, 4 << 3]);
2318/// ```
2319#[must_use]
2320#[inline(always)]
2321#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2322pub fn shl_all_u16_m128i(a: m128i, count: m128i) -> m128i {
2323 m128i(unsafe { _mm_sll_epi16(a.0, count.0) })
2324}
2325
2326/// Shift all `u32` lanes to the left by the `count` in the lower `u64` lane.
2327///
2328/// New bits are 0s.
2329/// ```
2330/// # use safe_arch::*;
2331/// let a = m128i::from([1_u32, 2, 3, 4]);
2332/// let b = m128i::from([3_u64, 0]);
2333/// let c: [u32; 4] = shl_all_u32_m128i(a, b).into();
2334/// assert_eq!(c, [1 << 3, 2 << 3, 3 << 3, 4 << 3]);
2335/// ```
2336#[must_use]
2337#[inline(always)]
2338#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2339pub fn shl_all_u32_m128i(a: m128i, count: m128i) -> m128i {
2340 m128i(unsafe { _mm_sll_epi32(a.0, count.0) })
2341}
2342
2343/// Shift all `u64` lanes to the left by the `count` in the lower `u64` lane.
2344///
2345/// New bits are 0s.
2346/// ```
2347/// # use safe_arch::*;
2348/// let a = m128i::from([1_u64, 2]);
2349/// let b = m128i::from([3_u64, 0]);
2350/// let c: [u64; 2] = shl_all_u64_m128i(a, b).into();
2351/// assert_eq!(c, [1 << 3, 2 << 3]);
2352/// ```
2353#[must_use]
2354#[inline(always)]
2355#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2356pub fn shl_all_u64_m128i(a: m128i, count: m128i) -> m128i {
2357 m128i(unsafe { _mm_sll_epi64(a.0, count.0) })
2358}
2359
2360/// Shifts all `u16` lanes left by an immediate.
2361///
2362/// ```
2363/// # use safe_arch::*;
2364/// let a = m128i::from([1_u16, 2, 3, 4, 1, 2, 3, 4]);
2365/// let c: [u16; 8] = shl_imm_u16_m128i::<3>(a).into();
2366/// assert_eq!(c, [1_u16 << 3, 2 << 3, 3 << 3, 4 << 3, 1 << 3, 2 << 3, 3 << 3, 4 << 3]);
2367/// ```
2368#[must_use]
2369#[inline(always)]
2370#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2371pub fn shl_imm_u16_m128i<const IMM: i32>(a: m128i) -> m128i {
2372 m128i(unsafe { _mm_slli_epi16(a.0, IMM) })
2373}
2374
2375/// Shifts all `u32` lanes left by an immediate.
2376///
2377/// ```
2378/// # use safe_arch::*;
2379/// let a = m128i::from([1, 2, 3, 4]);
2380/// let c: [u32; 4] = shl_imm_u32_m128i::<3>(a).into();
2381/// assert_eq!(c, [1 << 3, 2 << 3, 3 << 3, 4 << 3]);
2382/// ```
2383#[must_use]
2384#[inline(always)]
2385#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2386pub fn shl_imm_u32_m128i<const IMM: i32>(a: m128i) -> m128i {
2387 m128i(unsafe { _mm_slli_epi32(a.0, IMM) })
2388}
2389
2390/// Shifts both `u64` lanes left by an immediate.
2391///
2392/// ```
2393/// # use safe_arch::*;
2394/// let a = m128i::from([1_u64, 2]);
2395/// let c: [u64; 2] = shl_imm_u64_m128i::<3>(a).into();
2396/// assert_eq!(c, [1_u64 << 3, 2 << 3]);
2397/// ```
2398#[must_use]
2399#[inline(always)]
2400#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2401pub fn shl_imm_u64_m128i<const IMM: i32>(a: m128i) -> m128i {
2402 m128i(unsafe { _mm_slli_epi64(a.0, IMM) })
2403}
2404
2405/// Lanewise `sqrt(a)`.
2406/// ```
2407/// # use safe_arch::*;
2408/// let a = m128d::from_array([25.0, 16.0]);
2409/// let b = sqrt_m128d(a).to_array();
2410/// assert_eq!(b, [5.0, 4.0]);
2411/// ```
2412#[must_use]
2413#[inline(always)]
2414#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2415pub fn sqrt_m128d(a: m128d) -> m128d {
2416 m128d(unsafe { _mm_sqrt_pd(a.0) })
2417}
2418
2419/// Low lane `sqrt(b)`, upper lane is unchanged from `a`.
2420/// ```
2421/// # use safe_arch::*;
2422/// let a = m128d::from_array([1.0, 2.0]);
2423/// let b = m128d::from_array([25.0, 4.0]);
2424/// let c = sqrt_m128d_s(a, b);
2425/// assert_eq!(c.to_array(), [5.0, 2.0]);
2426/// ```
2427#[must_use]
2428#[inline(always)]
2429#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2430pub fn sqrt_m128d_s(a: m128d, b: m128d) -> m128d {
2431 m128d(unsafe { _mm_sqrt_sd(a.0, b.0) })
2432}
2433
2434/// Shift each `i16` lane to the right by the `count` in the lower `i64` lane.
2435///
2436/// New bits are the sign bit.
2437/// ```
2438/// # use safe_arch::*;
2439/// let a = m128i::from([1_i16, 2, 3, 4, -1, -2, -3, -4]);
2440/// let b = m128i::from([3_i64, 0]);
2441/// let c: [i16; 8] = shr_all_i16_m128i(a, b).into();
2442/// assert_eq!(c, [1_i16 >> 3, 2 >> 3, 3 >> 3, 4 >> 3, -1 >> 3, -2 >> 3, -3 >> 3, -4 >> 3]);
2443/// ```
2444#[must_use]
2445#[inline(always)]
2446#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2447pub fn shr_all_i16_m128i(a: m128i, count: m128i) -> m128i {
2448 m128i(unsafe { _mm_sra_epi16(a.0, count.0) })
2449}
2450
2451/// Shift each `i32` lane to the right by the `count` in the lower `i64` lane.
2452///
2453/// New bits are the sign bit.
2454/// ```
2455/// # use safe_arch::*;
2456/// let a = m128i::from([1_i32, 2, -3, -4]);
2457/// let b = m128i::from([3_i64, 0]);
2458/// let c: [i32; 4] = shr_all_i32_m128i(a, b).into();
2459/// assert_eq!(c, [1 >> 3, 2 >> 3, -3 >> 3, -4 >> 3]);
2460/// ```
2461#[must_use]
2462#[inline(always)]
2463#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2464pub fn shr_all_i32_m128i(a: m128i, count: m128i) -> m128i {
2465 m128i(unsafe { _mm_sra_epi32(a.0, count.0) })
2466}
2467
2468/// Shifts all `i16` lanes right by an immediate.
2469///
2470/// New bits are the sign bit.
2471///
2472/// ```
2473/// # use safe_arch::*;
2474/// let a = m128i::from([1_i16, 2, 3, 4, -1, -2, -3, -4]);
2475/// let c: [i16; 8] = shr_imm_i16_m128i::<3>(a).into();
2476/// assert_eq!(c, [1_i16 >> 3, 2 >> 3, 3 >> 3, 4 >> 3, -1 >> 3, -2 >> 3, -3 >> 3, -4 >> 3]);
2477/// ```
2478#[must_use]
2479#[inline(always)]
2480#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2481pub fn shr_imm_i16_m128i<const IMM: i32>(a: m128i) -> m128i {
2482 m128i(unsafe { _mm_srai_epi16(a.0, IMM) })
2483}
2484
2485/// Shifts all `i32` lanes right by an immediate.
2486///
2487/// New bits are the sign bit.
2488///
2489/// ```
2490/// # use safe_arch::*;
2491/// let a = m128i::from([1, 2, -3, -4]);
2492/// let c: [i32; 4] = shr_imm_i32_m128i::<3>(a).into();
2493/// assert_eq!(c, [1 >> 3, 2 >> 3, -3 >> 3, -4 >> 3]);
2494/// ```
2495#[must_use]
2496#[inline(always)]
2497#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2498pub fn shr_imm_i32_m128i<const IMM: i32>(a: m128i) -> m128i {
2499 m128i(unsafe { _mm_srai_epi32(a.0, IMM) })
2500}
2501
2502/// Shift each `u16` lane to the right by the `count` in the lower `u64` lane.
2503///
2504/// ```
2505/// # use safe_arch::*;
2506/// let a = m128i::from([1_u16, 2, 3, 4, 100, 200, 300, 400]);
2507/// let b = m128i::from([3_u64, 0]);
2508/// let c: [u16; 8] = shr_all_u16_m128i(a, b).into();
2509/// assert_eq!(c, [1_u16 >> 3, 2 >> 3, 3 >> 3, 4 >> 3, 100 >> 3, 200 >> 3, 300 >> 3, 400 >> 3,]);
2510/// ```
2511#[must_use]
2512#[inline(always)]
2513#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2514pub fn shr_all_u16_m128i(a: m128i, count: m128i) -> m128i {
2515 m128i(unsafe { _mm_srl_epi16(a.0, count.0) })
2516}
2517
2518/// Shift each `u32` lane to the right by the `count` in the lower `u64` lane.
2519///
2520/// ```
2521/// # use safe_arch::*;
2522/// let a = m128i::from([1_u32, 2, 300, 400]);
2523/// let b = m128i::from([3_u64, 0]);
2524/// let c: [u32; 4] = shr_all_u32_m128i(a, b).into();
2525/// assert_eq!(c, [1 >> 3, 2 >> 3, 300 >> 3, 400 >> 3,]);
2526/// ```
2527#[must_use]
2528#[inline(always)]
2529#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2530pub fn shr_all_u32_m128i(a: m128i, count: m128i) -> m128i {
2531 m128i(unsafe { _mm_srl_epi32(a.0, count.0) })
2532}
2533
2534/// Shift each `u64` lane to the right by the `count` in the lower `u64` lane.
2535///
2536/// New bits are 0s.
2537/// ```
2538/// # use safe_arch::*;
2539/// let a = m128i::from([1_u64, 56]);
2540/// let b = m128i::from([3_u64, 0]);
2541/// let c: [u64; 2] = shr_all_u64_m128i(a, b).into();
2542/// assert_eq!(c, [1 >> 3, 56 >> 3]);
2543/// ```
2544#[must_use]
2545#[inline(always)]
2546#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2547pub fn shr_all_u64_m128i(a: m128i, count: m128i) -> m128i {
2548 m128i(unsafe { _mm_srl_epi64(a.0, count.0) })
2549}
2550
2551/// Shifts all `u16` lanes right by an immediate.
2552///
2553/// New bits are 0s.
2554///
2555/// ```
2556/// # use safe_arch::*;
2557/// let a = m128i::from([1_u16, 2, 3, 4, 100, 200, 300, 400]);
2558/// let c: [u16; 8] = shr_imm_u16_m128i::<3>(a).into();
2559/// assert_eq!(c, [1_u16 >> 3, 2 >> 3, 3 >> 3, 4 >> 3, 100 >> 3, 200 >> 3, 300 >> 3, 400 >> 3,]);
2560/// ```
2561/// * **Intrinsic:** [`_mm_srli_epi16`]
2562/// * **Assembly:** `psrlw xmm, imm8`
2563#[must_use]
2564#[inline(always)]
2565#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2566pub fn shr_imm_u16_m128i<const IMM: i32>(a: m128i) -> m128i {
2567 m128i(unsafe { _mm_srli_epi16(a.0, IMM) })
2568}
2569
2570/// Shifts all `u32` lanes right by an immediate.
2571///
2572/// ```
2573/// # use safe_arch::*;
2574/// let a = m128i::from([1, 2, 300, 400]);
2575/// let c: [u32; 4] = shr_imm_u32_m128i::<3>(a).into();
2576/// assert_eq!(c, [1 >> 3, 2 >> 3, 300 >> 3, 400 >> 3]);
2577/// ```
2578/// * **Intrinsic:** [`_mm_srli_epi32`]
2579/// * **Assembly:** `psrld xmm, imm8`
2580#[must_use]
2581#[inline(always)]
2582#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2583pub fn shr_imm_u32_m128i<const IMM: i32>(a: m128i) -> m128i {
2584 m128i(unsafe { _mm_srli_epi32(a.0, IMM) })
2585}
2586
2587/// Shifts both `u64` lanes right by an immediate.
2588///
2589/// ```
2590/// # use safe_arch::*;
2591/// let a = m128i::from([1_u64, 200]);
2592/// let c: [u64; 2] = shr_imm_u64_m128i::<3>(a).into();
2593/// assert_eq!(c, [1_u64 >> 3, 200 >> 3]);
2594/// ```
2595/// * **Intrinsic:** [`_mm_srli_epi64`]
2596/// * **Assembly:** `psrlq xmm, imm8`
2597#[must_use]
2598#[inline(always)]
2599#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2600pub fn shr_imm_u64_m128i<const IMM: i32>(a: m128i) -> m128i {
2601 m128i(unsafe { _mm_srli_epi64(a.0, IMM) })
2602}
2603
2604/// Stores the value to the reference given.
2605/// ```
2606/// # use safe_arch::*;
2607/// let a = m128d::from_array([10.0, 12.0]);
2608/// let mut b = zeroed_m128d();
2609/// store_m128d(&mut b, a);
2610/// let c = b.to_array();
2611/// assert_eq!(c, [10.0, 12.0]);
2612/// ```
2613#[inline(always)]
2614#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2615pub fn store_m128d(r: &mut m128d, a: m128d) {
2616 unsafe { _mm_store_pd(r as *mut m128d as *mut f64, a.0) }
2617}
2618
2619/// Stores the low lane value to the reference given.
2620/// ```
2621/// # use safe_arch::*;
2622/// let a = m128d::from_array([10.0, 12.0]);
2623/// let mut f = 0.0;
2624/// store_m128d_s(&mut f, a);
2625/// assert_eq!(f, 10.0);
2626/// ```
2627#[inline(always)]
2628#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2629pub fn store_m128d_s(r: &mut f64, a: m128d) {
2630 unsafe { _mm_store_sd(r as *mut f64, a.0) }
2631}
2632
2633/// Stores the low lane value to all lanes of the reference given.
2634/// ```
2635/// # use safe_arch::*;
2636/// let a = m128d::from_array([10.0, 12.0]);
2637/// let mut b = zeroed_m128d();
2638/// store_splat_m128d(&mut b, a);
2639/// let c = b.to_array();
2640/// assert_eq!(c, [10.0, 10.0]);
2641/// ```
2642#[inline(always)]
2643#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2644pub fn store_splat_m128d(r: &mut m128d, a: m128d) {
2645 unsafe { _mm_store1_pd(r as *mut m128d as *mut f64, a.0) }
2646}
2647
2648/// Stores the value to the reference given.
2649/// ```
2650/// # use safe_arch::*;
2651/// let a = m128i::from([1, 2, 3, 4]);
2652/// let mut b = zeroed_m128i();
2653/// store_m128i(&mut b, a);
2654/// let c: [i32; 4] = b.into();
2655/// assert_eq!(c, [1, 2, 3, 4]);
2656/// ```
2657#[inline(always)]
2658#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2659pub fn store_m128i(r: &mut m128i, a: m128i) {
2660 unsafe { _mm_store_si128(&mut r.0, a.0) }
2661}
2662
2663/// Stores the high lane value to the reference given.
2664/// ```
2665/// # use safe_arch::*;
2666/// let a = m128d::from_array([10.0, 12.0]);
2667/// let mut f = 0.0;
2668/// store_high_m128d_s(&mut f, a);
2669/// assert_eq!(f, 12.0);
2670/// ```
2671#[inline(always)]
2672#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2673pub fn store_high_m128d_s(r: &mut f64, a: m128d) {
2674 unsafe { _mm_storeh_pd(r as *mut f64, a.0) }
2675}
2676
2677/// Stores the value to the reference given.
2678/// ```
2679/// # use safe_arch::*;
2680/// let a = m128i::from([1_i64, 2]);
2681/// let mut b = 0_i64;
2682/// store_i64_m128i_s(&mut b, a);
2683/// assert_eq!(b, 1_i64);
2684/// ```
2685#[inline(always)]
2686#[allow(clippy::cast_ptr_alignment)]
2687#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2688pub fn store_i64_m128i_s(r: &mut i64, a: m128i) {
2689 unsafe { _mm_storel_epi64(r as *mut i64 as *mut __m128i, a.0) }
2690}
2691
2692/// Stores the value to the reference given.
2693/// ```
2694/// # use safe_arch::*;
2695/// let a = m128d::from_array([10.0, 12.0]);
2696/// let mut b = zeroed_m128d();
2697/// store_reversed_m128d(&mut b, a);
2698/// let c = b.to_array();
2699/// assert_eq!(c, [12.0, 10.0]);
2700/// ```
2701#[inline(always)]
2702#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2703pub fn store_reversed_m128d(r: &mut m128d, a: m128d) {
2704 unsafe { _mm_storer_pd(r as *mut m128d as *mut f64, a.0) }
2705}
2706
2707/// Stores the value to the reference given.
2708/// ```
2709/// # use safe_arch::*;
2710/// let a = m128d::from_array([10.0, 12.0]);
2711/// let mut b = [0.0, 0.0];
2712/// store_unaligned_m128d(&mut b, a);
2713/// assert_eq!(b, [10.0, 12.0]);
2714/// ```
2715#[inline(always)]
2716#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2717pub fn store_unaligned_m128d(r: &mut [f64; 2], a: m128d) {
2718 unsafe { _mm_storeu_pd(r.as_mut_ptr(), a.0) }
2719}
2720
2721/// Stores the value to the reference given.
2722/// ```
2723/// # use safe_arch::*;
2724/// let a = m128i::from([0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
2725/// let mut b = [0_u8; 16];
2726/// store_unaligned_m128i(&mut b, a);
2727/// assert_eq!(b, [0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
2728/// ```
2729#[inline(always)]
2730#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2731pub fn store_unaligned_m128i(r: &mut [u8; 16], a: m128i) {
2732 unsafe { _mm_storeu_si128(r.as_mut_ptr().cast(), a.0) }
2733}
2734
2735/// Lanewise `a - b` with lanes as `i8`.
2736/// ```
2737/// # use safe_arch::*;
2738/// let a = m128i::from([0_i8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
2739/// let b = m128i::from([0_i8, 11, 2, 13, 4, 15, 6, 17, 8, 19, -20, 21, 22, -23, 24, 127]);
2740/// let c: [i8; 16] = sub_i8_m128i(a, b).into();
2741/// assert_eq!(c, [0, -10, 0, -10, 0, -10, 0, -10, 0, -10, 30, -10, -10, 36, -10, -112]);
2742/// ```
2743#[must_use]
2744#[inline(always)]
2745#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2746pub fn sub_i8_m128i(a: m128i, b: m128i) -> m128i {
2747 m128i(unsafe { _mm_sub_epi8(a.0, b.0) })
2748}
2749
2750/// Lanewise `a - b` with lanes as `i16`.
2751/// ```
2752/// # use safe_arch::*;
2753/// let a = m128i::from([1_i16, 2, 3, 4, -1, -2, -3, -4]);
2754/// let b = m128i::from([51_i16, 61, 71, 81, -15, -26, -37, 48]);
2755/// let c: [i16; 8] = sub_i16_m128i(a, b).into();
2756/// assert_eq!(c, [-50, -59, -68, -77, 14, 24, 34, -52]);
2757/// ```
2758#[must_use]
2759#[inline(always)]
2760#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2761pub fn sub_i16_m128i(a: m128i, b: m128i) -> m128i {
2762 m128i(unsafe { _mm_sub_epi16(a.0, b.0) })
2763}
2764
2765/// Lanewise `a - b` with lanes as `i32`.
2766/// ```
2767/// # use safe_arch::*;
2768/// let a = m128i::from([1, 2, 3, 4]);
2769/// let b = m128i::from([50, 60, 70, 87]);
2770/// let c: [i32; 4] = sub_i32_m128i(a, b).into();
2771/// assert_eq!(c, [-49, -58, -67, -83]);
2772/// ```
2773#[must_use]
2774#[inline(always)]
2775#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2776pub fn sub_i32_m128i(a: m128i, b: m128i) -> m128i {
2777 m128i(unsafe { _mm_sub_epi32(a.0, b.0) })
2778}
2779
2780/// Lanewise `a - b` with lanes as `i64`.
2781/// ```
2782/// # use safe_arch::*;
2783/// let a = m128i::from([92_i64, 87]);
2784/// let b = m128i::from([-9001_i64, 1]);
2785/// let c: [i64; 2] = sub_i64_m128i(a, b).into();
2786/// assert_eq!(c, [9093, 86]);
2787/// ```
2788#[must_use]
2789#[inline(always)]
2790#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2791pub fn sub_i64_m128i(a: m128i, b: m128i) -> m128i {
2792 m128i(unsafe { _mm_sub_epi64(a.0, b.0) })
2793}
2794
2795/// Lanewise `a - b`.
2796/// ```
2797/// # use safe_arch::*;
2798/// let a = m128d::from_array([92.0, 87.5]);
2799/// let b = m128d::from_array([100.0, -6.0]);
2800/// let c = sub_m128d(a, b).to_array();
2801/// assert_eq!(c, [-8.0, 93.5]);
2802/// ```
2803#[must_use]
2804#[inline(always)]
2805#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2806pub fn sub_m128d(a: m128d, b: m128d) -> m128d {
2807 m128d(unsafe { _mm_sub_pd(a.0, b.0) })
2808}
2809
2810/// Lowest lane `a - b`, high lane unchanged.
2811/// ```
2812/// # use safe_arch::*;
2813/// let a = m128d::from_array([92.0, 87.5]);
2814/// let b = m128d::from_array([100.0, -600.0]);
2815/// let c = sub_m128d_s(a, b).to_array();
2816/// assert_eq!(c, [-8.0, 87.5]);
2817/// ```
2818#[must_use]
2819#[inline(always)]
2820#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2821pub fn sub_m128d_s(a: m128d, b: m128d) -> m128d {
2822 m128d(unsafe { _mm_sub_sd(a.0, b.0) })
2823}
2824
2825/// Lanewise saturating `a - b` with lanes as `i8`.
2826/// ```
2827/// # use safe_arch::*;
2828/// let a = m128i::from([0_i8, -128, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, -127]);
2829/// let b = m128i::from([0_i8, 1, 2, 13, 4, 15, 6, 17, 8, 19, -20, 21, 22, -23, 24, 127]);
2830/// let c: [i8; 16] = sub_saturating_i8_m128i(a, b).into();
2831/// assert_eq!(c, [0, -128, 0, -10, 0, -10, 0, -10, 0, -10, 30, -10, -10, 36, -10, -128]);
2832/// ```
2833#[must_use]
2834#[inline(always)]
2835#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2836pub fn sub_saturating_i8_m128i(a: m128i, b: m128i) -> m128i {
2837 m128i(unsafe { _mm_subs_epi8(a.0, b.0) })
2838}
2839
2840/// Lanewise saturating `a - b` with lanes as `i16`.
2841/// ```
2842/// # use safe_arch::*;
2843/// let a = m128i::from([1_i16, 2, 3, 4, -1, -2, -3, -4]);
2844/// let b = m128i::from([51_i16, 61, 71, 81, i16::MAX, -26, -37, 48]);
2845/// let c: [i16; 8] = sub_saturating_i16_m128i(a, b).into();
2846/// assert_eq!(c, [-50, -59, -68, -77, -32768, 24, 34, -52]);
2847/// ```
2848#[must_use]
2849#[inline(always)]
2850#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2851pub fn sub_saturating_i16_m128i(a: m128i, b: m128i) -> m128i {
2852 m128i(unsafe { _mm_subs_epi16(a.0, b.0) })
2853}
2854
2855/// Lanewise saturating `a - b` with lanes as `u8`.
2856/// ```
2857/// # use safe_arch::*;
2858/// let a = m128i::from([10_u8, 255, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 255]);
2859/// let b = m128i::from([1_u8, 1, 2, 13, 4, 15, 6, 17, 8, 19, 20, 21, 22, 23, 24, 127]);
2860/// let c: [u8; 16] = sub_saturating_u8_m128i(a, b).into();
2861/// assert_eq!(c, [9_u8, 254, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128]);
2862/// ```
2863#[must_use]
2864#[inline(always)]
2865#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2866pub fn sub_saturating_u8_m128i(a: m128i, b: m128i) -> m128i {
2867 m128i(unsafe { _mm_subs_epu8(a.0, b.0) })
2868}
2869
2870/// Lanewise saturating `a - b` with lanes as `u16`.
2871/// ```
2872/// # use safe_arch::*;
2873/// let a = m128i::from([51_u16, 61, 3, 4, u16::MAX, 2, 3, u16::MAX]);
2874/// let b = m128i::from([5_u16, 2, 71, 81, u16::MAX, 26, 37, u16::MIN]);
2875/// let c: [u16; 8] = sub_saturating_u16_m128i(a, b).into();
2876/// assert_eq!(c, [46, 59, 0, 0, 0, 0, 0, u16::MAX]);
2877/// ```
2878#[must_use]
2879#[inline(always)]
2880#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2881pub fn sub_saturating_u16_m128i(a: m128i, b: m128i) -> m128i {
2882 m128i(unsafe { _mm_subs_epu16(a.0, b.0) })
2883}
2884
2885/// Unpack and interleave high `i8` lanes of `a` and `b`.
2886/// ```
2887/// # use safe_arch::*;
2888/// let a = m128i::from([0_i8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
2889/// let b = m128i::from([0_i8, 11, 2, 13, 4, 15, 6, 17, 8, 19, -20, 21, 22, -23, 24, 127]);
2890/// let c: [i8; 16] = unpack_high_i8_m128i(a, b).into();
2891/// assert_eq!(c, [8, 8, 9, 19, 10, -20, 11, 21, 12, 22, 13, -23, 14, 24, 15, 127]);
2892/// ```
2893#[must_use]
2894#[inline(always)]
2895#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2896pub fn unpack_high_i8_m128i(a: m128i, b: m128i) -> m128i {
2897 m128i(unsafe { _mm_unpackhi_epi8(a.0, b.0) })
2898}
2899
2900/// Unpack and interleave high `i16` lanes of `a` and `b`.
2901/// ```
2902/// # use safe_arch::*;
2903/// let a = m128i::from([1_i16, 2, 3, 4, -1, -2, -3, -4]);
2904/// let b = m128i::from([5_i16, 6, 7, 8, -15, -26, -37, 48]);
2905/// let c: [i16; 8] = unpack_high_i16_m128i(a, b).into();
2906/// assert_eq!(c, [-1, -15, -2, -26, -3, -37, -4, 48]);
2907/// ```
2908#[must_use]
2909#[inline(always)]
2910#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2911pub fn unpack_high_i16_m128i(a: m128i, b: m128i) -> m128i {
2912 m128i(unsafe { _mm_unpackhi_epi16(a.0, b.0) })
2913}
2914
2915/// Unpack and interleave high `i32` lanes of `a` and `b`.
2916/// ```
2917/// # use safe_arch::*;
2918/// let a = m128i::from([1, 2, 3, 4]);
2919/// let b = m128i::from([5, 6, 7, 8]);
2920/// let c: [i32; 4] = unpack_high_i32_m128i(a, b).into();
2921/// assert_eq!(c, [3, 7, 4, 8]);
2922/// ```
2923#[must_use]
2924#[inline(always)]
2925#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2926pub fn unpack_high_i32_m128i(a: m128i, b: m128i) -> m128i {
2927 m128i(unsafe { _mm_unpackhi_epi32(a.0, b.0) })
2928}
2929
2930/// Unpack and interleave high `i64` lanes of `a` and `b`.
2931/// ```
2932/// # use safe_arch::*;
2933/// let a = m128i::from([92_i64, 87]);
2934/// let b = m128i::from([-9001_i64, 1]);
2935/// let c: [i64; 2] = unpack_high_i64_m128i(a, b).into();
2936/// assert_eq!(c, [87, 1]);
2937/// ```
2938#[must_use]
2939#[inline(always)]
2940#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2941pub fn unpack_high_i64_m128i(a: m128i, b: m128i) -> m128i {
2942 m128i(unsafe { _mm_unpackhi_epi64(a.0, b.0) })
2943}
2944
2945/// Unpack and interleave high lanes of `a` and `b`.
2946/// ```
2947/// # use safe_arch::*;
2948/// let a = m128d::from_array([92.0, 87.5]);
2949/// let b = m128d::from_array([100.0, -6.0]);
2950/// let c = unpack_high_m128d(a, b).to_array();
2951/// assert_eq!(c, [87.5, -6.0]);
2952/// ```
2953#[must_use]
2954#[inline(always)]
2955#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2956pub fn unpack_high_m128d(a: m128d, b: m128d) -> m128d {
2957 m128d(unsafe { _mm_unpackhi_pd(a.0, b.0) })
2958}
2959
2960/// Unpack and interleave low `i8` lanes of `a` and `b`.
2961/// ```
2962/// # use safe_arch::*;
2963/// let a = m128i::from([0_i8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
2964/// let b = m128i::from([12_i8, 11, 22, 13, 99, 15, 16, 17, 8, 19, -20, 21, 22, -23, 24, 127]);
2965/// let c: [i8; 16] = unpack_low_i8_m128i(a, b).into();
2966/// assert_eq!(c, [0, 12, 1, 11, 2, 22, 3, 13, 4, 99, 5, 15, 6, 16, 7, 17]);
2967/// ```
2968#[must_use]
2969#[inline(always)]
2970#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2971pub fn unpack_low_i8_m128i(a: m128i, b: m128i) -> m128i {
2972 m128i(unsafe { _mm_unpacklo_epi8(a.0, b.0) })
2973}
2974
2975/// Unpack and interleave low `i16` lanes of `a` and `b`.
2976/// ```
2977/// # use safe_arch::*;
2978/// let a = m128i::from([1_i16, 2, 3, 4, -1, -2, -3, -4]);
2979/// let b = m128i::from([5_i16, 6, 7, 8, -15, -26, -37, 48]);
2980/// let c: [i16; 8] = unpack_low_i16_m128i(a, b).into();
2981/// assert_eq!(c, [1, 5, 2, 6, 3, 7, 4, 8]);
2982/// ```
2983#[must_use]
2984#[inline(always)]
2985#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
2986pub fn unpack_low_i16_m128i(a: m128i, b: m128i) -> m128i {
2987 m128i(unsafe { _mm_unpacklo_epi16(a.0, b.0) })
2988}
2989
2990/// Unpack and interleave low `i32` lanes of `a` and `b`.
2991/// ```
2992/// # use safe_arch::*;
2993/// let a = m128i::from([1, 2, 3, 4]);
2994/// let b = m128i::from([5, 6, 7, 8]);
2995/// let c: [i32; 4] = unpack_low_i32_m128i(a, b).into();
2996/// assert_eq!(c, [1, 5, 2, 6]);
2997/// ```
2998#[must_use]
2999#[inline(always)]
3000#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
3001pub fn unpack_low_i32_m128i(a: m128i, b: m128i) -> m128i {
3002 m128i(unsafe { _mm_unpacklo_epi32(a.0, b.0) })
3003}
3004
3005/// Unpack and interleave low `i64` lanes of `a` and `b`.
3006/// ```
3007/// # use safe_arch::*;
3008/// let a = m128i::from([92_i64, 87]);
3009/// let b = m128i::from([-9001_i64, 1]);
3010/// let c: [i64; 2] = unpack_low_i64_m128i(a, b).into();
3011/// assert_eq!(c, [92, -9001]);
3012/// ```
3013#[must_use]
3014#[inline(always)]
3015#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
3016pub fn unpack_low_i64_m128i(a: m128i, b: m128i) -> m128i {
3017 m128i(unsafe { _mm_unpacklo_epi64(a.0, b.0) })
3018}
3019
3020/// Unpack and interleave low lanes of `a` and `b`.
3021/// ```
3022/// # use safe_arch::*;
3023/// let a = m128d::from_array([92.0, 87.5]);
3024/// let b = m128d::from_array([100.0, -6.0]);
3025/// let c = unpack_low_m128d(a, b).to_array();
3026/// assert_eq!(c, [92.0, 100.0]);
3027/// ```
3028#[must_use]
3029#[inline(always)]
3030#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
3031pub fn unpack_low_m128d(a: m128d, b: m128d) -> m128d {
3032 m128d(unsafe { _mm_unpacklo_pd(a.0, b.0) })
3033}
3034
3035/// Bitwise `a ^ b`.
3036/// ```
3037/// # use safe_arch::*;
3038/// let a = m128d::from_array([1.0, 0.0]);
3039/// let b = m128d::from_array([1.0, 1.0]);
3040/// let c = bitxor_m128d(a, b).to_array();
3041/// assert_eq!(c, [0.0, 1.0]);
3042/// ```
3043#[must_use]
3044#[inline(always)]
3045#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
3046pub fn bitxor_m128d(a: m128d, b: m128d) -> m128d {
3047 m128d(unsafe { _mm_xor_pd(a.0, b.0) })
3048}
3049
3050/// Bitwise `a ^ b`.
3051/// ```
3052/// # use safe_arch::*;
3053/// let a = m128i::from([1, 0, 1, 0]);
3054/// let b = m128i::from([1, 1, 0, 0]);
3055/// let c: [i32; 4] = bitxor_m128i(a, b).into();
3056/// assert_eq!(c, [0, 1, 1, 0]);
3057/// ```
3058#[must_use]
3059#[inline(always)]
3060#[cfg_attr(docsrs, doc(cfg(target_feature = "sse2")))]
3061pub fn bitxor_m128i(a: m128i, b: m128i) -> m128i {
3062 m128i(unsafe { _mm_xor_si128(a.0, b.0) })
3063}
3064
//
// Here we define the Operator Overloads for `m128d` and `m128i`. Each one just
// calls the correct function from above. By putting the impls here and not with
// the types themselves we theoretically would be able to build the crate safely
// even if there's no `sse2` feature enabled. You'd just have the `m128d` and
// `m128i` types without the operator overloads is all. Not that the standard
// Rust distribution can build properly without `sse` enabled, but maybe you're
// using a custom target or something. It doesn't really put us out of our way,
// so it doesn't hurt to try and accommodate the potential use case.
//
3075
3076// First we provide all `m128d` impls.
3077
3078impl Add for m128d {
3079 type Output = Self;
3080 #[must_use]
3081 #[inline(always)]
3082 fn add(self, rhs: Self) -> Self {
3083 add_m128d(self, rhs)
3084 }
3085}
3086impl AddAssign for m128d {
3087 #[inline(always)]
3088 fn add_assign(&mut self, rhs: Self) {
3089 *self = *self + rhs;
3090 }
3091}
3092
3093impl BitAnd for m128d {
3094 type Output = Self;
3095 #[must_use]
3096 #[inline(always)]
3097 fn bitand(self, rhs: Self) -> Self {
3098 bitand_m128d(self, rhs)
3099 }
3100}
3101impl BitAndAssign for m128d {
3102 #[inline(always)]
3103 fn bitand_assign(&mut self, rhs: Self) {
3104 *self = *self & rhs;
3105 }
3106}
3107
3108impl BitOr for m128d {
3109 type Output = Self;
3110 #[must_use]
3111 #[inline(always)]
3112 fn bitor(self, rhs: Self) -> Self {
3113 bitor_m128d(self, rhs)
3114 }
3115}
3116impl BitOrAssign for m128d {
3117 #[inline(always)]
3118 fn bitor_assign(&mut self, rhs: Self) {
3119 *self = *self | rhs;
3120 }
3121}
3122
3123impl BitXor for m128d {
3124 type Output = Self;
3125 #[must_use]
3126 #[inline(always)]
3127 fn bitxor(self, rhs: Self) -> Self {
3128 bitxor_m128d(self, rhs)
3129 }
3130}
3131impl BitXorAssign for m128d {
3132 #[inline(always)]
3133 fn bitxor_assign(&mut self, rhs: Self) {
3134 *self = *self ^ rhs;
3135 }
3136}
3137
3138impl Div for m128d {
3139 type Output = Self;
3140 #[must_use]
3141 #[inline(always)]
3142 fn div(self, rhs: Self) -> Self {
3143 div_m128d(self, rhs)
3144 }
3145}
3146impl DivAssign for m128d {
3147 #[inline(always)]
3148 fn div_assign(&mut self, rhs: Self) {
3149 *self = *self / rhs;
3150 }
3151}
3152
3153impl Mul for m128d {
3154 type Output = Self;
3155 #[must_use]
3156 #[inline(always)]
3157 fn mul(self, rhs: Self) -> Self {
3158 mul_m128d(self, rhs)
3159 }
3160}
3161impl MulAssign for m128d {
3162 #[inline(always)]
3163 fn mul_assign(&mut self, rhs: Self) {
3164 *self = *self * rhs;
3165 }
3166}
3167
3168impl Neg for m128d {
3169 type Output = Self;
3170 #[must_use]
3171 #[inline(always)]
3172 fn neg(self) -> Self {
3173 sub_m128d(zeroed_m128d(), self)
3174 }
3175}
3176
3177impl Not for m128d {
3178 type Output = Self;
3179 /// Not a direct intrinsic, but it's very useful and the implementation is
3180 /// simple enough.
3181 ///
3182 /// Negates the bits by performing an `xor` with an all-1s bit pattern.
3183 #[must_use]
3184 #[inline(always)]
3185 fn not(self) -> Self {
3186 let all_bits = set_splat_m128d(f64::from_bits(u64::MAX));
3187 self ^ all_bits
3188 }
3189}
3190
3191impl Sub for m128d {
3192 type Output = Self;
3193 #[must_use]
3194 #[inline(always)]
3195 fn sub(self, rhs: Self) -> Self {
3196 sub_m128d(self, rhs)
3197 }
3198}
3199impl SubAssign for m128d {
3200 #[inline(always)]
3201 fn sub_assign(&mut self, rhs: Self) {
3202 *self = *self - rhs;
3203 }
3204}
3205
3206impl PartialEq for m128d {
3207 /// Not a direct intrinsic, this is a `cmp_eq_mask` and then a `move_mask`.
3208 #[must_use]
3209 #[inline(always)]
3210 fn eq(&self, other: &Self) -> bool {
3211 move_mask_m128d(cmp_eq_mask_m128d(*self, *other)) == 0b11
3212 }
3213}
3214
3215// Next we provide all `m128i` impls. Since the interpretation of the lanes
3216// depends on the operation used, we only provide the bit ops (which are "lane
3217// agnostic").
3218
3219impl BitAnd for m128i {
3220 type Output = Self;
3221 #[must_use]
3222 #[inline(always)]
3223 fn bitand(self, rhs: Self) -> Self {
3224 bitand_m128i(self, rhs)
3225 }
3226}
3227impl BitAndAssign for m128i {
3228 #[inline(always)]
3229 fn bitand_assign(&mut self, rhs: Self) {
3230 *self = *self & rhs;
3231 }
3232}
3233
3234impl BitOr for m128i {
3235 type Output = Self;
3236 #[must_use]
3237 #[inline(always)]
3238 fn bitor(self, rhs: Self) -> Self {
3239 bitor_m128i(self, rhs)
3240 }
3241}
3242impl BitOrAssign for m128i {
3243 #[inline(always)]
3244 fn bitor_assign(&mut self, rhs: Self) {
3245 *self = *self | rhs;
3246 }
3247}
3248
3249impl BitXor for m128i {
3250 type Output = Self;
3251 #[must_use]
3252 #[inline(always)]
3253 fn bitxor(self, rhs: Self) -> Self {
3254 bitxor_m128i(self, rhs)
3255 }
3256}
3257impl BitXorAssign for m128i {
3258 #[inline(always)]
3259 fn bitxor_assign(&mut self, rhs: Self) {
3260 *self = *self ^ rhs;
3261 }
3262}
3263
3264impl Not for m128i {
3265 type Output = Self;
3266 /// Not a direct intrinsic, but it's very useful and the implementation is
3267 /// simple enough.
3268 ///
3269 /// Negates the bits by performing an `xor` with an all-1s bit pattern.
3270 #[must_use]
3271 #[inline(always)]
3272 fn not(self) -> Self {
3273 let all_bits = set_splat_i32_m128i(-1);
3274 self ^ all_bits
3275 }
3276}
3277
3278impl PartialEq for m128i {
3279 /// Not a direct intrinsic, this is a `cmp_eq_mask_i8_m128i` and then a
3280 /// `move_mask_i8_m128i`.
3281 #[must_use]
3282 #[inline(always)]
3283 fn eq(&self, other: &Self) -> bool {
3284 move_mask_i8_m128i(cmp_eq_mask_i8_m128i(*self, *other)) == 0b11111111_11111111
3285 }
3286}
3287/// Unlike with the floating types, ints have absolute equality.
3288impl Eq for m128i {}
3289