core/stdarch/crates/core_arch/src/x86/avx512f.rs

use crate::{
    arch::asm,
    core_arch::{simd::*, x86::*},
    intrinsics::simd::*,
    intrinsics::{fmaf32, fmaf64},
    mem, ptr,
};

use core::hint::unreachable_unchecked;
#[cfg(test)]
use stdarch_test::assert_instr;

/// Computes the absolute values of packed 32-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi32&expand=39)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm512_abs_epi32(a: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i32x16();
        let r = simd_select::<i32x16, _>(simd_lt(a, i32x16::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

/// Computes the absolute value of packed 32-bit integers in `a`, and stores the
/// unsigned results in `dst` using writemask `k` (elements are copied from
/// `src` when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi32&expand=40)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm512_mask_abs_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    unsafe {
        let abs = _mm512_abs_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, abs, src.as_i32x16()))
    }
}

/// Computes the absolute value of packed 32-bit integers in `a`, and stores the
/// unsigned results in `dst` using zeromask `k` (elements are zeroed out when
/// the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi32&expand=41)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm512_maskz_abs_epi32(k: __mmask16, a: __m512i) -> __m512i {
    unsafe {
        let abs = _mm512_abs_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, abs, i32x16::ZERO))
    }
}
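
// Illustrative usage sketch (added for exposition; not part of the upstream module).
// It shows how the write- and zero-mask variants differ: with the low eight mask
// bits set, lanes 0..8 receive |a| while lanes 8..16 either keep `src` (mask_) or
// become 0 (maskz_). The lane values and the helper name are arbitrary.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_masked_abs_epi32() -> (__m512i, __m512i) {
    let a = _mm512_set1_epi32(-7);
    let src = _mm512_set1_epi32(1);
    let merged = _mm512_mask_abs_epi32(src, 0b0000_0000_1111_1111, a);
    let zeroed = _mm512_maskz_abs_epi32(0b0000_0000_1111_1111, a);
    (merged, zeroed)
}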

/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi32&expand=37)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm256_mask_abs_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let abs = _mm256_abs_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, abs, src.as_i32x8()))
    }
}

/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi32&expand=38)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm256_maskz_abs_epi32(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let abs = _mm256_abs_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, abs, i32x8::ZERO))
    }
}

/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi32&expand=34)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm_mask_abs_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs = _mm_abs_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, abs, src.as_i32x4()))
    }
}

/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi32&expand=35)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm_maskz_abs_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs = _mm_abs_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, abs, i32x4::ZERO))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi64&expand=48)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm512_abs_epi64(a: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i64x8();
        let r = simd_select::<i64x8, _>(simd_lt(a, i64x8::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi64&expand=49)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm512_mask_abs_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        let abs = _mm512_abs_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, abs, src.as_i64x8()))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi64&expand=50)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm512_maskz_abs_epi64(k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        let abs = _mm512_abs_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, abs, i64x8::ZERO))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_abs_epi64&expand=45)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm256_abs_epi64(a: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i64x4();
        let r = simd_select::<i64x4, _>(simd_lt(a, i64x4::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi64&expand=46)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm256_mask_abs_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let abs = _mm256_abs_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, abs, src.as_i64x4()))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm256_maskz_abs_epi64(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let abs = _mm256_abs_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, abs, i64x4::ZERO))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm_abs_epi64(a: __m128i) -> __m128i {
    unsafe {
        let a = a.as_i64x2();
        let r = simd_select::<i64x2, _>(simd_lt(a, i64x2::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm_mask_abs_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs = _mm_abs_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, abs, src.as_i64x2()))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm_maskz_abs_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs = _mm_abs_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, abs, i64x2::ZERO))
    }
}
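
// Illustrative sketch (added for exposition; not part of the upstream module). The
// 64-bit absolute value is also available at 256- and 128-bit widths when AVX-512VL
// is enabled alongside AVX-512F; the zero-masked 128-bit form below keeps |a| only
// in lane 0 and zeroes lane 1. The helper name and mask are arbitrary.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn example_maskz_abs_epi64_vl(a: __m128i) -> __m128i {
    _mm_maskz_abs_epi64(0b01, a)
}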

/// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_ps&expand=65)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpandd))]
pub fn _mm512_abs_ps(v2: __m512) -> __m512 {
    unsafe { simd_fabs(v2) }
}

/// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_ps&expand=66)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpandd))]
pub fn _mm512_mask_abs_ps(src: __m512, k: __mmask16, v2: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, simd_fabs(v2), src) }
}

/// Finds the absolute value of each packed double-precision (64-bit) floating-point element in v2, storing the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_pd&expand=60)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpandq))]
pub fn _mm512_abs_pd(v2: __m512d) -> __m512d {
    unsafe { simd_fabs(v2) }
}

/// Finds the absolute value of each packed double-precision (64-bit) floating-point element in v2, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_pd&expand=61)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpandq))]
pub fn _mm512_mask_abs_pd(src: __m512d, k: __mmask8, v2: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, simd_fabs(v2), src) }
}
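
// Illustrative sketch (added for exposition; not part of the upstream module): the
// floating-point absolute value clears the sign bit, and the masked form merges
// with `src` per lane. Here lanes 0..4 receive |a| = 3.5 and lanes 4..16 keep
// `src` = 1.0. Values and the helper name are arbitrary.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_masked_abs_ps() -> __m512 {
    let a = _mm512_set1_ps(-3.5);
    let src = _mm512_set1_ps(1.0);
    _mm512_mask_abs_ps(src, 0b0000_0000_0000_1111, a)
}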

/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi32&expand=3801)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm512_mask_mov_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    unsafe {
        let mov = a.as_i32x16();
        transmute(simd_select_bitmask(k, mov, src.as_i32x16()))
    }
}

/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi32&expand=3802)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm512_maskz_mov_epi32(k: __mmask16, a: __m512i) -> __m512i {
    unsafe {
        let mov = a.as_i32x16();
        transmute(simd_select_bitmask(k, mov, i32x16::ZERO))
    }
}

/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi32&expand=3799)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm256_mask_mov_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let mov = a.as_i32x8();
        transmute(simd_select_bitmask(k, mov, src.as_i32x8()))
    }
}

/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi32&expand=3800)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm256_maskz_mov_epi32(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let mov = a.as_i32x8();
        transmute(simd_select_bitmask(k, mov, i32x8::ZERO))
    }
}

/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi32&expand=3797)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm_mask_mov_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let mov = a.as_i32x4();
        transmute(simd_select_bitmask(k, mov, src.as_i32x4()))
    }
}

/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi32&expand=3798)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm_maskz_mov_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let mov = a.as_i32x4();
        transmute(simd_select_bitmask(k, mov, i32x4::ZERO))
    }
}
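
// Illustrative sketch (added for exposition; not part of the upstream module):
// `mask_mov` is a per-lane blend, so selecting between two vectors under a bitmask
// is a single call. Lanes with a set bit in `k` come from `a`, the rest from `b`.
// The helper name is arbitrary.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_blend_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    _mm512_mask_mov_epi32(b, k, a)
}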

/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi64&expand=3807)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm512_mask_mov_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        let mov = a.as_i64x8();
        transmute(simd_select_bitmask(k, mov, src.as_i64x8()))
    }
}

/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi64&expand=3808)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm512_maskz_mov_epi64(k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        let mov = a.as_i64x8();
        transmute(simd_select_bitmask(k, mov, i64x8::ZERO))
    }
}

/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi64&expand=3805)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm256_mask_mov_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let mov = a.as_i64x4();
        transmute(simd_select_bitmask(k, mov, src.as_i64x4()))
    }
}

/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi64&expand=3806)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm256_maskz_mov_epi64(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let mov = a.as_i64x4();
        transmute(simd_select_bitmask(k, mov, i64x4::ZERO))
    }
}

/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi64&expand=3803)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm_mask_mov_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let mov = a.as_i64x2();
        transmute(simd_select_bitmask(k, mov, src.as_i64x2()))
    }
}

/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi64&expand=3804)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm_maskz_mov_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let mov = a.as_i64x2();
        transmute(simd_select_bitmask(k, mov, i64x2::ZERO))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_ps&expand=3825)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm512_mask_mov_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        let mov = a.as_f32x16();
        transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_ps&expand=3826)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm512_maskz_mov_ps(k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        let mov = a.as_f32x16();
        transmute(simd_select_bitmask(k, mov, f32x16::ZERO))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_ps&expand=3823)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm256_mask_mov_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe {
        let mov = a.as_f32x8();
        transmute(simd_select_bitmask(k, mov, src.as_f32x8()))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_ps&expand=3824)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm256_maskz_mov_ps(k: __mmask8, a: __m256) -> __m256 {
    unsafe {
        let mov = a.as_f32x8();
        transmute(simd_select_bitmask(k, mov, f32x8::ZERO))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_ps&expand=3821)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm_mask_mov_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        let mov = a.as_f32x4();
        transmute(simd_select_bitmask(k, mov, src.as_f32x4()))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_ps&expand=3822)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm_maskz_mov_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        let mov = a.as_f32x4();
        transmute(simd_select_bitmask(k, mov, f32x4::ZERO))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_pd&expand=3819)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm512_mask_mov_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    unsafe {
        let mov = a.as_f64x8();
        transmute(simd_select_bitmask(k, mov, src.as_f64x8()))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_pd&expand=3820)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm512_maskz_mov_pd(k: __mmask8, a: __m512d) -> __m512d {
    unsafe {
        let mov = a.as_f64x8();
        transmute(simd_select_bitmask(k, mov, f64x8::ZERO))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_pd&expand=3817)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm256_mask_mov_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    unsafe {
        let mov = a.as_f64x4();
        transmute(simd_select_bitmask(k, mov, src.as_f64x4()))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_pd&expand=3818)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm256_maskz_mov_pd(k: __mmask8, a: __m256d) -> __m256d {
    unsafe {
        let mov = a.as_f64x4();
        transmute(simd_select_bitmask(k, mov, f64x4::ZERO))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_pd&expand=3815)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm_mask_mov_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    unsafe {
        let mov = a.as_f64x2();
        transmute(simd_select_bitmask(k, mov, src.as_f64x2()))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_pd&expand=3816)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm_maskz_mov_pd(k: __mmask8, a: __m128d) -> __m128d {
    unsafe {
        let mov = a.as_f64x2();
        transmute(simd_select_bitmask(k, mov, f64x2::ZERO))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi32&expand=100)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm512_add_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_add(a.as_i32x16(), b.as_i32x16())) }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi32&expand=101)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm512_mask_add_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_add_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, add, src.as_i32x16()))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi32&expand=102)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm512_maskz_add_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_add_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, add, i32x16::ZERO))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi32&expand=98)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm256_mask_add_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_add_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, add, src.as_i32x8()))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi32&expand=99)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm256_maskz_add_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_add_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, add, i32x8::ZERO))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi32&expand=95)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm_mask_add_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_add_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, add, src.as_i32x4()))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi32&expand=96)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm_maskz_add_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_add_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, add, i32x4::ZERO))
    }
}
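
// Illustrative sketch (added for exposition; not part of the upstream module):
// conditional accumulation with a writemask. Passing the accumulator as both `src`
// and the first operand updates only the lanes selected by `k` with `acc + x` and
// leaves the other accumulator lanes unchanged. The helper name is arbitrary.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_masked_accumulate_epi32(acc: __m512i, x: __m512i, k: __mmask16) -> __m512i {
    _mm512_mask_add_epi32(acc, k, acc, x)
}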

/// Add packed 64-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi64&expand=109)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm512_add_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_add(a.as_i64x8(), b.as_i64x8())) }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi64&expand=110)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm512_mask_add_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_add_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, add, src.as_i64x8()))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi64&expand=111)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm512_maskz_add_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_add_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, add, i64x8::ZERO))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi64&expand=107)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm256_mask_add_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_add_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, add, src.as_i64x4()))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi64&expand=108)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm256_maskz_add_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_add_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, add, i64x4::ZERO))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi64&expand=104)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm_mask_add_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_add_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, add, src.as_i64x2()))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi64&expand=105)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm_maskz_add_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_add_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, add, i64x2::ZERO))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_ps&expand=139)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm512_add_ps(a: __m512, b: __m512) -> __m512 {
    unsafe { transmute(simd_add(a.as_f32x16(), b.as_f32x16())) }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_ps&expand=140)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm512_mask_add_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let add = _mm512_add_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, add, src.as_f32x16()))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_ps&expand=141)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm512_maskz_add_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let add = _mm512_add_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, add, f32x16::ZERO))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_ps&expand=137)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm256_mask_add_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let add = _mm256_add_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, add, src.as_f32x8()))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_ps&expand=138)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm256_maskz_add_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let add = _mm256_add_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, add, f32x8::ZERO))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_ps&expand=134)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm_mask_add_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let add = _mm_add_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, add, src.as_f32x4()))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_ps&expand=135)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm_maskz_add_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let add = _mm_add_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, add, f32x4::ZERO))
    }
}
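
// Illustrative sketch (added for exposition; not part of the upstream module): the
// zero-masked add is convenient for partial vectors, since masked-off lanes become
// exactly 0.0 instead of inheriting stale data from a merge source. The helper name
// is arbitrary.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_maskz_add_ps(a: __m512, b: __m512, valid: __mmask16) -> __m512 {
    _mm512_maskz_add_ps(valid, a, b)
}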

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_pd&expand=127)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm512_add_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(simd_add(a.as_f64x8(), b.as_f64x8())) }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_pd&expand=128)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm512_mask_add_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let add = _mm512_add_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, add, src.as_f64x8()))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_pd&expand=129)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm512_maskz_add_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let add = _mm512_add_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, add, f64x8::ZERO))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_pd&expand=125)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm256_mask_add_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let add = _mm256_add_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, add, src.as_f64x4()))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_pd&expand=126)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm256_maskz_add_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let add = _mm256_add_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, add, f64x4::ZERO))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_pd&expand=122)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm_mask_add_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let add = _mm_add_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, add, src.as_f64x2()))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_pd&expand=123)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm_maskz_add_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let add = _mm_add_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, add, f64x2::ZERO))
    }
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi32&expand=5694)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub fn _mm512_sub_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_sub(a.as_i32x16(), b.as_i32x16())) }
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi32&expand=5692)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub fn _mm512_mask_sub_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_sub_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, sub, src.as_i32x16()))
    }
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi32&expand=5693)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub fn _mm512_maskz_sub_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_sub_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, sub, i32x16::ZERO))
    }
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi32&expand=5689)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub fn _mm256_mask_sub_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_sub_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, sub, src.as_i32x8()))
    }
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi32&expand=5690)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub fn _mm256_maskz_sub_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_sub_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, sub, i32x8::ZERO))
    }
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi32&expand=5686)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub fn _mm_mask_sub_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_sub_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, sub, src.as_i32x4()))
    }
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi32&expand=5687)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub fn _mm_maskz_sub_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_sub_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, sub, i32x4::ZERO))
    }
}
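
// Illustrative sketch (added for exposition; not part of the upstream module):
// masked subtraction as a conditional decrement; lanes not selected by `k` keep
// their current value of `v`. The helper name is arbitrary.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_masked_decrement_epi32(v: __m512i, step: __m512i, k: __mmask16) -> __m512i {
    _mm512_mask_sub_epi32(v, k, v, step)
}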
1099
1100/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst.
1101///
1102/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi64&expand=5703)
1103#[inline]
1104#[target_feature(enable = "avx512f")]
1105#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1106#[cfg_attr(test, assert_instr(vpsubq))]
1107pub fn _mm512_sub_epi64(a: __m512i, b: __m512i) -> __m512i {
1108    unsafe { transmute(simd_sub(a.as_i64x8(), b.as_i64x8())) }
1109}
1110
1111/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1112///
1113/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi64&expand=5701)
1114#[inline]
1115#[target_feature(enable = "avx512f")]
1116#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1117#[cfg_attr(test, assert_instr(vpsubq))]
1118pub fn _mm512_mask_sub_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1119    unsafe {
1120        let sub = _mm512_sub_epi64(a, b).as_i64x8();
1121        transmute(simd_select_bitmask(k, sub, src.as_i64x8()))
1122    }
1123}
1124
1125/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1126///
1127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi64&expand=5702)
1128#[inline]
1129#[target_feature(enable = "avx512f")]
1130#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1131#[cfg_attr(test, assert_instr(vpsubq))]
1132pub fn _mm512_maskz_sub_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1133    unsafe {
1134        let sub = _mm512_sub_epi64(a, b).as_i64x8();
1135        transmute(simd_select_bitmask(k, sub, i64x8::ZERO))
1136    }
1137}
1138
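// Minimal usage sketch (assumption, not upstream code): shows how the writemask form of
// the 512-bit 64-bit subtraction merges unselected lanes from `src`. The helper name
// `_example_mask_sub_epi64` is hypothetical.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_mask_sub_epi64() {
    let a = _mm512_set1_epi64(10);
    let b = _mm512_set1_epi64(3);
    let src = _mm512_set1_epi64(-1);
    // The low four lanes are selected and become 10 - 3 = 7; the high four keep -1.
    let r = _mm512_mask_sub_epi64(src, 0b0000_1111, a, b);
    let expected = _mm512_set_epi64(-1, -1, -1, -1, 7, 7, 7, 7);
    assert_eq!(_mm512_cmpeq_epi64_mask(r, expected), 0xff);
}
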
1139/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1140///
1141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi64&expand=5698)
1142#[inline]
1143#[target_feature(enable = "avx512f,avx512vl")]
1144#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1145#[cfg_attr(test, assert_instr(vpsubq))]
1146pub fn _mm256_mask_sub_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1147    unsafe {
1148        let sub = _mm256_sub_epi64(a, b).as_i64x4();
1149        transmute(simd_select_bitmask(k, sub, src.as_i64x4()))
1150    }
1151}
1152
1153/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1154///
1155/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi64&expand=5699)
1156#[inline]
1157#[target_feature(enable = "avx512f,avx512vl")]
1158#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1159#[cfg_attr(test, assert_instr(vpsubq))]
1160pub fn _mm256_maskz_sub_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1161    unsafe {
1162        let sub = _mm256_sub_epi64(a, b).as_i64x4();
1163        transmute(simd_select_bitmask(k, sub, i64x4::ZERO))
1164    }
1165}
1166
1167/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1168///
1169/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi64&expand=5695)
1170#[inline]
1171#[target_feature(enable = "avx512f,avx512vl")]
1172#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1173#[cfg_attr(test, assert_instr(vpsubq))]
1174pub fn _mm_mask_sub_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1175    unsafe {
1176        let sub = _mm_sub_epi64(a, b).as_i64x2();
1177        transmute(simd_select_bitmask(k, sub, src.as_i64x2()))
1178    }
1179}
1180
1181/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1182///
1183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi64&expand=5696)
1184#[inline]
1185#[target_feature(enable = "avx512f,avx512vl")]
1186#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1187#[cfg_attr(test, assert_instr(vpsubq))]
1188pub fn _mm_maskz_sub_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1189    unsafe {
1190        let sub = _mm_sub_epi64(a, b).as_i64x2();
1191        transmute(simd_select_bitmask(k, sub, i64x2::ZERO))
1192    }
1193}
1194
1195/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst.
1196///
1197/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_ps&expand=5733)
1198#[inline]
1199#[target_feature(enable = "avx512f")]
1200#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1201#[cfg_attr(test, assert_instr(vsubps))]
1202pub fn _mm512_sub_ps(a: __m512, b: __m512) -> __m512 {
1203    unsafe { transmute(simd_sub(a.as_f32x16(), b.as_f32x16())) }
1204}
1205
1206/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1207///
1208/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_ps&expand=5731)
1209#[inline]
1210#[target_feature(enable = "avx512f")]
1211#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1212#[cfg_attr(test, assert_instr(vsubps))]
1213pub fn _mm512_mask_sub_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
1214    unsafe {
1215        let sub = _mm512_sub_ps(a, b).as_f32x16();
1216        transmute(simd_select_bitmask(k, sub, src.as_f32x16()))
1217    }
1218}
1219
1220/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1221///
1222/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_ps&expand=5732)
1223#[inline]
1224#[target_feature(enable = "avx512f")]
1225#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1226#[cfg_attr(test, assert_instr(vsubps))]
1227pub fn _mm512_maskz_sub_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
1228    unsafe {
1229        let sub = _mm512_sub_ps(a, b).as_f32x16();
1230        transmute(simd_select_bitmask(k, sub, f32x16::ZERO))
1231    }
1232}
1233
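// Usage sketch (assumption, not upstream code): the writemask form of the packed
// single-precision subtraction copies unselected lanes from `src` unchanged. The helper
// name `_example_mask_sub_ps` is hypothetical; 5.0 - 1.5 = 3.5 is exact in f32.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_mask_sub_ps() {
    let a = _mm512_set1_ps(5.0);
    let b = _mm512_set1_ps(1.5);
    let src = _mm512_set1_ps(-2.0);
    // The low eight lanes become 3.5; the high eight keep -2.0 from src.
    let r = _mm512_mask_sub_ps(src, 0x00ff, a, b);
    assert_eq!(_mm512_cmpeq_ps_mask(r, _mm512_set1_ps(3.5)), 0x00ff);
    assert_eq!(_mm512_cmpeq_ps_mask(r, _mm512_set1_ps(-2.0)), 0xff00);
}
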
1234/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1235///
1236/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_ps&expand=5728)
1237#[inline]
1238#[target_feature(enable = "avx512f,avx512vl")]
1239#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1240#[cfg_attr(test, assert_instr(vsubps))]
1241pub fn _mm256_mask_sub_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
1242    unsafe {
1243        let sub = _mm256_sub_ps(a, b).as_f32x8();
1244        transmute(simd_select_bitmask(k, sub, src.as_f32x8()))
1245    }
1246}
1247
1248/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1249///
1250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_ps&expand=5729)
1251#[inline]
1252#[target_feature(enable = "avx512f,avx512vl")]
1253#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1254#[cfg_attr(test, assert_instr(vsubps))]
1255pub fn _mm256_maskz_sub_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
1256    unsafe {
1257        let sub = _mm256_sub_ps(a, b).as_f32x8();
1258        transmute(simd_select_bitmask(k, sub, f32x8::ZERO))
1259    }
1260}
1261
1262/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1263///
1264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_ps&expand=5725)
1265#[inline]
1266#[target_feature(enable = "avx512f,avx512vl")]
1267#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1268#[cfg_attr(test, assert_instr(vsubps))]
1269pub fn _mm_mask_sub_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
1270    unsafe {
1271        let sub = _mm_sub_ps(a, b).as_f32x4();
1272        transmute(simd_select_bitmask(k, sub, src.as_f32x4()))
1273    }
1274}
1275
1276/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1277///
1278/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_ps&expand=5726)
1279#[inline]
1280#[target_feature(enable = "avx512f,avx512vl")]
1281#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1282#[cfg_attr(test, assert_instr(vsubps))]
1283pub fn _mm_maskz_sub_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
1284    unsafe {
1285        let sub = _mm_sub_ps(a, b).as_f32x4();
1286        transmute(simd_select_bitmask(k, sub, f32x4::ZERO))
1287    }
1288}
1289
1290/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst.
1291///
1292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_pd&expand=5721)
1293#[inline]
1294#[target_feature(enable = "avx512f")]
1295#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1296#[cfg_attr(test, assert_instr(vsubpd))]
1297pub fn _mm512_sub_pd(a: __m512d, b: __m512d) -> __m512d {
1298    unsafe { transmute(simd_sub(a.as_f64x8(), b.as_f64x8())) }
1299}
1300
1301/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1302///
1303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_pd&expand=5719)
1304#[inline]
1305#[target_feature(enable = "avx512f")]
1306#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1307#[cfg_attr(test, assert_instr(vsubpd))]
1308pub fn _mm512_mask_sub_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1309    unsafe {
1310        let sub = _mm512_sub_pd(a, b).as_f64x8();
1311        transmute(simd_select_bitmask(k, sub, src.as_f64x8()))
1312    }
1313}
1314
1315/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1316///
1317/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_pd&expand=5720)
1318#[inline]
1319#[target_feature(enable = "avx512f")]
1320#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1321#[cfg_attr(test, assert_instr(vsubpd))]
1322pub fn _mm512_maskz_sub_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1323    unsafe {
1324        let sub = _mm512_sub_pd(a, b).as_f64x8();
1325        transmute(simd_select_bitmask(k, sub, f64x8::ZERO))
1326    }
1327}
1328
1329/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1330///
1331/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_pd&expand=5716)
1332#[inline]
1333#[target_feature(enable = "avx512f,avx512vl")]
1334#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1335#[cfg_attr(test, assert_instr(vsubpd))]
1336pub fn _mm256_mask_sub_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1337    unsafe {
1338        let sub = _mm256_sub_pd(a, b).as_f64x4();
1339        transmute(simd_select_bitmask(k, sub, src.as_f64x4()))
1340    }
1341}
1342
1343/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1344///
1345/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_pd&expand=5717)
1346#[inline]
1347#[target_feature(enable = "avx512f,avx512vl")]
1348#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1349#[cfg_attr(test, assert_instr(vsubpd))]
1350pub fn _mm256_maskz_sub_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1351    unsafe {
1352        let sub = _mm256_sub_pd(a, b).as_f64x4();
1353        transmute(simd_select_bitmask(k, sub, f64x4::ZERO))
1354    }
1355}
1356
1357/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1358///
1359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_pd&expand=5713)
1360#[inline]
1361#[target_feature(enable = "avx512f,avx512vl")]
1362#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1363#[cfg_attr(test, assert_instr(vsubpd))]
1364pub fn _mm_mask_sub_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1365    unsafe {
1366        let sub = _mm_sub_pd(a, b).as_f64x2();
1367        transmute(simd_select_bitmask(k, sub, src.as_f64x2()))
1368    }
1369}
1370
1371/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1372///
1373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_pd&expand=5714)
1374#[inline]
1375#[target_feature(enable = "avx512f,avx512vl")]
1376#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1377#[cfg_attr(test, assert_instr(vsubpd))]
1378pub fn _mm_maskz_sub_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1379    unsafe {
1380        let sub = _mm_sub_pd(a, b).as_f64x2();
1381        transmute(simd_select_bitmask(k, sub, f64x2::ZERO))
1382    }
1383}
1384
1385/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst.
1386///
1387/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_epi32&expand=3907)
1388#[inline]
1389#[target_feature(enable = "avx512f")]
1390#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1391#[cfg_attr(test, assert_instr(vpmuldq))]
1392pub fn _mm512_mul_epi32(a: __m512i, b: __m512i) -> __m512i {
1393    unsafe {
1394        let a = simd_cast::<_, i64x8>(simd_cast::<_, i32x8>(a.as_i64x8()));
1395        let b = simd_cast::<_, i64x8>(simd_cast::<_, i32x8>(b.as_i64x8()));
1396        transmute(simd_mul(a, b))
1397    }
1398}
1399
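// Worked sketch (assumption, not upstream code): `_mm512_mul_epi32` reads only the low,
// sign-extended 32 bits of each 64-bit lane, so the upper halves of the inputs are
// ignored. The helper name `_example_mul_epi32` is hypothetical.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_mul_epi32() {
    // Each lane carries junk in its upper 32 bits and -4 / 3 in its lower 32 bits.
    let a = _mm512_set1_epi64((1i64 << 40) | (-4i64 & 0xffff_ffff));
    let b = _mm512_set1_epi64((7i64 << 40) | 3);
    // Only the low halves participate: -4 * 3 = -12 in every 64-bit result lane.
    let r = _mm512_mul_epi32(a, b);
    assert_eq!(_mm512_cmpeq_epi64_mask(r, _mm512_set1_epi64(-12)), 0xff);
}
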
1400/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1401///
1402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_epi32&expand=3905)
1403#[inline]
1404#[target_feature(enable = "avx512f")]
1405#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1406#[cfg_attr(test, assert_instr(vpmuldq))]
1407pub fn _mm512_mask_mul_epi32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1408    unsafe {
1409        let mul = _mm512_mul_epi32(a, b).as_i64x8();
1410        transmute(simd_select_bitmask(k, mul, src.as_i64x8()))
1411    }
1412}
1413
1414/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1415///
1416/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_epi32&expand=3906)
1417#[inline]
1418#[target_feature(enable = "avx512f")]
1419#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1420#[cfg_attr(test, assert_instr(vpmuldq))]
1421pub fn _mm512_maskz_mul_epi32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1422    unsafe {
1423        let mul = _mm512_mul_epi32(a, b).as_i64x8();
1424        transmute(simd_select_bitmask(k, mul, i64x8::ZERO))
1425    }
1426}
1427
1428/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1429///
1430/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_epi32&expand=3902)
1431#[inline]
1432#[target_feature(enable = "avx512f,avx512vl")]
1433#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1434#[cfg_attr(test, assert_instr(vpmuldq))]
1435pub fn _mm256_mask_mul_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1436    unsafe {
1437        let mul = _mm256_mul_epi32(a, b).as_i64x4();
1438        transmute(simd_select_bitmask(k, mul, src.as_i64x4()))
1439    }
1440}
1441
1442/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1443///
1444/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_epi32&expand=3903)
1445#[inline]
1446#[target_feature(enable = "avx512f,avx512vl")]
1447#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1448#[cfg_attr(test, assert_instr(vpmuldq))]
1449pub fn _mm256_maskz_mul_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1450    unsafe {
1451        let mul = _mm256_mul_epi32(a, b).as_i64x4();
1452        transmute(simd_select_bitmask(k, mul, i64x4::ZERO))
1453    }
1454}
1455
1456/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1457///
1458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_epi32&expand=3899)
1459#[inline]
1460#[target_feature(enable = "avx512f,avx512vl")]
1461#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1462#[cfg_attr(test, assert_instr(vpmuldq))]
1463pub fn _mm_mask_mul_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1464    unsafe {
1465        let mul = _mm_mul_epi32(a, b).as_i64x2();
1466        transmute(simd_select_bitmask(k, mul, src.as_i64x2()))
1467    }
1468}
1469
1470/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1471///
1472/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_epi32&expand=3900)
1473#[inline]
1474#[target_feature(enable = "avx512f,avx512vl")]
1475#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1476#[cfg_attr(test, assert_instr(vpmuldq))]
1477pub fn _mm_maskz_mul_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1478    unsafe {
1479        let mul = _mm_mul_epi32(a, b).as_i64x2();
1480        transmute(simd_select_bitmask(k, mul, i64x2::ZERO))
1481    }
1482}
1483
1484/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst.
1485///
1486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullo_epi32&expand=4005)
1487#[inline]
1488#[target_feature(enable = "avx512f")]
1489#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1490#[cfg_attr(test, assert_instr(vpmulld))]
1491pub fn _mm512_mullo_epi32(a: __m512i, b: __m512i) -> __m512i {
1492    unsafe { transmute(simd_mul(a.as_i32x16(), b.as_i32x16())) }
1493}
1494
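// Hedged sketch (not upstream code): `_mm512_mullo_epi32` keeps only the low 32 bits of
// each 64-bit product, so products that overflow 32 bits wrap. The helper name
// `_example_mullo_epi32` is hypothetical.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_mullo_epi32() {
    let a = _mm512_set1_epi32(65_537); // 2^16 + 1
    let b = _mm512_set1_epi32(65_539); // 2^16 + 3
    // Full product = 2^32 + 262_147; only the low 32 bits (262_147) are stored.
    let r = _mm512_mullo_epi32(a, b);
    assert_eq!(_mm512_cmpeq_epi32_mask(r, _mm512_set1_epi32(262_147)), 0xffff);
}
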
1495/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1496///
1497/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullo_epi32&expand=4003)
1498#[inline]
1499#[target_feature(enable = "avx512f")]
1500#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1501#[cfg_attr(test, assert_instr(vpmulld))]
1502pub fn _mm512_mask_mullo_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
1503    unsafe {
1504        let mul = _mm512_mullo_epi32(a, b).as_i32x16();
1505        transmute(simd_select_bitmask(k, mul, src.as_i32x16()))
1506    }
1507}
1508
1509/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1510///
1511/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mullo_epi32&expand=4004)
1512#[inline]
1513#[target_feature(enable = "avx512f")]
1514#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1515#[cfg_attr(test, assert_instr(vpmulld))]
1516pub fn _mm512_maskz_mullo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
1517    unsafe {
1518        let mul = _mm512_mullo_epi32(a, b).as_i32x16();
1519        transmute(simd_select_bitmask(k, mul, i32x16::ZERO))
1520    }
1521}
1522
1523/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1524///
1525/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mullo_epi32&expand=4000)
1526#[inline]
1527#[target_feature(enable = "avx512f,avx512vl")]
1528#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1529#[cfg_attr(test, assert_instr(vpmulld))]
1530pub fn _mm256_mask_mullo_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1531    unsafe {
1532        let mul = _mm256_mullo_epi32(a, b).as_i32x8();
1533        transmute(simd_select_bitmask(k, mul, src.as_i32x8()))
1534    }
1535}
1536
1537/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1538///
1539/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mullo_epi32&expand=4001)
1540#[inline]
1541#[target_feature(enable = "avx512f,avx512vl")]
1542#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1543#[cfg_attr(test, assert_instr(vpmulld))]
1544pub fn _mm256_maskz_mullo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1545    unsafe {
1546        let mul = _mm256_mullo_epi32(a, b).as_i32x8();
1547        transmute(simd_select_bitmask(k, mul, i32x8::ZERO))
1548    }
1549}
1550
1551/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1552///
1553/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mullo_epi32&expand=3997)
1554#[inline]
1555#[target_feature(enable = "avx512f,avx512vl")]
1556#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1557#[cfg_attr(test, assert_instr(vpmulld))]
1558pub fn _mm_mask_mullo_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1559    unsafe {
1560        let mul = _mm_mullo_epi32(a, b).as_i32x4();
1561        transmute(simd_select_bitmask(k, mul, src.as_i32x4()))
1562    }
1563}
1564
1565/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1566///
1567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mullo_epi32&expand=3998)
1568#[inline]
1569#[target_feature(enable = "avx512f,avx512vl")]
1570#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1571#[cfg_attr(test, assert_instr(vpmulld))]
1572pub fn _mm_maskz_mullo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1573    unsafe {
1574        let mul = _mm_mullo_epi32(a, b).as_i32x4();
1575        transmute(simd_select_bitmask(k, mul, i32x4::ZERO))
1576    }
1577}
1578
1579/// Multiply packed 64-bit integers in a and b, and store the low 64 bits of the results in dst.
1580///
1581/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullox_epi64&expand=4017)
1582///
1583/// This intrinsic generates a sequence of instructions, which may perform worse than a native instruction. Consider the performance impact of this intrinsic.
1584#[inline]
1585#[target_feature(enable = "avx512f")]
1586#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1587pub fn _mm512_mullox_epi64(a: __m512i, b: __m512i) -> __m512i {
1588    unsafe { transmute(simd_mul(a.as_i64x8(), b.as_i64x8())) }
1589}
1590
1591/// Multiply packed 64-bit integers in a and b, and store the low 64 bits of the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1592///
1593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullox_epi64&expand=4016)
1594///
1595/// This intrinsic generates a sequence of instructions, which may perform worse than a native instruction. Consider the performance impact of this intrinsic.
1596#[inline]
1597#[target_feature(enable = "avx512f")]
1598#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1599pub fn _mm512_mask_mullox_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1600    unsafe {
1601        let mul = _mm512_mullox_epi64(a, b).as_i64x8();
1602        transmute(simd_select_bitmask(k, mul, src.as_i64x8()))
1603    }
1604}
1605
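// Sketch under stated assumptions (not upstream code): `_mm512_mullox_epi64` keeps the
// low 64 bits of each product and, per the note above, may be lowered to a sequence of
// instructions rather than one native instruction. The helper name is hypothetical.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_mullox_epi64() {
    let a = _mm512_set1_epi64(1i64 << 40);
    let b = _mm512_set1_epi64(3);
    // 3 << 40 fits in 64 bits, so nothing is truncated here.
    let r = _mm512_mullox_epi64(a, b);
    assert_eq!(_mm512_cmpeq_epi64_mask(r, _mm512_set1_epi64(3i64 << 40)), 0xff);
}
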
1606/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst.
1607///
1608/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_epu32&expand=3916)
1609#[inline]
1610#[target_feature(enable = "avx512f")]
1611#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1612#[cfg_attr(test, assert_instr(vpmuludq))]
1613pub fn _mm512_mul_epu32(a: __m512i, b: __m512i) -> __m512i {
1614    unsafe {
1615        let a = a.as_u64x8();
1616        let b = b.as_u64x8();
1617        let mask = u64x8::splat(u32::MAX.into());
1618        transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
1619    }
1620}
1621
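// Hedged sketch (not upstream code): unlike the signed `_mm512_mul_epi32`,
// `_mm512_mul_epu32` zero-extends the low 32 bits of each lane, so 0xFFFF_FFFF is read
// as 4_294_967_295 rather than -1. The helper name is hypothetical.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_mul_epu32() {
    let a = _mm512_set1_epi64(0xffff_ffff);
    let b = _mm512_set1_epi64(2);
    // 4_294_967_295 * 2 = 8_589_934_590, stored as an unsigned 64-bit result.
    let r = _mm512_mul_epu32(a, b);
    assert_eq!(_mm512_cmpeq_epi64_mask(r, _mm512_set1_epi64(8_589_934_590)), 0xff);
}
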
1622/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1623///
1624/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_epu32&expand=3914)
1625#[inline]
1626#[target_feature(enable = "avx512f")]
1627#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1628#[cfg_attr(test, assert_instr(vpmuludq))]
1629pub fn _mm512_mask_mul_epu32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1630    unsafe {
1631        let mul = _mm512_mul_epu32(a, b).as_u64x8();
1632        transmute(simd_select_bitmask(k, mul, src.as_u64x8()))
1633    }
1634}
1635
1636/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1637///
1638/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_epu32&expand=3915)
1639#[inline]
1640#[target_feature(enable = "avx512f")]
1641#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1642#[cfg_attr(test, assert_instr(vpmuludq))]
1643pub fn _mm512_maskz_mul_epu32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1644    unsafe {
1645        let mul = _mm512_mul_epu32(a, b).as_u64x8();
1646        transmute(simd_select_bitmask(k, mul, u64x8::ZERO))
1647    }
1648}
1649
1650/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1651///
1652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_epu32&expand=3911)
1653#[inline]
1654#[target_feature(enable = "avx512f,avx512vl")]
1655#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1656#[cfg_attr(test, assert_instr(vpmuludq))]
1657pub fn _mm256_mask_mul_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1658    unsafe {
1659        let mul = _mm256_mul_epu32(a, b).as_u64x4();
1660        transmute(simd_select_bitmask(k, mul, src.as_u64x4()))
1661    }
1662}
1663
1664/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1665///
1666/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_epu32&expand=3912)
1667#[inline]
1668#[target_feature(enable = "avx512f,avx512vl")]
1669#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1670#[cfg_attr(test, assert_instr(vpmuludq))]
1671pub fn _mm256_maskz_mul_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1672    unsafe {
1673        let mul = _mm256_mul_epu32(a, b).as_u64x4();
1674        transmute(simd_select_bitmask(k, mul, u64x4::ZERO))
1675    }
1676}
1677
1678/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1679///
1680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_epu32&expand=3908)
1681#[inline]
1682#[target_feature(enable = "avx512f,avx512vl")]
1683#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1684#[cfg_attr(test, assert_instr(vpmuludq))]
1685pub fn _mm_mask_mul_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1686    unsafe {
1687        let mul = _mm_mul_epu32(a, b).as_u64x2();
1688        transmute(simd_select_bitmask(k, mul, src.as_u64x2()))
1689    }
1690}
1691
1692/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1693///
1694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_epu32&expand=3909)
1695#[inline]
1696#[target_feature(enable = "avx512f,avx512vl")]
1697#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1698#[cfg_attr(test, assert_instr(vpmuludq))]
1699pub fn _mm_maskz_mul_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1700    unsafe {
1701        let mul = _mm_mul_epu32(a, b).as_u64x2();
1702        transmute(simd_select_bitmask(k, mul, u64x2::ZERO))
1703    }
1704}
1705
1706/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
1707///
1708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_ps&expand=3934)
1709#[inline]
1710#[target_feature(enable = "avx512f")]
1711#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1712#[cfg_attr(test, assert_instr(vmulps))]
1713pub fn _mm512_mul_ps(a: __m512, b: __m512) -> __m512 {
1714    unsafe { transmute(simd_mul(a.as_f32x16(), b.as_f32x16())) }
1715}
1716
1717/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1718///
1719/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_ps&expand=3932)
1720#[inline]
1721#[target_feature(enable = "avx512f")]
1722#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1723#[cfg_attr(test, assert_instr(vmulps))]
1724pub fn _mm512_mask_mul_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
1725    unsafe {
1726        let mul = _mm512_mul_ps(a, b).as_f32x16();
1727        transmute(simd_select_bitmask(k, mul, src.as_f32x16()))
1728    }
1729}
1730
1731/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1732///
1733/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_ps&expand=3933)
1734#[inline]
1735#[target_feature(enable = "avx512f")]
1736#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1737#[cfg_attr(test, assert_instr(vmulps))]
1738pub fn _mm512_maskz_mul_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
1739    unsafe {
1740        let mul = _mm512_mul_ps(a, b).as_f32x16();
1741        transmute(simd_select_bitmask(k, mul, f32x16::ZERO))
1742    }
1743}
1744
1745/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1746///
1747/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_ps&expand=3929)
1748#[inline]
1749#[target_feature(enable = "avx512f,avx512vl")]
1750#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1751#[cfg_attr(test, assert_instr(vmulps))]
1752pub fn _mm256_mask_mul_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
1753    unsafe {
1754        let mul = _mm256_mul_ps(a, b).as_f32x8();
1755        transmute(simd_select_bitmask(k, mul, src.as_f32x8()))
1756    }
1757}
1758
1759/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1760///
1761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_ps&expand=3930)
1762#[inline]
1763#[target_feature(enable = "avx512f,avx512vl")]
1764#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1765#[cfg_attr(test, assert_instr(vmulps))]
1766pub fn _mm256_maskz_mul_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
1767    unsafe {
1768        let mul = _mm256_mul_ps(a, b).as_f32x8();
1769        transmute(simd_select_bitmask(k, mul, f32x8::ZERO))
1770    }
1771}
1772
1773/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1774///
1775/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_ps&expand=3926)
1776#[inline]
1777#[target_feature(enable = "avx512f,avx512vl")]
1778#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1779#[cfg_attr(test, assert_instr(vmulps))]
1780pub fn _mm_mask_mul_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
1781    unsafe {
1782        let mul = _mm_mul_ps(a, b).as_f32x4();
1783        transmute(simd_select_bitmask(k, mul, src.as_f32x4()))
1784    }
1785}
1786
1787/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1788///
1789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_ps&expand=3927)
1790#[inline]
1791#[target_feature(enable = "avx512f,avx512vl")]
1792#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1793#[cfg_attr(test, assert_instr(vmulps))]
1794pub fn _mm_maskz_mul_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
1795    unsafe {
1796        let mul = _mm_mul_ps(a, b).as_f32x4();
1797        transmute(simd_select_bitmask(k, mul, f32x4::ZERO))
1798    }
1799}
1800
1801/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
1802///
1803/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_pd&expand=3925)
1804#[inline]
1805#[target_feature(enable = "avx512f")]
1806#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1807#[cfg_attr(test, assert_instr(vmulpd))]
1808pub fn _mm512_mul_pd(a: __m512d, b: __m512d) -> __m512d {
1809    unsafe { transmute(simd_mul(a.as_f64x8(), b.as_f64x8())) }
1810}
1811
1812/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1813///
1814/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_pd&expand=3923)
1815#[inline]
1816#[target_feature(enable = "avx512f")]
1817#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1818#[cfg_attr(test, assert_instr(vmulpd))]
1819pub fn _mm512_mask_mul_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1820    unsafe {
1821        let mul = _mm512_mul_pd(a, b).as_f64x8();
1822        transmute(simd_select_bitmask(k, mul, src.as_f64x8()))
1823    }
1824}
1825
1826/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1827///
1828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_pd&expand=3924)
1829#[inline]
1830#[target_feature(enable = "avx512f")]
1831#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1832#[cfg_attr(test, assert_instr(vmulpd))]
1833pub fn _mm512_maskz_mul_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1834    unsafe {
1835        let mul = _mm512_mul_pd(a, b).as_f64x8();
1836        transmute(simd_select_bitmask(k, mul, f64x8::ZERO))
1837    }
1838}
1839
1840/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1841///
1842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_pd&expand=3920)
1843#[inline]
1844#[target_feature(enable = "avx512f,avx512vl")]
1845#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1846#[cfg_attr(test, assert_instr(vmulpd))]
1847pub fn _mm256_mask_mul_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1848    unsafe {
1849        let mul = _mm256_mul_pd(a, b).as_f64x4();
1850        transmute(simd_select_bitmask(k, mul, src.as_f64x4()))
1851    }
1852}
1853
1854/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1855///
1856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_pd&expand=3921)
1857#[inline]
1858#[target_feature(enable = "avx512f,avx512vl")]
1859#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1860#[cfg_attr(test, assert_instr(vmulpd))]
1861pub fn _mm256_maskz_mul_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1862    unsafe {
1863        let mul = _mm256_mul_pd(a, b).as_f64x4();
1864        transmute(simd_select_bitmask(k, mul, f64x4::ZERO))
1865    }
1866}
1867
1868/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1869///
1870/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_pd&expand=3917)
1871#[inline]
1872#[target_feature(enable = "avx512f,avx512vl")]
1873#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1874#[cfg_attr(test, assert_instr(vmulpd))]
1875pub fn _mm_mask_mul_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1876    unsafe {
1877        let mul = _mm_mul_pd(a, b).as_f64x2();
1878        transmute(simd_select_bitmask(k, mul, src.as_f64x2()))
1879    }
1880}
1881
1882/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1883///
1884/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_pd&expand=3918)
1885#[inline]
1886#[target_feature(enable = "avx512f,avx512vl")]
1887#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1888#[cfg_attr(test, assert_instr(vmulpd))]
1889pub fn _mm_maskz_mul_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1890    unsafe {
1891        let mul = _mm_mul_pd(a, b).as_f64x2();
1892        transmute(simd_select_bitmask(k, mul, f64x2::ZERO))
1893    }
1894}
1895
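// Usage sketch (assumption, not upstream code): the 128-bit masked double-precision
// multiply behaves like its wider siblings, just over two lanes. The helper name
// `_example_mask_mul_pd` is hypothetical.
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn _example_mask_mul_pd() {
    let a = _mm_set_pd(3.0, 2.0); // lane 1 = 3.0, lane 0 = 2.0
    let b = _mm_set1_pd(0.5);
    let src = _mm_set1_pd(100.0);
    // Only lane 0 is selected: it becomes 2.0 * 0.5 = 1.0; lane 1 keeps 100.0 from src.
    let r = _mm_mask_mul_pd(src, 0b01, a, b);
    assert_eq!(_mm_cmpeq_pd_mask(r, _mm_set_pd(100.0, 1.0)), 0b11);
}
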
1896/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst.
1897///
1898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_ps&expand=2162)
1899#[inline]
1900#[target_feature(enable = "avx512f")]
1901#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1902#[cfg_attr(test, assert_instr(vdivps))]
1903pub fn _mm512_div_ps(a: __m512, b: __m512) -> __m512 {
1904    unsafe { transmute(simd_div(a.as_f32x16(), b.as_f32x16())) }
1905}
1906
1907/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1908///
1909/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_ps&expand=2163)
1910#[inline]
1911#[target_feature(enable = "avx512f")]
1912#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1913#[cfg_attr(test, assert_instr(vdivps))]
1914pub fn _mm512_mask_div_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
1915    unsafe {
1916        let div = _mm512_div_ps(a, b).as_f32x16();
1917        transmute(simd_select_bitmask(k, div, src.as_f32x16()))
1918    }
1919}
1920
1921/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1922///
1923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_ps&expand=2164)
1924#[inline]
1925#[target_feature(enable = "avx512f")]
1926#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1927#[cfg_attr(test, assert_instr(vdivps))]
1928pub fn _mm512_maskz_div_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
1929    unsafe {
1930        let div = _mm512_div_ps(a, b).as_f32x16();
1931        transmute(simd_select_bitmask(k, div, f32x16::ZERO))
1932    }
1933}
1934
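// Minimal sketch (assumption, not upstream code): the zeromask form of the packed
// single-precision division. As written above, every lane is divided first and the mask
// is applied afterwards, which is harmless for these finite inputs. The helper name is
// hypothetical.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_maskz_div_ps() {
    let a = _mm512_set1_ps(9.0);
    let b = _mm512_set1_ps(2.0);
    // Even lanes (mask 0x5555) receive 9.0 / 2.0 = 4.5; odd lanes are zeroed.
    let r = _mm512_maskz_div_ps(0x5555, a, b);
    assert_eq!(_mm512_cmpeq_ps_mask(r, _mm512_set1_ps(4.5)), 0x5555);
}
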
1935/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1936///
1937/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_div_ps&expand=2160)
1938#[inline]
1939#[target_feature(enable = "avx512f,avx512vl")]
1940#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1941#[cfg_attr(test, assert_instr(vdivps))]
1942pub fn _mm256_mask_div_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
1943    unsafe {
1944        let div = _mm256_div_ps(a, b).as_f32x8();
1945        transmute(simd_select_bitmask(k, div, src.as_f32x8()))
1946    }
1947}
1948
1949/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1950///
1951/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_div_ps&expand=2161)
1952#[inline]
1953#[target_feature(enable = "avx512f,avx512vl")]
1954#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1955#[cfg_attr(test, assert_instr(vdivps))]
1956pub fn _mm256_maskz_div_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
1957    unsafe {
1958        let div = _mm256_div_ps(a, b).as_f32x8();
1959        transmute(simd_select_bitmask(k, div, f32x8::ZERO))
1960    }
1961}
1962
1963/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1964///
1965/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_div_ps&expand=2157)
1966#[inline]
1967#[target_feature(enable = "avx512f,avx512vl")]
1968#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1969#[cfg_attr(test, assert_instr(vdivps))]
1970pub fn _mm_mask_div_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
1971    unsafe {
1972        let div = _mm_div_ps(a, b).as_f32x4();
1973        transmute(simd_select_bitmask(k, div, src.as_f32x4()))
1974    }
1975}
1976
1977/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1978///
1979/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_div_ps&expand=2158)
1980#[inline]
1981#[target_feature(enable = "avx512f,avx512vl")]
1982#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1983#[cfg_attr(test, assert_instr(vdivps))]
1984pub fn _mm_maskz_div_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
1985    unsafe {
1986        let div = _mm_div_ps(a, b).as_f32x4();
1987        transmute(simd_select_bitmask(k, div, f32x4::ZERO))
1988    }
1989}
1990
1991/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst.
1992///
1993/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_pd&expand=2153)
1994#[inline]
1995#[target_feature(enable = "avx512f")]
1996#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1997#[cfg_attr(test, assert_instr(vdivpd))]
1998pub fn _mm512_div_pd(a: __m512d, b: __m512d) -> __m512d {
1999    unsafe { transmute(simd_div(a.as_f64x8(), b.as_f64x8())) }
2000}
2001
2002/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2003///
2004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_pd&expand=2154)
2005#[inline]
2006#[target_feature(enable = "avx512f")]
2007#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2008#[cfg_attr(test, assert_instr(vdivpd))]
2009pub fn _mm512_mask_div_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
2010    unsafe {
2011        let div = _mm512_div_pd(a, b).as_f64x8();
2012        transmute(simd_select_bitmask(k, div, src.as_f64x8()))
2013    }
2014}
2015
2016/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2017///
2018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_pd&expand=2155)
2019#[inline]
2020#[target_feature(enable = "avx512f")]
2021#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2022#[cfg_attr(test, assert_instr(vdivpd))]
2023pub fn _mm512_maskz_div_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
2024    unsafe {
2025        let div = _mm512_div_pd(a, b).as_f64x8();
2026        transmute(simd_select_bitmask(k, div, f64x8::ZERO))
2027    }
2028}
2029
2030/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2031///
2032/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_div_pd&expand=2151)
2033#[inline]
2034#[target_feature(enable = "avx512f,avx512vl")]
2035#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2036#[cfg_attr(test, assert_instr(vdivpd))]
2037pub fn _mm256_mask_div_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
2038    unsafe {
2039        let div = _mm256_div_pd(a, b).as_f64x4();
2040        transmute(simd_select_bitmask(k, div, src.as_f64x4()))
2041    }
2042}
2043
2044/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2045///
2046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_div_pd&expand=2152)
2047#[inline]
2048#[target_feature(enable = "avx512f,avx512vl")]
2049#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2050#[cfg_attr(test, assert_instr(vdivpd))]
2051pub fn _mm256_maskz_div_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
2052    unsafe {
2053        let div = _mm256_div_pd(a, b).as_f64x4();
2054        transmute(simd_select_bitmask(k, div, f64x4::ZERO))
2055    }
2056}
2057
2058/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2059///
2060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_div_pd&expand=2148)
2061#[inline]
2062#[target_feature(enable = "avx512f,avx512vl")]
2063#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2064#[cfg_attr(test, assert_instr(vdivpd))]
2065pub fn _mm_mask_div_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
2066    unsafe {
2067        let div = _mm_div_pd(a, b).as_f64x2();
2068        transmute(simd_select_bitmask(k, div, src.as_f64x2()))
2069    }
2070}
2071
2072/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2073///
2074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_div_pd&expand=2149)
2075#[inline]
2076#[target_feature(enable = "avx512f,avx512vl")]
2077#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2078#[cfg_attr(test, assert_instr(vdivpd))]
2079pub fn _mm_maskz_div_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
2080    unsafe {
2081        let div = _mm_div_pd(a, b).as_f64x2();
2082        transmute(simd_select_bitmask(k, div, f64x2::ZERO))
2083    }
2084}
2085
2086/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst.
2087///
2088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi32&expand=3582)
2089#[inline]
2090#[target_feature(enable = "avx512f")]
2091#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2092#[cfg_attr(test, assert_instr(vpmaxsd))]
2093pub fn _mm512_max_epi32(a: __m512i, b: __m512i) -> __m512i {
2094    unsafe {
2095        let a = a.as_i32x16();
2096        let b = b.as_i32x16();
2097        transmute(simd_select::<i32x16, _>(simd_gt(a, b), a, b))
2098    }
2099}
2100
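// Hedged sketch (not upstream code): `_mm512_max_epi32` compares lanes as signed values,
// so -1 loses to 0 even though its unsigned bit pattern is larger. The helper name
// `_example_max_epi32` is hypothetical.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_max_epi32() {
    let a = _mm512_set1_epi32(-1);
    let b = _mm512_set1_epi32(0);
    let r = _mm512_max_epi32(a, b);
    // Every lane picks the signed maximum, 0.
    assert_eq!(_mm512_cmpeq_epi32_mask(r, b), 0xffff);
}
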
2101/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2102///
2103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi32&expand=3580)
2104#[inline]
2105#[target_feature(enable = "avx512f")]
2106#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2107#[cfg_attr(test, assert_instr(vpmaxsd))]
2108pub fn _mm512_mask_max_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2109    unsafe {
2110        let max = _mm512_max_epi32(a, b).as_i32x16();
2111        transmute(simd_select_bitmask(k, max, src.as_i32x16()))
2112    }
2113}
2114
2115/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2116///
2117/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi32&expand=3581)
2118#[inline]
2119#[target_feature(enable = "avx512f")]
2120#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2121#[cfg_attr(test, assert_instr(vpmaxsd))]
2122pub fn _mm512_maskz_max_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2123    unsafe {
2124        let max = _mm512_max_epi32(a, b).as_i32x16();
2125        transmute(simd_select_bitmask(k, max, i32x16::ZERO))
2126    }
2127}
2128
2129/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2130///
2131/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi32&expand=3577)
2132#[inline]
2133#[target_feature(enable = "avx512f,avx512vl")]
2134#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2135#[cfg_attr(test, assert_instr(vpmaxsd))]
2136pub fn _mm256_mask_max_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2137    unsafe {
2138        let max = _mm256_max_epi32(a, b).as_i32x8();
2139        transmute(simd_select_bitmask(k, max, src.as_i32x8()))
2140    }
2141}
2142
2143/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2144///
2145/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi32&expand=3578)
2146#[inline]
2147#[target_feature(enable = "avx512f,avx512vl")]
2148#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2149#[cfg_attr(test, assert_instr(vpmaxsd))]
2150pub fn _mm256_maskz_max_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2151    unsafe {
2152        let max = _mm256_max_epi32(a, b).as_i32x8();
2153        transmute(simd_select_bitmask(k, max, i32x8::ZERO))
2154    }
2155}
2156
2157/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2158///
2159/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi32&expand=3574)
2160#[inline]
2161#[target_feature(enable = "avx512f,avx512vl")]
2162#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2163#[cfg_attr(test, assert_instr(vpmaxsd))]
2164pub fn _mm_mask_max_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2165    unsafe {
2166        let max = _mm_max_epi32(a, b).as_i32x4();
2167        transmute(simd_select_bitmask(k, max, src.as_i32x4()))
2168    }
2169}
2170
2171/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2172///
2173/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi32&expand=3575)
2174#[inline]
2175#[target_feature(enable = "avx512f,avx512vl")]
2176#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2177#[cfg_attr(test, assert_instr(vpmaxsd))]
2178pub fn _mm_maskz_max_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2179    unsafe {
2180        let max = _mm_max_epi32(a, b).as_i32x4();
2181        transmute(simd_select_bitmask(k, max, i32x4::ZERO))
2182    }
2183}
2184
2185/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
2186///
2187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi64&expand=3591)
2188#[inline]
2189#[target_feature(enable = "avx512f")]
2190#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2191#[cfg_attr(test, assert_instr(vpmaxsq))]
2192pub fn _mm512_max_epi64(a: __m512i, b: __m512i) -> __m512i {
2193    unsafe {
2194        let a = a.as_i64x8();
2195        let b = b.as_i64x8();
2196        transmute(simd_select::<i64x8, _>(simd_gt(a, b), a, b))
2197    }
2198}
2199
2200/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2201///
2202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi64&expand=3589)
2203#[inline]
2204#[target_feature(enable = "avx512f")]
2205#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2206#[cfg_attr(test, assert_instr(vpmaxsq))]
2207pub fn _mm512_mask_max_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2208    unsafe {
2209        let max = _mm512_max_epi64(a, b).as_i64x8();
2210        transmute(simd_select_bitmask(k, max, src.as_i64x8()))
2211    }
2212}
2213
2214/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2215///
2216/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi64&expand=3590)
2217#[inline]
2218#[target_feature(enable = "avx512f")]
2219#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2220#[cfg_attr(test, assert_instr(vpmaxsq))]
2221pub fn _mm512_maskz_max_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2222    unsafe {
2223        let max = _mm512_max_epi64(a, b).as_i64x8();
2224        transmute(simd_select_bitmask(k, max, i64x8::ZERO))
2225    }
2226}
2227
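// Illustrative sketch (hypothetical helper): unlike the 32-bit case, packed
// signed 64-bit maximum has no SSE/AVX2 form, so even the 128/256-bit variants
// in this file require AVX-512 (F plus VL). A plain 512-bit use looks like this.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_max_epi64_sketch() -> __m512i {
    let a = _mm512_set1_epi64(i64::MIN);
    let b = _mm512_set1_epi64(-1);
    // Every lane becomes max(i64::MIN, -1) = -1.
    _mm512_max_epi64(a, b)
}
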
2228/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
2229///
2230/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epi64&expand=3588)
2231#[inline]
2232#[target_feature(enable = "avx512f,avx512vl")]
2233#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2234#[cfg_attr(test, assert_instr(vpmaxsq))]
2235pub fn _mm256_max_epi64(a: __m256i, b: __m256i) -> __m256i {
2236    unsafe {
2237        let a = a.as_i64x4();
2238        let b = b.as_i64x4();
2239        transmute(simd_select::<i64x4, _>(simd_gt(a, b), a, b))
2240    }
2241}
2242
2243/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2244///
2245/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi64&expand=3586)
2246#[inline]
2247#[target_feature(enable = "avx512f,avx512vl")]
2248#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2249#[cfg_attr(test, assert_instr(vpmaxsq))]
2250pub fn _mm256_mask_max_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2251    unsafe {
2252        let max = _mm256_max_epi64(a, b).as_i64x4();
2253        transmute(simd_select_bitmask(k, max, src.as_i64x4()))
2254    }
2255}
2256
2257/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2258///
2259/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi64&expand=3587)
2260#[inline]
2261#[target_feature(enable = "avx512f,avx512vl")]
2262#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2263#[cfg_attr(test, assert_instr(vpmaxsq))]
2264pub fn _mm256_maskz_max_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2265    unsafe {
2266        let max = _mm256_max_epi64(a, b).as_i64x4();
2267        transmute(simd_select_bitmask(k, max, i64x4::ZERO))
2268    }
2269}
2270
2271/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
2272///
2273/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epi64&expand=3585)
2274#[inline]
2275#[target_feature(enable = "avx512f,avx512vl")]
2276#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2277#[cfg_attr(test, assert_instr(vpmaxsq))]
2278pub fn _mm_max_epi64(a: __m128i, b: __m128i) -> __m128i {
2279    unsafe {
2280        let a = a.as_i64x2();
2281        let b = b.as_i64x2();
2282        transmute(simd_select::<i64x2, _>(simd_gt(a, b), a, b))
2283    }
2284}
2285
2286/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2287///
2288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi64&expand=3583)
2289#[inline]
2290#[target_feature(enable = "avx512f,avx512vl")]
2291#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2292#[cfg_attr(test, assert_instr(vpmaxsq))]
2293pub fn _mm_mask_max_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2294    unsafe {
2295        let max = _mm_max_epi64(a, b).as_i64x2();
2296        transmute(simd_select_bitmask(k, max, src.as_i64x2()))
2297    }
2298}
2299
2300/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2301///
2302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi64&expand=3584)
2303#[inline]
2304#[target_feature(enable = "avx512f,avx512vl")]
2305#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2306#[cfg_attr(test, assert_instr(vpmaxsq))]
2307pub fn _mm_maskz_max_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2308    unsafe {
2309        let max = _mm_max_epi64(a, b).as_i64x2();
2310        transmute(simd_select_bitmask(k, max, i64x2::ZERO))
2311    }
2312}
2313
2314/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst.
2315///
2316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_ps&expand=3655)
2317#[inline]
2318#[target_feature(enable = "avx512f")]
2319#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2320#[cfg_attr(test, assert_instr(vmaxps))]
2321pub fn _mm512_max_ps(a: __m512, b: __m512) -> __m512 {
2322    unsafe {
2323        transmute(vmaxps(
2324            a.as_f32x16(),
2325            b.as_f32x16(),
2326            _MM_FROUND_CUR_DIRECTION,
2327        ))
2328    }
2329}
2330
2331/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2332///
2333/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_ps&expand=3653)
2334#[inline]
2335#[target_feature(enable = "avx512f")]
2336#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2337#[cfg_attr(test, assert_instr(vmaxps))]
2338pub fn _mm512_mask_max_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
2339    unsafe {
2340        let max = _mm512_max_ps(a, b).as_f32x16();
2341        transmute(simd_select_bitmask(k, max, src.as_f32x16()))
2342    }
2343}
2344
2345/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2346///
2347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_ps&expand=3654)
2348#[inline]
2349#[target_feature(enable = "avx512f")]
2350#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2351#[cfg_attr(test, assert_instr(vmaxps))]
2352pub fn _mm512_maskz_max_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
2353    unsafe {
2354        let max = _mm512_max_ps(a, b).as_f32x16();
2355        transmute(simd_select_bitmask(k, max, f32x16::ZERO))
2356    }
2357}
2358
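// Illustrative sketch (hypothetical helper): the zeromask variant zeroes the
// lanes whose mask bit is clear rather than copying them from a `src` vector.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_maskz_max_ps_sketch() -> __m512 {
    let a = _mm512_set1_ps(1.0);
    let b = _mm512_set1_ps(2.0);
    // Lanes 0..4 get max(1.0, 2.0) = 2.0; lanes 4..16 are zeroed.
    _mm512_maskz_max_ps(0b0000_0000_0000_1111, a, b)
}
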
2359/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2360///
2361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_ps&expand=3650)
2362#[inline]
2363#[target_feature(enable = "avx512f,avx512vl")]
2364#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2365#[cfg_attr(test, assert_instr(vmaxps))]
2366pub fn _mm256_mask_max_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
2367    unsafe {
2368        let max = _mm256_max_ps(a, b).as_f32x8();
2369        transmute(simd_select_bitmask(k, max, src.as_f32x8()))
2370    }
2371}
2372
2373/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2374///
2375/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_ps&expand=3651)
2376#[inline]
2377#[target_feature(enable = "avx512f,avx512vl")]
2378#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2379#[cfg_attr(test, assert_instr(vmaxps))]
2380pub fn _mm256_maskz_max_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
2381    unsafe {
2382        let max = _mm256_max_ps(a, b).as_f32x8();
2383        transmute(simd_select_bitmask(k, max, f32x8::ZERO))
2384    }
2385}
2386
2387/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2388///
2389/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_ps&expand=3647)
2390#[inline]
2391#[target_feature(enable = "avx512f,avx512vl")]
2392#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2393#[cfg_attr(test, assert_instr(vmaxps))]
2394pub fn _mm_mask_max_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
2395    unsafe {
2396        let max = _mm_max_ps(a, b).as_f32x4();
2397        transmute(simd_select_bitmask(k, max, src.as_f32x4()))
2398    }
2399}
2400
2401/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2402///
2403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_ps&expand=3648)
2404#[inline]
2405#[target_feature(enable = "avx512f,avx512vl")]
2406#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2407#[cfg_attr(test, assert_instr(vmaxps))]
2408pub fn _mm_maskz_max_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
2409    unsafe {
2410        let max = _mm_max_ps(a, b).as_f32x4();
2411        transmute(simd_select_bitmask(k, max, f32x4::ZERO))
2412    }
2413}
2414
2415/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst.
2416///
2417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_pd&expand=3645)
2418#[inline]
2419#[target_feature(enable = "avx512f")]
2420#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2421#[cfg_attr(test, assert_instr(vmaxpd))]
2422pub fn _mm512_max_pd(a: __m512d, b: __m512d) -> __m512d {
2423    unsafe { transmute(vmaxpd(a.as_f64x8(), b.as_f64x8(), _MM_FROUND_CUR_DIRECTION)) }
2424}
2425
2426/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2427///
2428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_pd&expand=3643)
2429#[inline]
2430#[target_feature(enable = "avx512f")]
2431#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2432#[cfg_attr(test, assert_instr(vmaxpd))]
2433pub fn _mm512_mask_max_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
2434    unsafe {
2435        let max = _mm512_max_pd(a, b).as_f64x8();
2436        transmute(simd_select_bitmask(k, max, src.as_f64x8()))
2437    }
2438}
2439
2440/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2441///
2442/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_pd&expand=3644)
2443#[inline]
2444#[target_feature(enable = "avx512f")]
2445#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2446#[cfg_attr(test, assert_instr(vmaxpd))]
2447pub fn _mm512_maskz_max_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
2448    unsafe {
2449        let max = _mm512_max_pd(a, b).as_f64x8();
2450        transmute(simd_select_bitmask(k, max, f64x8::ZERO))
2451    }
2452}
2453
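// Illustrative sketch (hypothetical helper): with a writemask the untouched
// lanes of the result come from `src`, which here is a vector of NaNs so the
// masked-off lanes are easy to spot.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_mask_max_pd_sketch() -> __m512d {
    let src = _mm512_set1_pd(f64::NAN);
    let a = _mm512_set1_pd(-3.0);
    let b = _mm512_set1_pd(5.0);
    // Lanes 0..4 get max(-3.0, 5.0) = 5.0; lanes 4..8 keep NaN from `src`.
    _mm512_mask_max_pd(src, 0b0000_1111, a, b)
}
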
2454/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2455///
2456/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_pd&expand=3640)
2457#[inline]
2458#[target_feature(enable = "avx512f,avx512vl")]
2459#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2460#[cfg_attr(test, assert_instr(vmaxpd))]
2461pub fn _mm256_mask_max_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
2462    unsafe {
2463        let max = _mm256_max_pd(a, b).as_f64x4();
2464        transmute(simd_select_bitmask(k, max, src.as_f64x4()))
2465    }
2466}
2467
2468/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2469///
2470/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_pd&expand=3641)
2471#[inline]
2472#[target_feature(enable = "avx512f,avx512vl")]
2473#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2474#[cfg_attr(test, assert_instr(vmaxpd))]
2475pub fn _mm256_maskz_max_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
2476    unsafe {
2477        let max = _mm256_max_pd(a, b).as_f64x4();
2478        transmute(simd_select_bitmask(k, max, f64x4::ZERO))
2479    }
2480}
2481
2482/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2483///
2484/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_pd&expand=3637)
2485#[inline]
2486#[target_feature(enable = "avx512f,avx512vl")]
2487#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2488#[cfg_attr(test, assert_instr(vmaxpd))]
2489pub fn _mm_mask_max_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
2490    unsafe {
2491        let max = _mm_max_pd(a, b).as_f64x2();
2492        transmute(simd_select_bitmask(k, max, src.as_f64x2()))
2493    }
2494}
2495
2496/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2497///
2498/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_pd&expand=3638)
2499#[inline]
2500#[target_feature(enable = "avx512f,avx512vl")]
2501#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2502#[cfg_attr(test, assert_instr(vmaxpd))]
2503pub fn _mm_maskz_max_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
2504    unsafe {
2505        let max = _mm_max_pd(a, b).as_f64x2();
2506        transmute(simd_select_bitmask(k, max, f64x2::ZERO))
2507    }
2508}
2509
2510/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst.
2511///
2512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu32&expand=3618)
2513#[inline]
2514#[target_feature(enable = "avx512f")]
2515#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2516#[cfg_attr(test, assert_instr(vpmaxud))]
2517pub fn _mm512_max_epu32(a: __m512i, b: __m512i) -> __m512i {
2518    unsafe {
2519        let a = a.as_u32x16();
2520        let b = b.as_u32x16();
2521        transmute(simd_select::<i32x16, _>(simd_gt(a, b), a, b))
2522    }
2523}
2524
2525/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2526///
2527/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu32&expand=3616)
2528#[inline]
2529#[target_feature(enable = "avx512f")]
2530#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2531#[cfg_attr(test, assert_instr(vpmaxud))]
2532pub fn _mm512_mask_max_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2533    unsafe {
2534        let max = _mm512_max_epu32(a, b).as_u32x16();
2535        transmute(simd_select_bitmask(k, max, src.as_u32x16()))
2536    }
2537}
2538
2539/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2540///
2541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu32&expand=3617)
2542#[inline]
2543#[target_feature(enable = "avx512f")]
2544#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2545#[cfg_attr(test, assert_instr(vpmaxud))]
2546pub fn _mm512_maskz_max_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2547    unsafe {
2548        let max = _mm512_max_epu32(a, b).as_u32x16();
2549        transmute(simd_select_bitmask(k, max, u32x16::ZERO))
2550    }
2551}
2552
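// Illustrative sketch (hypothetical helper): the comparison here is unsigned,
// so a lane holding the bit pattern 0xFFFF_FFFF (-1 as i32) wins against any
// other value, unlike the signed `_mm512_max_epi32`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_max_epu32_sketch() -> __m512i {
    let a = _mm512_set1_epi32(-1); // 0xFFFF_FFFF, i.e. u32::MAX when viewed unsigned
    let b = _mm512_set1_epi32(7);
    // Every lane becomes 0xFFFF_FFFF, because u32::MAX > 7 in the unsigned order.
    _mm512_max_epu32(a, b)
}
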
2553/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2554///
2555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu32&expand=3613)
2556#[inline]
2557#[target_feature(enable = "avx512f,avx512vl")]
2558#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2559#[cfg_attr(test, assert_instr(vpmaxud))]
2560pub fn _mm256_mask_max_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2561    unsafe {
2562        let max = _mm256_max_epu32(a, b).as_u32x8();
2563        transmute(simd_select_bitmask(k, max, src.as_u32x8()))
2564    }
2565}
2566
2567/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2568///
2569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu32&expand=3614)
2570#[inline]
2571#[target_feature(enable = "avx512f,avx512vl")]
2572#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2573#[cfg_attr(test, assert_instr(vpmaxud))]
2574pub fn _mm256_maskz_max_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2575    unsafe {
2576        let max = _mm256_max_epu32(a, b).as_u32x8();
2577        transmute(simd_select_bitmask(k, max, u32x8::ZERO))
2578    }
2579}
2580
2581/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2582///
2583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu32&expand=3610)
2584#[inline]
2585#[target_feature(enable = "avx512f,avx512vl")]
2586#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2587#[cfg_attr(test, assert_instr(vpmaxud))]
2588pub fn _mm_mask_max_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2589    unsafe {
2590        let max = _mm_max_epu32(a, b).as_u32x4();
2591        transmute(simd_select_bitmask(k, max, src.as_u32x4()))
2592    }
2593}
2594
2595/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2596///
2597/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu32&expand=3611)
2598#[inline]
2599#[target_feature(enable = "avx512f,avx512vl")]
2600#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2601#[cfg_attr(test, assert_instr(vpmaxud))]
2602pub fn _mm_maskz_max_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2603    unsafe {
2604        let max = _mm_max_epu32(a, b).as_u32x4();
2605        transmute(simd_select_bitmask(k, max, u32x4::ZERO))
2606    }
2607}
2608
2609/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
2610///
2611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu64&expand=3627)
2612#[inline]
2613#[target_feature(enable = "avx512f")]
2614#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2615#[cfg_attr(test, assert_instr(vpmaxuq))]
2616pub fn _mm512_max_epu64(a: __m512i, b: __m512i) -> __m512i {
2617    unsafe {
2618        let a = a.as_u64x8();
2619        let b = b.as_u64x8();
2620        transmute(simd_select::<i64x8, _>(simd_gt(a, b), a, b))
2621    }
2622}
2623
2624/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2625///
2626/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu64&expand=3625)
2627#[inline]
2628#[target_feature(enable = "avx512f")]
2629#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2630#[cfg_attr(test, assert_instr(vpmaxuq))]
2631pub fn _mm512_mask_max_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2632    unsafe {
2633        let max = _mm512_max_epu64(a, b).as_u64x8();
2634        transmute(simd_select_bitmask(k, max, src.as_u64x8()))
2635    }
2636}
2637
2638/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2639///
2640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu64&expand=3626)
2641#[inline]
2642#[target_feature(enable = "avx512f")]
2643#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2644#[cfg_attr(test, assert_instr(vpmaxuq))]
2645pub fn _mm512_maskz_max_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2646    unsafe {
2647        let max = _mm512_max_epu64(a, b).as_u64x8();
2648        transmute(simd_select_bitmask(k, max, u64x8::ZERO))
2649    }
2650}
2651
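// Illustrative sketch (hypothetical helper): the same unsigned rule for 64-bit
// lanes, combined with a zeromask over the 8 lanes of the 512-bit result.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_maskz_max_epu64_sketch() -> __m512i {
    let a = _mm512_set1_epi64(-1); // u64::MAX when viewed unsigned
    let b = _mm512_set1_epi64(1);
    // Lanes 0..4 become u64::MAX; lanes 4..8 are zeroed by the mask.
    _mm512_maskz_max_epu64(0b0000_1111, a, b)
}
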
2652/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
2653///
2654/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epu64&expand=3624)
2655#[inline]
2656#[target_feature(enable = "avx512f,avx512vl")]
2657#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2658#[cfg_attr(test, assert_instr(vpmaxuq))]
2659pub fn _mm256_max_epu64(a: __m256i, b: __m256i) -> __m256i {
2660    unsafe {
2661        let a = a.as_u64x4();
2662        let b = b.as_u64x4();
2663        transmute(simd_select::<i64x4, _>(simd_gt(a, b), a, b))
2664    }
2665}
2666
2667/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2668///
2669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu64&expand=3622)
2670#[inline]
2671#[target_feature(enable = "avx512f,avx512vl")]
2672#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2673#[cfg_attr(test, assert_instr(vpmaxuq))]
2674pub fn _mm256_mask_max_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2675    unsafe {
2676        let max = _mm256_max_epu64(a, b).as_u64x4();
2677        transmute(simd_select_bitmask(k, max, src.as_u64x4()))
2678    }
2679}
2680
2681/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2682///
2683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu64&expand=3623)
2684#[inline]
2685#[target_feature(enable = "avx512f,avx512vl")]
2686#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2687#[cfg_attr(test, assert_instr(vpmaxuq))]
2688pub fn _mm256_maskz_max_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2689    unsafe {
2690        let max = _mm256_max_epu64(a, b).as_u64x4();
2691        transmute(simd_select_bitmask(k, max, u64x4::ZERO))
2692    }
2693}
2694
2695/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
2696///
2697/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epu64&expand=3621)
2698#[inline]
2699#[target_feature(enable = "avx512f,avx512vl")]
2700#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2701#[cfg_attr(test, assert_instr(vpmaxuq))]
2702pub fn _mm_max_epu64(a: __m128i, b: __m128i) -> __m128i {
2703    unsafe {
2704        let a = a.as_u64x2();
2705        let b = b.as_u64x2();
2706        transmute(simd_select::<i64x2, _>(simd_gt(a, b), a, b))
2707    }
2708}
2709
2710/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2711///
2712/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu64&expand=3619)
2713#[inline]
2714#[target_feature(enable = "avx512f,avx512vl")]
2715#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2716#[cfg_attr(test, assert_instr(vpmaxuq))]
2717pub fn _mm_mask_max_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2718    unsafe {
2719        let max = _mm_max_epu64(a, b).as_u64x2();
2720        transmute(simd_select_bitmask(k, max, src.as_u64x2()))
2721    }
2722}
2723
2724/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2725///
2726/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu64&expand=3620)
2727#[inline]
2728#[target_feature(enable = "avx512f,avx512vl")]
2729#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2730#[cfg_attr(test, assert_instr(vpmaxuq))]
2731pub fn _mm_maskz_max_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2732    unsafe {
2733        let max = _mm_max_epu64(a, b).as_u64x2();
2734        transmute(simd_select_bitmask(k, max, u64x2::ZERO))
2735    }
2736}
2737
2738/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst.
2739///
2740/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi32&expand=3696)
2741#[inline]
2742#[target_feature(enable = "avx512f")]
2743#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2744#[cfg_attr(test, assert_instr(vpminsd))]
2745pub fn _mm512_min_epi32(a: __m512i, b: __m512i) -> __m512i {
2746    unsafe {
2747        let a = a.as_i32x16();
2748        let b = b.as_i32x16();
2749        transmute(simd_select::<i32x16, _>(simd_lt(a, b), a, b))
2750    }
2751}
2752
2753/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2754///
2755/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi32&expand=3694)
2756#[inline]
2757#[target_feature(enable = "avx512f")]
2758#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2759#[cfg_attr(test, assert_instr(vpminsd))]
2760pub fn _mm512_mask_min_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2761    unsafe {
2762        let min = _mm512_min_epi32(a, b).as_i32x16();
2763        transmute(simd_select_bitmask(k, min, src.as_i32x16()))
2764    }
2765}
2766
2767/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2768///
2769/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi32&expand=3695)
2770#[inline]
2771#[target_feature(enable = "avx512f")]
2772#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2773#[cfg_attr(test, assert_instr(vpminsd))]
2774pub fn _mm512_maskz_min_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2775    unsafe {
2776        let min = _mm512_min_epi32(a, b).as_i32x16();
2777        transmute(simd_select_bitmask(k, min, i32x16::ZERO))
2778    }
2779}
2780
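// Illustrative sketch (hypothetical helper): the min family mirrors max; each
// selected lane is the smaller signed value, and the writemask again decides
// which lanes come from `src`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_mask_min_epi32_sketch() -> __m512i {
    let src = _mm512_set1_epi32(0);
    let a = _mm512_set1_epi32(-5);
    let b = _mm512_set1_epi32(3);
    // Lanes 0..8 get min(-5, 3) = -5; lanes 8..16 keep 0 from `src`.
    _mm512_mask_min_epi32(src, 0x00FF, a, b)
}
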
2781/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2782///
2783/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi32&expand=3691)
2784#[inline]
2785#[target_feature(enable = "avx512f,avx512vl")]
2786#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2787#[cfg_attr(test, assert_instr(vpminsd))]
2788pub fn _mm256_mask_min_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2789    unsafe {
2790        let min = _mm256_min_epi32(a, b).as_i32x8();
2791        transmute(simd_select_bitmask(k, min, src.as_i32x8()))
2792    }
2793}
2794
2795/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2796///
2797/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi32&expand=3692)
2798#[inline]
2799#[target_feature(enable = "avx512f,avx512vl")]
2800#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2801#[cfg_attr(test, assert_instr(vpminsd))]
2802pub fn _mm256_maskz_min_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2803    unsafe {
2804        let min = _mm256_min_epi32(a, b).as_i32x8();
2805        transmute(simd_select_bitmask(k, min, i32x8::ZERO))
2806    }
2807}
2808
2809/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2810///
2811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi32&expand=3688)
2812#[inline]
2813#[target_feature(enable = "avx512f,avx512vl")]
2814#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2815#[cfg_attr(test, assert_instr(vpminsd))]
2816pub fn _mm_mask_min_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2817    unsafe {
2818        let min = _mm_min_epi32(a, b).as_i32x4();
2819        transmute(simd_select_bitmask(k, min, src.as_i32x4()))
2820    }
2821}
2822
2823/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2824///
2825/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi32&expand=3689)
2826#[inline]
2827#[target_feature(enable = "avx512f,avx512vl")]
2828#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2829#[cfg_attr(test, assert_instr(vpminsd))]
2830pub fn _mm_maskz_min_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2831    unsafe {
2832        let min = _mm_min_epi32(a, b).as_i32x4();
2833        transmute(simd_select_bitmask(k, min, i32x4::ZERO))
2834    }
2835}
2836
2837/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
2838///
2839/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi64&expand=3705)
2840#[inline]
2841#[target_feature(enable = "avx512f")]
2842#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2843#[cfg_attr(test, assert_instr(vpminsq))]
2844pub fn _mm512_min_epi64(a: __m512i, b: __m512i) -> __m512i {
2845    unsafe {
2846        let a = a.as_i64x8();
2847        let b = b.as_i64x8();
2848        transmute(simd_select::<i64x8, _>(simd_lt(a, b), a, b))
2849    }
2850}
2851
2852/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2853///
2854/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi64&expand=3703)
2855#[inline]
2856#[target_feature(enable = "avx512f")]
2857#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2858#[cfg_attr(test, assert_instr(vpminsq))]
2859pub fn _mm512_mask_min_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2860    unsafe {
2861        let min = _mm512_min_epi64(a, b).as_i64x8();
2862        transmute(simd_select_bitmask(k, min, src.as_i64x8()))
2863    }
2864}
2865
2866/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2867///
2868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi64&expand=3704)
2869#[inline]
2870#[target_feature(enable = "avx512f")]
2871#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2872#[cfg_attr(test, assert_instr(vpminsq))]
2873pub fn _mm512_maskz_min_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2874    unsafe {
2875        let min = _mm512_min_epi64(a, b).as_i64x8();
2876        transmute(simd_select_bitmask(k, min, i64x8::ZERO))
2877    }
2878}
2879
2880/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
2881///
2882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epi64&expand=3702)
2883#[inline]
2884#[target_feature(enable = "avx512f,avx512vl")]
2885#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2886#[cfg_attr(test, assert_instr(vpminsq))]
2887pub fn _mm256_min_epi64(a: __m256i, b: __m256i) -> __m256i {
2888    unsafe {
2889        let a = a.as_i64x4();
2890        let b = b.as_i64x4();
2891        transmute(simd_select::<i64x4, _>(simd_lt(a, b), a, b))
2892    }
2893}
2894
2895/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2896///
2897/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi64&expand=3700)
2898#[inline]
2899#[target_feature(enable = "avx512f,avx512vl")]
2900#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2901#[cfg_attr(test, assert_instr(vpminsq))]
2902pub fn _mm256_mask_min_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2903    unsafe {
2904        let min = _mm256_min_epi64(a, b).as_i64x4();
2905        transmute(simd_select_bitmask(k, min, src.as_i64x4()))
2906    }
2907}
2908
2909/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2910///
2911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi64&expand=3701)
2912#[inline]
2913#[target_feature(enable = "avx512f,avx512vl")]
2914#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2915#[cfg_attr(test, assert_instr(vpminsq))]
2916pub fn _mm256_maskz_min_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2917    unsafe {
2918        let min = _mm256_min_epi64(a, b).as_i64x4();
2919        transmute(simd_select_bitmask(k, min, i64x4::ZERO))
2920    }
2921}
2922
2923/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
2924///
2925/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epi64)
2926#[inline]
2927#[target_feature(enable = "avx512f,avx512vl")]
2928#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2929#[cfg_attr(test, assert_instr(vpminsq))]
2930pub fn _mm_min_epi64(a: __m128i, b: __m128i) -> __m128i {
2931    unsafe {
2932        let a = a.as_i64x2();
2933        let b = b.as_i64x2();
2934        transmute(simd_select::<i64x2, _>(simd_lt(a, b), a, b))
2935    }
2936}
2937
2938/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2939///
2940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi64)
2941#[inline]
2942#[target_feature(enable = "avx512f,avx512vl")]
2943#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2944#[cfg_attr(test, assert_instr(vpminsq))]
2945pub fn _mm_mask_min_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2946    unsafe {
2947        let min = _mm_min_epi64(a, b).as_i64x2();
2948        transmute(simd_select_bitmask(k, min, src.as_i64x2()))
2949    }
2950}
2951
2952/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2953///
2954/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi64)
2955#[inline]
2956#[target_feature(enable = "avx512f,avx512vl")]
2957#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2958#[cfg_attr(test, assert_instr(vpminsq))]
2959pub fn _mm_maskz_min_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2960    unsafe {
2961        let min = _mm_min_epi64(a, b).as_i64x2();
2962        transmute(simd_select_bitmask(k, min, i64x2::ZERO))
2963    }
2964}
2965
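// Illustrative sketch (a hypothetical helper; `_mm_set1_epi64x` comes from the
// sse2 module of this crate): the 128-bit signed 64-bit minimum is likewise
// AVX-512-only (F plus VL); there is no SSE/AVX2 `_mm_min_epi64`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl,sse2")]
fn _example_min_epi64_sketch() -> __m128i {
    let a = _mm_set1_epi64x(-7);
    let b = _mm_set1_epi64x(2);
    // Both lanes become min(-7, 2) = -7.
    _mm_min_epi64(a, b)
}
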
2966/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst.
2967///
2968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_ps&expand=3769)
2969#[inline]
2970#[target_feature(enable = "avx512f")]
2971#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2972#[cfg_attr(test, assert_instr(vminps))]
2973pub fn _mm512_min_ps(a: __m512, b: __m512) -> __m512 {
2974    unsafe {
2975        transmute(vminps(
2976            a.as_f32x16(),
2977            b.as_f32x16(),
2978            _MM_FROUND_CUR_DIRECTION,
2979        ))
2980    }
2981}
2982
2983/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2984///
2985/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_ps&expand=3767)
2986#[inline]
2987#[target_feature(enable = "avx512f")]
2988#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2989#[cfg_attr(test, assert_instr(vminps))]
2990pub fn _mm512_mask_min_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
2991    unsafe {
2992        let min = _mm512_min_ps(a, b).as_f32x16();
2993        transmute(simd_select_bitmask(k, min, src.as_f32x16()))
2994    }
2995}
2996
2997/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2998///
2999/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_ps&expand=3768)
3000#[inline]
3001#[target_feature(enable = "avx512f")]
3002#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3003#[cfg_attr(test, assert_instr(vminps))]
3004pub fn _mm512_maskz_min_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
3005    unsafe {
3006        let min = _mm512_min_ps(a, b).as_f32x16();
3007        transmute(simd_select_bitmask(k, min, f32x16::ZERO))
3008    }
3009}
3010
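// Illustrative sketch (hypothetical helper): the zeromask applied to the packed
// single-precision minimum; masked-off lanes become +0.0.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_maskz_min_ps_sketch() -> __m512 {
    let a = _mm512_set1_ps(1.5);
    let b = _mm512_set1_ps(-2.5);
    // Lanes 0..8 get min(1.5, -2.5) = -2.5; lanes 8..16 are zeroed.
    _mm512_maskz_min_ps(0x00FF, a, b)
}
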
3011/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3012///
3013/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_ps&expand=3764)
3014#[inline]
3015#[target_feature(enable = "avx512f,avx512vl")]
3016#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3017#[cfg_attr(test, assert_instr(vminps))]
3018pub fn _mm256_mask_min_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
3019    unsafe {
3020        let min = _mm256_min_ps(a, b).as_f32x8();
3021        transmute(simd_select_bitmask(k, min, src.as_f32x8()))
3022    }
3023}
3024
3025/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3026///
3027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_ps&expand=3765)
3028#[inline]
3029#[target_feature(enable = "avx512f,avx512vl")]
3030#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3031#[cfg_attr(test, assert_instr(vminps))]
3032pub fn _mm256_maskz_min_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
3033    unsafe {
3034        let min = _mm256_min_ps(a, b).as_f32x8();
3035        transmute(simd_select_bitmask(k, min, f32x8::ZERO))
3036    }
3037}
3038
3039/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3040///
3041/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_ps&expand=3761)
3042#[inline]
3043#[target_feature(enable = "avx512f,avx512vl")]
3044#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3045#[cfg_attr(test, assert_instr(vminps))]
3046pub fn _mm_mask_min_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
3047    unsafe {
3048        let min = _mm_min_ps(a, b).as_f32x4();
3049        transmute(simd_select_bitmask(k, min, src.as_f32x4()))
3050    }
3051}
3052
3053/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3054///
3055/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_ps&expand=3762)
3056#[inline]
3057#[target_feature(enable = "avx512f,avx512vl")]
3058#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3059#[cfg_attr(test, assert_instr(vminps))]
3060pub fn _mm_maskz_min_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
3061    unsafe {
3062        let min = _mm_min_ps(a, b).as_f32x4();
3063        transmute(simd_select_bitmask(k, min, f32x4::ZERO))
3064    }
3065}
3066
3067/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst.
3068///
3069/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_pd&expand=3759)
3070#[inline]
3071#[target_feature(enable = "avx512f")]
3072#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3073#[cfg_attr(test, assert_instr(vminpd))]
3074pub fn _mm512_min_pd(a: __m512d, b: __m512d) -> __m512d {
3075    unsafe { transmute(vminpd(a.as_f64x8(), b.as_f64x8(), _MM_FROUND_CUR_DIRECTION)) }
3076}
3077
3078/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3079///
3080/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_pd&expand=3757)
3081#[inline]
3082#[target_feature(enable = "avx512f")]
3083#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3084#[cfg_attr(test, assert_instr(vminpd))]
3085pub fn _mm512_mask_min_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
3086    unsafe {
3087        let min = _mm512_min_pd(a, b).as_f64x8();
3088        transmute(simd_select_bitmask(k, min, src.as_f64x8()))
3089    }
3090}
3091
3092/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3093///
3094/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_pd&expand=3758)
3095#[inline]
3096#[target_feature(enable = "avx512f")]
3097#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3098#[cfg_attr(test, assert_instr(vminpd))]
3099pub fn _mm512_maskz_min_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
3100    unsafe {
3101        let min = _mm512_min_pd(a, b).as_f64x8();
3102        transmute(simd_select_bitmask(k, min, f64x8::ZERO))
3103    }
3104}
3105
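// Illustrative sketch (hypothetical helper): the writemask form of the packed
// double-precision minimum, with `src` supplying the masked-off lanes.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_mask_min_pd_sketch() -> __m512d {
    let src = _mm512_set1_pd(9.0);
    let a = _mm512_set1_pd(4.0);
    let b = _mm512_set1_pd(6.0);
    // Lanes 0..4 get min(4.0, 6.0) = 4.0; lanes 4..8 keep 9.0 from `src`.
    _mm512_mask_min_pd(src, 0b0000_1111, a, b)
}
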
3106/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3107///
3108/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_pd&expand=3754)
3109#[inline]
3110#[target_feature(enable = "avx512f,avx512vl")]
3111#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3112#[cfg_attr(test, assert_instr(vminpd))]
3113pub fn _mm256_mask_min_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
3114    unsafe {
3115        let min = _mm256_min_pd(a, b).as_f64x4();
3116        transmute(simd_select_bitmask(k, min, src.as_f64x4()))
3117    }
3118}
3119
3120/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3121///
3122/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_pd&expand=3755)
3123#[inline]
3124#[target_feature(enable = "avx512f,avx512vl")]
3125#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3126#[cfg_attr(test, assert_instr(vminpd))]
3127pub fn _mm256_maskz_min_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
3128    unsafe {
3129        let min = _mm256_min_pd(a, b).as_f64x4();
3130        transmute(simd_select_bitmask(k, min, f64x4::ZERO))
3131    }
3132}
3133
3134/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3135///
3136/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_pd&expand=3751)
3137#[inline]
3138#[target_feature(enable = "avx512f,avx512vl")]
3139#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3140#[cfg_attr(test, assert_instr(vminpd))]
3141pub fn _mm_mask_min_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
3142    unsafe {
3143        let min = _mm_min_pd(a, b).as_f64x2();
3144        transmute(simd_select_bitmask(k, min, src.as_f64x2()))
3145    }
3146}
3147
3148/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3149///
3150/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_pd&expand=3752)
3151#[inline]
3152#[target_feature(enable = "avx512f,avx512vl")]
3153#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3154#[cfg_attr(test, assert_instr(vminpd))]
3155pub fn _mm_maskz_min_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
3156    unsafe {
3157        let min = _mm_min_pd(a, b).as_f64x2();
3158        transmute(simd_select_bitmask(k, min, f64x2::ZERO))
3159    }
3160}
3161
3162/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst.
3163///
3164/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu32&expand=3732)
3165#[inline]
3166#[target_feature(enable = "avx512f")]
3167#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3168#[cfg_attr(test, assert_instr(vpminud))]
3169pub fn _mm512_min_epu32(a: __m512i, b: __m512i) -> __m512i {
3170    unsafe {
3171        let a = a.as_u32x16();
3172        let b = b.as_u32x16();
3173        transmute(simd_select::<i32x16, _>(simd_lt(a, b), a, b))
3174    }
3175}
3176
3177/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3178///
3179/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu32&expand=3730)
3180#[inline]
3181#[target_feature(enable = "avx512f")]
3182#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3183#[cfg_attr(test, assert_instr(vpminud))]
3184pub fn _mm512_mask_min_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
3185    unsafe {
3186        let min = _mm512_min_epu32(a, b).as_u32x16();
3187        transmute(simd_select_bitmask(k, min, src.as_u32x16()))
3188    }
3189}
3190
3191/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3192///
3193/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu32&expand=3731)
3194#[inline]
3195#[target_feature(enable = "avx512f")]
3196#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3197#[cfg_attr(test, assert_instr(vpminud))]
3198pub fn _mm512_maskz_min_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
3199    unsafe {
3200        let min = _mm512_min_epu32(a, b).as_u32x16();
3201        transmute(simd_select_bitmask(k, min, u32x16::ZERO))
3202    }
3203}
3204
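// Illustrative sketch (hypothetical helper): unsigned comparison again, so the
// all-ones bit pattern never wins a minimum.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_min_epu32_sketch() -> __m512i {
    let a = _mm512_set1_epi32(-1); // u32::MAX when viewed unsigned
    let b = _mm512_set1_epi32(7);
    // Every lane becomes 7, since 7 < u32::MAX in the unsigned order.
    _mm512_min_epu32(a, b)
}
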
3205/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3206///
3207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu32&expand=3727)
3208#[inline]
3209#[target_feature(enable = "avx512f,avx512vl")]
3210#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3211#[cfg_attr(test, assert_instr(vpminud))]
3212pub fn _mm256_mask_min_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3213    unsafe {
3214        let min = _mm256_min_epu32(a, b).as_u32x8();
3215        transmute(simd_select_bitmask(k, min, src.as_u32x8()))
3216    }
3217}
3218
3219/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3220///
3221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu32&expand=3728)
3222#[inline]
3223#[target_feature(enable = "avx512f,avx512vl")]
3224#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3225#[cfg_attr(test, assert_instr(vpminud))]
3226pub fn _mm256_maskz_min_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3227    unsafe {
3228        let min = _mm256_min_epu32(a, b).as_u32x8();
3229        transmute(simd_select_bitmask(k, min, u32x8::ZERO))
3230    }
3231}
3232
3233/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3234///
3235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu32&expand=3724)
3236#[inline]
3237#[target_feature(enable = "avx512f,avx512vl")]
3238#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3239#[cfg_attr(test, assert_instr(vpminud))]
3240pub fn _mm_mask_min_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3241    unsafe {
3242        let min = _mm_min_epu32(a, b).as_u32x4();
3243        transmute(simd_select_bitmask(k, min, src.as_u32x4()))
3244    }
3245}
3246
3247/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3248///
3249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu32&expand=3725)
3250#[inline]
3251#[target_feature(enable = "avx512f,avx512vl")]
3252#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3253#[cfg_attr(test, assert_instr(vpminud))]
3254pub fn _mm_maskz_min_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3255    unsafe {
3256        let min = _mm_min_epu32(a, b).as_u32x4();
3257        transmute(simd_select_bitmask(k, min, u32x4::ZERO))
3258    }
3259}
3260
3261/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
3262///
3263/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu64&expand=3741)
3264#[inline]
3265#[target_feature(enable = "avx512f")]
3266#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3267#[cfg_attr(test, assert_instr(vpminuq))]
3268pub fn _mm512_min_epu64(a: __m512i, b: __m512i) -> __m512i {
3269    unsafe {
3270        let a = a.as_u64x8();
3271        let b = b.as_u64x8();
3272        transmute(simd_select::<i64x8, _>(simd_lt(a, b), a, b))
3273    }
3274}
3275
3276/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3277///
3278/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu64&expand=3739)
3279#[inline]
3280#[target_feature(enable = "avx512f")]
3281#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3282#[cfg_attr(test, assert_instr(vpminuq))]
3283pub fn _mm512_mask_min_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
3284    unsafe {
3285        let min = _mm512_min_epu64(a, b).as_u64x8();
3286        transmute(simd_select_bitmask(k, min, src.as_u64x8()))
3287    }
3288}
3289
3290/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3291///
3292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu64&expand=3740)
3293#[inline]
3294#[target_feature(enable = "avx512f")]
3295#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3296#[cfg_attr(test, assert_instr(vpminuq))]
3297pub fn _mm512_maskz_min_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
3298    unsafe {
3299        let min = _mm512_min_epu64(a, b).as_u64x8();
3300        transmute(simd_select_bitmask(k, min, u64x8::ZERO))
3301    }
3302}
3303
3304/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
3305///
3306/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epu64&expand=3738)
3307#[inline]
3308#[target_feature(enable = "avx512f,avx512vl")]
3309#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3310#[cfg_attr(test, assert_instr(vpminuq))]
3311pub fn _mm256_min_epu64(a: __m256i, b: __m256i) -> __m256i {
3312    unsafe {
3313        let a = a.as_u64x4();
3314        let b = b.as_u64x4();
3315        transmute(simd_select::<i64x4, _>(simd_lt(a, b), a, b))
3316    }
3317}
3318
3319/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3320///
3321/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu64&expand=3736)
3322#[inline]
3323#[target_feature(enable = "avx512f,avx512vl")]
3324#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3325#[cfg_attr(test, assert_instr(vpminuq))]
3326pub fn _mm256_mask_min_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3327    unsafe {
3328        let min = _mm256_min_epu64(a, b).as_u64x4();
3329        transmute(simd_select_bitmask(k, min, src.as_u64x4()))
3330    }
3331}
3332
3333/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3334///
3335/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu64&expand=3737)
3336#[inline]
3337#[target_feature(enable = "avx512f,avx512vl")]
3338#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3339#[cfg_attr(test, assert_instr(vpminuq))]
3340pub fn _mm256_maskz_min_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3341    unsafe {
3342        let min = _mm256_min_epu64(a, b).as_u64x4();
3343        transmute(simd_select_bitmask(k, min, u64x4::ZERO))
3344    }
3345}
3346
3347/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
3348///
3349/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epu64&expand=3735)
3350#[inline]
3351#[target_feature(enable = "avx512f,avx512vl")]
3352#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3353#[cfg_attr(test, assert_instr(vpminuq))]
3354pub fn _mm_min_epu64(a: __m128i, b: __m128i) -> __m128i {
3355    unsafe {
3356        let a = a.as_u64x2();
3357        let b = b.as_u64x2();
3358        transmute(simd_select::<i64x2, _>(simd_lt(a, b), a, b))
3359    }
3360}
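
// Illustrative sketch (not part of the upstream source): unlike the 32-bit
// unsigned minimum, there is no SSE/AVX2 `vpminuq`, so even this 128-bit form
// requires avx512f together with avx512vl. The lane values are arbitrary.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn sketch_min_epu64_needs_vl() -> __m128i {
    let a = _mm_set_epi64x(u64::MAX as i64, 7); // lanes [7, u64::MAX] as unsigned
    let b = _mm_set_epi64x(1, 9); // lanes [9, 1]
    // Unsigned comparison per lane: min(7, 9) = 7 and min(u64::MAX, 1) = 1.
    _mm_min_epu64(a, b)
}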
3361
3362/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3363///
3364/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu64&expand=3733)
3365#[inline]
3366#[target_feature(enable = "avx512f,avx512vl")]
3367#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3368#[cfg_attr(test, assert_instr(vpminuq))]
3369pub fn _mm_mask_min_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3370    unsafe {
3371        let min = _mm_min_epu64(a, b).as_u64x2();
3372        transmute(simd_select_bitmask(k, min, src.as_u64x2()))
3373    }
3374}
3375
3376/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3377///
3378/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu64&expand=3734)
3379#[inline]
3380#[target_feature(enable = "avx512f,avx512vl")]
3381#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3382#[cfg_attr(test, assert_instr(vpminuq))]
3383pub fn _mm_maskz_min_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3384    unsafe {
3385        let min = _mm_min_epu64(a, b).as_u64x2();
3386        transmute(simd_select_bitmask(k, min, u64x2::ZERO))
3387    }
3388}
3389
3390/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst.
3391///
3392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_ps&expand=5371)
3393#[inline]
3394#[target_feature(enable = "avx512f")]
3395#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3396#[cfg_attr(test, assert_instr(vsqrtps))]
3397pub fn _mm512_sqrt_ps(a: __m512) -> __m512 {
3398    unsafe { simd_fsqrt(a) }
3399}
3400
3401/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3402///
3403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_ps&expand=5369)
3404#[inline]
3405#[target_feature(enable = "avx512f")]
3406#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3407#[cfg_attr(test, assert_instr(vsqrtps))]
3408pub fn _mm512_mask_sqrt_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
3409    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3410}
3411
3412/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3413///
3414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_ps&expand=5370)
3415#[inline]
3416#[target_feature(enable = "avx512f")]
3417#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3418#[cfg_attr(test, assert_instr(vsqrtps))]
3419pub fn _mm512_maskz_sqrt_ps(k: __mmask16, a: __m512) -> __m512 {
3420    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm512_setzero_ps()) }
3421}
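
// Illustrative sketch (not part of the upstream source): with the writemask
// form, lanes whose mask bit is clear never receive the square root, so e.g.
// negative inputs in those lanes do not leak NaNs into the result. The
// fallback value (0.0) and the mask are arbitrary choices for this sketch.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn sketch_masked_sqrt_ps(a: __m512, valid: __mmask16) -> __m512 {
    // Lanes selected by `valid` become sqrt(a); the rest take the fallback 0.0.
    _mm512_mask_sqrt_ps(_mm512_setzero_ps(), valid, a)
}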
3422
3423/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3424///
3425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sqrt_ps&expand=5366)
3426#[inline]
3427#[target_feature(enable = "avx512f,avx512vl")]
3428#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3429#[cfg_attr(test, assert_instr(vsqrtps))]
3430pub fn _mm256_mask_sqrt_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
3431    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3432}
3433
3434/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3435///
3436/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sqrt_ps&expand=5367)
3437#[inline]
3438#[target_feature(enable = "avx512f,avx512vl")]
3439#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3440#[cfg_attr(test, assert_instr(vsqrtps))]
3441pub fn _mm256_maskz_sqrt_ps(k: __mmask8, a: __m256) -> __m256 {
3442    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm256_setzero_ps()) }
3443}
3444
3445/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3446///
3447/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sqrt_ps&expand=5363)
3448#[inline]
3449#[target_feature(enable = "avx512f,avx512vl")]
3450#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3451#[cfg_attr(test, assert_instr(vsqrtps))]
3452pub fn _mm_mask_sqrt_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
3453    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3454}
3455
3456/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3457///
3458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sqrt_ps&expand=5364)
3459#[inline]
3460#[target_feature(enable = "avx512f,avx512vl")]
3461#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3462#[cfg_attr(test, assert_instr(vsqrtps))]
3463pub fn _mm_maskz_sqrt_ps(k: __mmask8, a: __m128) -> __m128 {
3464    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm_setzero_ps()) }
3465}
3466
3467/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst.
3468///
3469/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_pd&expand=5362)
3470#[inline]
3471#[target_feature(enable = "avx512f")]
3472#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3473#[cfg_attr(test, assert_instr(vsqrtpd))]
3474pub fn _mm512_sqrt_pd(a: __m512d) -> __m512d {
3475    unsafe { simd_fsqrt(a) }
3476}
3477
3478/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3479///
3480/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_pd&expand=5360)
3481#[inline]
3482#[target_feature(enable = "avx512f")]
3483#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3484#[cfg_attr(test, assert_instr(vsqrtpd))]
3485pub fn _mm512_mask_sqrt_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
3486    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3487}
3488
3489/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3490///
3491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_pd&expand=5361)
3492#[inline]
3493#[target_feature(enable = "avx512f")]
3494#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3495#[cfg_attr(test, assert_instr(vsqrtpd))]
3496pub fn _mm512_maskz_sqrt_pd(k: __mmask8, a: __m512d) -> __m512d {
3497    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm512_setzero_pd()) }
3498}
3499
3500/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3501///
3502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sqrt_pd&expand=5357)
3503#[inline]
3504#[target_feature(enable = "avx512f,avx512vl")]
3505#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3506#[cfg_attr(test, assert_instr(vsqrtpd))]
3507pub fn _mm256_mask_sqrt_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
3508    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3509}
3510
3511/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3512///
3513/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sqrt_pd&expand=5358)
3514#[inline]
3515#[target_feature(enable = "avx512f,avx512vl")]
3516#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3517#[cfg_attr(test, assert_instr(vsqrtpd))]
3518pub fn _mm256_maskz_sqrt_pd(k: __mmask8, a: __m256d) -> __m256d {
3519    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm256_setzero_pd()) }
3520}
3521
3522/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3523///
3524/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sqrt_pd&expand=5354)
3525#[inline]
3526#[target_feature(enable = "avx512f,avx512vl")]
3527#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3528#[cfg_attr(test, assert_instr(vsqrtpd))]
3529pub fn _mm_mask_sqrt_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
3530    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3531}
3532
3533/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3534///
3535/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sqrt_pd&expand=5355)
3536#[inline]
3537#[target_feature(enable = "avx512f,avx512vl")]
3538#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3539#[cfg_attr(test, assert_instr(vsqrtpd))]
3540pub fn _mm_maskz_sqrt_pd(k: __mmask8, a: __m128d) -> __m128d {
3541    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm_setzero_pd()) }
3542}
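
// Illustrative sketch (not part of the upstream source): per the two bodies
// above, the zeromask form behaves like the writemask form with an all-zero
// fallback vector. Shown for the 128-bit case, which still needs avx512vl.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn sketch_maskz_sqrt_pd_as_mask(k: __mmask8, a: __m128d) -> (__m128d, __m128d) {
    let via_zeromask = _mm_maskz_sqrt_pd(k, a);
    let via_writemask = _mm_mask_sqrt_pd(_mm_setzero_pd(), k, a);
    // The two values are expected to be bit-identical for any `k` and `a`.
    (via_zeromask, via_writemask)
}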
3543
3544/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
3545///
3546/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_ps&expand=2557)
3547#[inline]
3548#[target_feature(enable = "avx512f")]
3549#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3550#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3551pub fn _mm512_fmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
3552    unsafe { simd_fma(a, b, c) }
3553}
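
// Illustrative sketch (not part of the upstream source): the fused form rounds
// once per lane, so for some inputs it differs from a separately rounded
// `a * b + c`. The inputs are arbitrary placeholders.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn sketch_fmadd_ps_is_fused(a: __m512, b: __m512, c: __m512) -> (__m512, __m512) {
    let fused = _mm512_fmadd_ps(a, b, c); // one rounding step per lane
    let unfused = _mm512_add_ps(_mm512_mul_ps(a, b), c); // two rounding steps per lane
    // The two results can disagree in the last bit of a lane.
    (fused, unfused)
}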
3554
3555/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3556///
3557/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_ps&expand=2558)
3558#[inline]
3559#[target_feature(enable = "avx512f")]
3560#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3561#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3562pub fn _mm512_mask_fmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
3563    unsafe { simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), a) }
3564}
3565
3566/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3567///
3568/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_ps&expand=2560)
3569#[inline]
3570#[target_feature(enable = "avx512f")]
3571#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3572#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3573pub fn _mm512_maskz_fmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
3574    unsafe { simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), _mm512_setzero_ps()) }
3575}
3576
3577/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3578///
3579/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_ps&expand=2559)
3580#[inline]
3581#[target_feature(enable = "avx512f")]
3582#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3583#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3584pub fn _mm512_mask3_fmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
3585    unsafe { simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), c) }
3586}
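
// Illustrative sketch (not part of the upstream source): the three masked
// fmadd variants differ only in where the unselected lanes come from: `a` for
// `mask`, zero for `maskz`, and `c` for `mask3`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn sketch_fmadd_ps_mask_variants(a: __m512, b: __m512, c: __m512, k: __mmask16) -> [__m512; 3] {
    [
        _mm512_mask_fmadd_ps(a, k, b, c),  // unselected lanes keep `a`
        _mm512_maskz_fmadd_ps(k, a, b, c), // unselected lanes become 0.0
        _mm512_mask3_fmadd_ps(a, b, c, k), // unselected lanes keep `c`
    ]
}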
3587
3588/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3589///
3590/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmadd_ps&expand=2554)
3591#[inline]
3592#[target_feature(enable = "avx512f,avx512vl")]
3593#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3594#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3595pub fn _mm256_mask_fmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
3596    unsafe { simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), a) }
3597}
3598
3599/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3600///
3601/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmadd_ps&expand=2556)
3602#[inline]
3603#[target_feature(enable = "avx512f,avx512vl")]
3604#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3605#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3606pub fn _mm256_maskz_fmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
3607    unsafe { simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), _mm256_setzero_ps()) }
3608}
3609
3610/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3611///
3612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmadd_ps&expand=2555)
3613#[inline]
3614#[target_feature(enable = "avx512f,avx512vl")]
3615#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3616#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3617pub fn _mm256_mask3_fmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
3618    unsafe { simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), c) }
3619}
3620
3621/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3622///
3623/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmadd_ps&expand=2550)
3624#[inline]
3625#[target_feature(enable = "avx512f,avx512vl")]
3626#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3627#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3628pub fn _mm_mask_fmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
3629    unsafe { simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), a) }
3630}
3631
3632/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3633///
3634/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmadd_ps&expand=2552)
3635#[inline]
3636#[target_feature(enable = "avx512f,avx512vl")]
3637#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3638#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3639pub fn _mm_maskz_fmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
3640    unsafe { simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), _mm_setzero_ps()) }
3641}
3642
3643/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3644///
3645/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmadd_ps&expand=2551)
3646#[inline]
3647#[target_feature(enable = "avx512f,avx512vl")]
3648#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3649#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3650pub fn _mm_mask3_fmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
3651    unsafe { simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), c) }
3652}
3653
3654/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
3655///
3656/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_pd&expand=2545)
3657#[inline]
3658#[target_feature(enable = "avx512f")]
3659#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3660#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3661pub fn _mm512_fmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
3662    unsafe { simd_fma(a, b, c) }
3663}
3664
3665/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3666///
3667/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_pd&expand=2546)
3668#[inline]
3669#[target_feature(enable = "avx512f")]
3670#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3671#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3672pub fn _mm512_mask_fmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
3673    unsafe { simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), a) }
3674}
3675
3676/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3677///
3678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_pd&expand=2548)
3679#[inline]
3680#[target_feature(enable = "avx512f")]
3681#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3682#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3683pub fn _mm512_maskz_fmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
3684    unsafe { simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), _mm512_setzero_pd()) }
3685}
3686
3687/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3688///
3689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_pd&expand=2547)
3690#[inline]
3691#[target_feature(enable = "avx512f")]
3692#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3693#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3694pub fn _mm512_mask3_fmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
3695    unsafe { simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), c) }
3696}
3697
3698/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3699///
3700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmadd_pd&expand=2542)
3701#[inline]
3702#[target_feature(enable = "avx512f,avx512vl")]
3703#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3704#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3705pub fn _mm256_mask_fmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
3706    unsafe { simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), a) }
3707}
3708
3709/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3710///
3711/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmadd_pd&expand=2544)
3712#[inline]
3713#[target_feature(enable = "avx512f,avx512vl")]
3714#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3715#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3716pub fn _mm256_maskz_fmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
3717    unsafe { simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), _mm256_setzero_pd()) }
3718}
3719
3720/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3721///
3722/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmadd_pd&expand=2543)
3723#[inline]
3724#[target_feature(enable = "avx512f,avx512vl")]
3725#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3726#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3727pub fn _mm256_mask3_fmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
3728    unsafe { simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), c) }
3729}
3730
3731/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3732///
3733/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmadd_pd&expand=2538)
3734#[inline]
3735#[target_feature(enable = "avx512f,avx512vl")]
3736#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3737#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3738pub fn _mm_mask_fmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
3739    unsafe { simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), a) }
3740}
3741
3742/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3743///
3744/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmadd_pd&expand=2540)
3745#[inline]
3746#[target_feature(enable = "avx512f,avx512vl")]
3747#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3748#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3749pub fn _mm_maskz_fmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
3750    unsafe { simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), _mm_setzero_pd()) }
3751}
3752
3753/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3754///
3755/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmadd_pd&expand=2539)
3756#[inline]
3757#[target_feature(enable = "avx512f,avx512vl")]
3758#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3759#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3760pub fn _mm_mask3_fmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
3761    unsafe { simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), c) }
3762}
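
// Illustrative sketch (not part of the upstream source): `mask3` is convenient
// when `c` is an accumulator, because unselected lanes keep their old value.
// `acc`, `x`, `w`, and `lanes_to_update` are hypothetical names.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn sketch_masked_accumulate_pd(
    acc: __m512d,
    x: __m512d,
    w: __m512d,
    lanes_to_update: __mmask8,
) -> __m512d {
    // acc[i] = x[i] * w[i] + acc[i] where the mask bit is set; acc[i] otherwise.
    _mm512_mask3_fmadd_pd(x, w, acc, lanes_to_update)
}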
3763
3764/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
3765///
3766/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_ps&expand=2643)
3767#[inline]
3768#[target_feature(enable = "avx512f")]
3769#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3770#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
3771pub fn _mm512_fmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
3772    unsafe { simd_fma(a, b, simd_neg(c)) }
3773}
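
// Illustrative sketch (not part of the upstream source): a typical use of
// `fmsub` is forming a residual `a * b - c` in one fused step, e.g. when
// refining a reciprocal estimate. `x` and `est` are hypothetical names.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn sketch_fmsub_ps_residual(x: __m512, est: __m512) -> __m512 {
    // residual = x * est - 1.0, computed with a single rounding step per lane.
    _mm512_fmsub_ps(x, est, _mm512_set1_ps(1.0))
}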
3774
3775/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3776///
3777/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_ps&expand=2644)
3778#[inline]
3779#[target_feature(enable = "avx512f")]
3780#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3781#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
3782pub fn _mm512_mask_fmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
3783    unsafe { simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), a) }
3784}
3785
3786/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3787///
3788/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_ps&expand=2646)
3789#[inline]
3790#[target_feature(enable = "avx512f")]
3791#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3792#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
3793pub fn _mm512_maskz_fmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
3794    unsafe { simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), _mm512_setzero_ps()) }
3795}
3796
3797/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3798///
3799/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_ps&expand=2645)
3800#[inline]
3801#[target_feature(enable = "avx512f")]
3802#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3803#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
3804pub fn _mm512_mask3_fmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
3805    unsafe { simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), c) }
3806}
3807
3808/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3809///
3810/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsub_ps&expand=2640)
3811#[inline]
3812#[target_feature(enable = "avx512f,avx512vl")]
3813#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3814#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
3815pub fn _mm256_mask_fmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
3816    unsafe { simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), a) }
3817}
3818
3819/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3820///
3821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsub_ps&expand=2642)
3822#[inline]
3823#[target_feature(enable = "avx512f,avx512vl")]
3824#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3825#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
3826pub fn _mm256_maskz_fmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
3827    unsafe { simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), _mm256_setzero_ps()) }
3828}
3829
3830/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3831///
3832/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsub_ps&expand=2641)
3833#[inline]
3834#[target_feature(enable = "avx512f,avx512vl")]
3835#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3836#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
3837pub fn _mm256_mask3_fmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
3838    unsafe { simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), c) }
3839}
3840
3841/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3842///
3843/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsub_ps&expand=2636)
3844#[inline]
3845#[target_feature(enable = "avx512f,avx512vl")]
3846#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3847#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
3848pub fn _mm_mask_fmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
3849    unsafe { simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), a) }
3850}
3851
3852/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3853///
3854/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsub_ps&expand=2638)
3855#[inline]
3856#[target_feature(enable = "avx512f,avx512vl")]
3857#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3858#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
3859pub fn _mm_maskz_fmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
3860    unsafe { simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), _mm_setzero_ps()) }
3861}
3862
3863/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3864///
3865/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsub_ps&expand=2637)
3866#[inline]
3867#[target_feature(enable = "avx512f,avx512vl")]
3868#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3869#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
3870pub fn _mm_mask3_fmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
3871    unsafe { simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), c) }
3872}
3873
3874/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
3875///
3876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_pd&expand=2631)
3877#[inline]
3878#[target_feature(enable = "avx512f")]
3879#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3880#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd; clang generates vfmadd, gcc generates vfmsub
3881pub fn _mm512_fmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
3882    unsafe { simd_fma(a, b, simd_neg(c)) }
3883}
3884
3885/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3886///
3887/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_pd&expand=2632)
3888#[inline]
3889#[target_feature(enable = "avx512f")]
3890#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3891#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd; clang generates vfmadd, gcc generates vfmsub
3892pub fn _mm512_mask_fmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
3893    unsafe { simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), a) }
3894}
3895
3896/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3897///
3898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_pd&expand=2634)
3899#[inline]
3900#[target_feature(enable = "avx512f")]
3901#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3902#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd; clang generates vfmadd, gcc generates vfmsub
3903pub fn _mm512_maskz_fmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
3904    unsafe { simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), _mm512_setzero_pd()) }
3905}
3906
3907/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3908///
3909/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_pd&expand=2633)
3910#[inline]
3911#[target_feature(enable = "avx512f")]
3912#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3913#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd; clang generates vfmadd, gcc generates vfmsub
3914pub fn _mm512_mask3_fmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
3915    unsafe { simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), c) }
3916}
3917
3918/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3919///
3920/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsub_pd&expand=2628)
3921#[inline]
3922#[target_feature(enable = "avx512f,avx512vl")]
3923#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3924#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd; clang generates vfmadd, gcc generates vfmsub
3925pub fn _mm256_mask_fmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
3926    unsafe { simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), a) }
3927}
3928
3929/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3930///
3931/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsub_pd&expand=2630)
3932#[inline]
3933#[target_feature(enable = "avx512f,avx512vl")]
3934#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3935#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd; clang generates vfmadd, gcc generates vfmsub
3936pub fn _mm256_maskz_fmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
3937    unsafe { simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), _mm256_setzero_pd()) }
3938}
3939
3940/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3941///
3942/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsub_pd&expand=2629)
3943#[inline]
3944#[target_feature(enable = "avx512f,avx512vl")]
3945#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3946#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd; clang generates vfmadd, gcc generates vfmsub
3947pub fn _mm256_mask3_fmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
3948    unsafe { simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), c) }
3949}
3950
3951/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3952///
3953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsub_pd&expand=2624)
3954#[inline]
3955#[target_feature(enable = "avx512f,avx512vl")]
3956#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3957#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd; clang generates vfmadd, gcc generates vfmsub
3958pub fn _mm_mask_fmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
3959    unsafe { simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), a) }
3960}
3961
3962/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3963///
3964/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsub_pd&expand=2626)
3965#[inline]
3966#[target_feature(enable = "avx512f,avx512vl")]
3967#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3968#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd; clang generates vfmadd, gcc generates vfmsub
3969pub fn _mm_maskz_fmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
3970    unsafe { simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), _mm_setzero_pd()) }
3971}
3972
3973/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3974///
3975/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsub_pd&expand=2625)
3976#[inline]
3977#[target_feature(enable = "avx512f,avx512vl")]
3978#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3979#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd; clang generates vfmadd, gcc generates vfmsub
3980pub fn _mm_mask3_fmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
3981    unsafe { simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), c) }
3982}
3983
3984/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
3985///
3986/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_ps&expand=2611)
3987#[inline]
3988#[target_feature(enable = "avx512f")]
3989#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3990#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
3991pub fn _mm512_fmaddsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
3992    unsafe {
3993        let add = simd_fma(a, b, c);
3994        let sub = simd_fma(a, b, simd_neg(c));
3995        simd_shuffle!(
3996            add,
3997            sub,
3998            [16, 1, 18, 3, 20, 5, 22, 7, 24, 9, 26, 11, 28, 13, 30, 15]
3999        )
4000    }
4001}
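
// Illustrative sketch (not part of the upstream source): per the shuffle in the
// body above, even-indexed lanes take `a * b - c` and odd-indexed lanes take
// `a * b + c`. The constants are arbitrary and only chosen to make the pattern visible.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn sketch_fmaddsub_ps_lane_pattern() -> __m512 {
    let a = _mm512_set1_ps(2.0);
    let b = _mm512_set1_ps(3.0);
    let c = _mm512_set1_ps(1.0);
    // Lanes 0, 2, 4, ... hold 2.0 * 3.0 - 1.0 = 5.0; lanes 1, 3, 5, ... hold 7.0.
    _mm512_fmaddsub_ps(a, b, c)
}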
4002
4003/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4004///
4005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_ps&expand=2612)
4006#[inline]
4007#[target_feature(enable = "avx512f")]
4008#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4009#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4010pub fn _mm512_mask_fmaddsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
4011    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), a) }
4012}
4013
4014/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4015///
4016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_ps&expand=2614)
4017#[inline]
4018#[target_feature(enable = "avx512f")]
4019#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4020#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4021pub fn _mm512_maskz_fmaddsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
4022    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), _mm512_setzero_ps()) }
4023}
4024
4025/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4026///
4027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_ps&expand=2613)
4028#[inline]
4029#[target_feature(enable = "avx512f")]
4030#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4031#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4032pub fn _mm512_mask3_fmaddsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
4033    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), c) }
4034}
4035
4036/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4037///
4038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmaddsub_ps&expand=2608)
4039#[inline]
4040#[target_feature(enable = "avx512f,avx512vl")]
4041#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4042#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4043pub fn _mm256_mask_fmaddsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
4044    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), a) }
4045}
4046
4047/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4048///
4049/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmaddsub_ps&expand=2610)
4050#[inline]
4051#[target_feature(enable = "avx512f,avx512vl")]
4052#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4053#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4054pub fn _mm256_maskz_fmaddsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
4055    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), _mm256_setzero_ps()) }
4056}
4057
4058/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4059///
4060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmaddsub_ps&expand=2609)
4061#[inline]
4062#[target_feature(enable = "avx512f,avx512vl")]
4063#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4064#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4065pub fn _mm256_mask3_fmaddsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
4066    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), c) }
4067}
4068
4069/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4070///
4071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmaddsub_ps&expand=2604)
4072#[inline]
4073#[target_feature(enable = "avx512f,avx512vl")]
4074#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4075#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4076pub fn _mm_mask_fmaddsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
4077    unsafe { simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), a) }
4078}
4079
4080/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4081///
4082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmaddsub_ps&expand=2606)
4083#[inline]
4084#[target_feature(enable = "avx512f,avx512vl")]
4085#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4086#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4087pub fn _mm_maskz_fmaddsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
4088    unsafe { simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), _mm_setzero_ps()) }
4089}
4090
4091/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4092///
4093/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmaddsub_ps&expand=2605)
4094#[inline]
4095#[target_feature(enable = "avx512f,avx512vl")]
4096#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4097#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4098pub fn _mm_mask3_fmaddsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
4099    unsafe { simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), c) }
4100}
4101
4102/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
4103///
4104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_pd&expand=2599)
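///
/// A minimal usage sketch (illustrative only, not a doc-test; it assumes the
/// caller has already verified AVX-512F support):
///
/// ```ignore
/// // Even lanes compute a*b - c, odd lanes compute a*b + c:
/// // dst[0] = a[0] * b[0] - c[0]
/// // dst[1] = a[1] * b[1] + c[1]
/// let dst = _mm512_fmaddsub_pd(a, b, c);
/// ```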
4105#[inline]
4106#[target_feature(enable = "avx512f")]
4107#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4108#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4109pub fn _mm512_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4110    unsafe {
4111        let add = simd_fma(a, b, c);
4112        let sub = simd_fma(a, b, simd_neg(c));
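        // Interleave the two results: shuffle indices below 8 pick from `add`,
        // 8 and above pick from `sub`, so even result lanes hold a*b - c and
        // odd lanes hold a*b + c, matching the vfmaddsub semantics.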
4113        simd_shuffle!(add, sub, [8, 1, 10, 3, 12, 5, 14, 7])
4114    }
4115}
4116
4117/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4118///
4119/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_pd&expand=2600)
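///
/// A minimal sketch of the writemask behaviour (illustrative only, not a
/// doc-test; the mask value is arbitrary):
///
/// ```ignore
/// // Bits 0..4 of the mask are set, so lanes 0-3 take the fmaddsub result
/// // while lanes 4-7 are copied unchanged from `a`.
/// let dst = _mm512_mask_fmaddsub_pd(a, 0b0000_1111, b, c);
/// ```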
4120#[inline]
4121#[target_feature(enable = "avx512f")]
4122#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4123#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4124pub fn _mm512_mask_fmaddsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
4125    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), a) }
4126}
4127
4128/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4129///
4130/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_pd&expand=2602)
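///
/// A minimal sketch of the zeromask behaviour (illustrative only, not a
/// doc-test; the mask value is arbitrary):
///
/// ```ignore
/// // Lanes whose mask bit is clear are set to 0.0 instead of keeping any input.
/// let dst = _mm512_maskz_fmaddsub_pd(0b1111_0000, a, b, c);
/// ```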
4131#[inline]
4132#[target_feature(enable = "avx512f")]
4133#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4134#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4135pub fn _mm512_maskz_fmaddsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4136    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), _mm512_setzero_pd()) }
4137}
4138
4139/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4140///
4141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_pd&expand=2613)
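///
/// A minimal sketch (illustrative only, not a doc-test): unlike the `_mask_`
/// form, the mask is the last argument and unselected lanes are copied from `c`.
///
/// ```ignore
/// let dst = _mm512_mask3_fmaddsub_pd(a, b, c, 0b0101_0101);
/// ```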
4142#[inline]
4143#[target_feature(enable = "avx512f")]
4144#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4145#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4146pub fn _mm512_mask3_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
4147    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), c) }
4148}
4149
4150/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4151///
4152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmaddsub_pd&expand=2596)
4153#[inline]
4154#[target_feature(enable = "avx512f,avx512vl")]
4155#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4156#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4157pub fn _mm256_mask_fmaddsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
4158    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), a) }
4159}
4160
4161/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4162///
4163/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmaddsub_pd&expand=2598)
4164#[inline]
4165#[target_feature(enable = "avx512f,avx512vl")]
4166#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4167#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4168pub fn _mm256_maskz_fmaddsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
4169    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), _mm256_setzero_pd()) }
4170}
4171
4172/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4173///
4174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmaddsub_pd&expand=2597)
4175#[inline]
4176#[target_feature(enable = "avx512f,avx512vl")]
4177#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4178#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4179pub fn _mm256_mask3_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
4180    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), c) }
4181}
4182
4183/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4184///
4185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmaddsub_pd&expand=2592)
4186#[inline]
4187#[target_feature(enable = "avx512f,avx512vl")]
4188#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4189#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4190pub fn _mm_mask_fmaddsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
4191    unsafe { simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), a) }
4192}
4193
4194/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4195///
4196/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmaddsub_pd&expand=2594)
4197#[inline]
4198#[target_feature(enable = "avx512f,avx512vl")]
4199#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4200#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4201pub fn _mm_maskz_fmaddsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
4202    unsafe { simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), _mm_setzero_pd()) }
4203}
4204
4205/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4206///
4207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmaddsub_pd&expand=2593)
4208#[inline]
4209#[target_feature(enable = "avx512f,avx512vl")]
4210#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4211#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4212pub fn _mm_mask3_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
4213    unsafe { simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), c) }
4214}
4215
4216/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.
4217///
4218/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_ps&expand=2691)
4219#[inline]
4220#[target_feature(enable = "avx512f")]
4221#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4222#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4223pub fn _mm512_fmsubadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
4224    unsafe {
4225        let add = simd_fma(a, b, c);
4226        let sub = simd_fma(a, b, simd_neg(c));
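        // Interleave the two results: shuffle indices below 16 pick from `add`,
        // 16 and above pick from `sub`, so even result lanes hold a*b + c and
        // odd lanes hold a*b - c, matching the vfmsubadd semantics.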
4227        simd_shuffle!(
4228            add,
4229            sub,
4230            [0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31]
4231        )
4232    }
4233}
4234
4235/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4236///
4237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_ps&expand=2692)
4238#[inline]
4239#[target_feature(enable = "avx512f")]
4240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4241#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4242pub fn _mm512_mask_fmsubadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
4243    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), a) }
4244}
4245
4246/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4247///
4248/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_ps&expand=2694)
4249#[inline]
4250#[target_feature(enable = "avx512f")]
4251#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4252#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4253pub fn _mm512_maskz_fmsubadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
4254    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), _mm512_setzero_ps()) }
4255}
4256
4257/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4258///
4259/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_ps&expand=2693)
4260#[inline]
4261#[target_feature(enable = "avx512f")]
4262#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4263#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4264pub fn _mm512_mask3_fmsubadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
4265    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), c) }
4266}
4267
4268/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4269///
4270/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsubadd_ps&expand=2688)
4271#[inline]
4272#[target_feature(enable = "avx512f,avx512vl")]
4273#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4274#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4275pub fn _mm256_mask_fmsubadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
4276    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), a) }
4277}
4278
4279/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4280///
4281/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsubadd_ps&expand=2690)
4282#[inline]
4283#[target_feature(enable = "avx512f,avx512vl")]
4284#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4285#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4286pub fn _mm256_maskz_fmsubadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
4287    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), _mm256_setzero_ps()) }
4288}
4289
4290/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4291///
4292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsubadd_ps&expand=2689)
4293#[inline]
4294#[target_feature(enable = "avx512f,avx512vl")]
4295#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4296#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4297pub fn _mm256_mask3_fmsubadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
4298    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), c) }
4299}
4300
4301/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4302///
4303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsubadd_ps&expand=2684)
4304#[inline]
4305#[target_feature(enable = "avx512f,avx512vl")]
4306#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4307#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4308pub fn _mm_mask_fmsubadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
4309    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), a) }
4310}
4311
4312/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4313///
4314/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsubadd_ps&expand=2686)
4315#[inline]
4316#[target_feature(enable = "avx512f,avx512vl")]
4317#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4318#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4319pub fn _mm_maskz_fmsubadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
4320    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), _mm_setzero_ps()) }
4321}
4322
4323/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4324///
4325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsubadd_ps&expand=2685)
4326#[inline]
4327#[target_feature(enable = "avx512f,avx512vl")]
4328#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4329#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4330pub fn _mm_mask3_fmsubadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
4331    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), c) }
4332}
4333
4334/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.
4335///
4336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_pd&expand=2679)
4337#[inline]
4338#[target_feature(enable = "avx512f")]
4339#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4340#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4341pub fn _mm512_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4342    unsafe {
4343        let add = simd_fma(a, b, c);
4344        let sub = simd_fma(a, b, simd_neg(c));
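        // Interleave the two results: shuffle indices below 8 pick from `add`,
        // 8 and above pick from `sub`, so even result lanes add and odd lanes subtract.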
4345        simd_shuffle!(add, sub, [0, 9, 2, 11, 4, 13, 6, 15])
4346    }
4347}
4348
4349/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4350///
4351/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_pd&expand=2680)
4352#[inline]
4353#[target_feature(enable = "avx512f")]
4354#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4355#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4356pub fn _mm512_mask_fmsubadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
4357    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), a) }
4358}
4359
4360/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4361///
4362/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_pd&expand=2682)
4363#[inline]
4364#[target_feature(enable = "avx512f")]
4365#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4366#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4367pub fn _mm512_maskz_fmsubadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4368    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), _mm512_setzero_pd()) }
4369}
4370
4371/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4372///
4373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_pd&expand=2681)
4374#[inline]
4375#[target_feature(enable = "avx512f")]
4376#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4377#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4378pub fn _mm512_mask3_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
4379    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), c) }
4380}
4381
4382/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4383///
4384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsubadd_pd&expand=2676)
4385#[inline]
4386#[target_feature(enable = "avx512f,avx512vl")]
4387#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4388#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4389pub fn _mm256_mask_fmsubadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
4390    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), a) }
4391}
4392
4393/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4394///
4395/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsubadd_pd&expand=2678)
4396#[inline]
4397#[target_feature(enable = "avx512f,avx512vl")]
4398#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4399#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4400pub fn _mm256_maskz_fmsubadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
4401    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), _mm256_setzero_pd()) }
4402}
4403
4404/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4405///
4406/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsubadd_pd&expand=2677)
4407#[inline]
4408#[target_feature(enable = "avx512f,avx512vl")]
4409#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4410#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4411pub fn _mm256_mask3_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
4412    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), c) }
4413}
4414
4415/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4416///
4417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsubadd_pd&expand=2672)
4418#[inline]
4419#[target_feature(enable = "avx512f,avx512vl")]
4420#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4421#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4422pub fn _mm_mask_fmsubadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
4423    unsafe { simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), a) }
4424}
4425
4426/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4427///
4428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsubadd_pd&expand=2674)
4429#[inline]
4430#[target_feature(enable = "avx512f,avx512vl")]
4431#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4432#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4433pub fn _mm_maskz_fmsubadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
4434    unsafe { simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), _mm_setzero_pd()) }
4435}
4436
4437/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4438///
4439/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsubadd_pd&expand=2673)
4440#[inline]
4441#[target_feature(enable = "avx512f,avx512vl")]
4442#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4443#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4444pub fn _mm_mask3_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
4445    unsafe { simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), c) }
4446}
4447
4448/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.
4449///
4450/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_ps&expand=2723)
4451#[inline]
4452#[target_feature(enable = "avx512f")]
4453#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4454#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4455pub fn _mm512_fnmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
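    // Negating `a` is exact, so (-a) * b + c equals -(a * b) + c with a single fused rounding.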
4456    unsafe { simd_fma(simd_neg(a), b, c) }
4457}
4458
4459/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4460///
4461/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_ps&expand=2724)
4462#[inline]
4463#[target_feature(enable = "avx512f")]
4464#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4465#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4466pub fn _mm512_mask_fnmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
4467    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), a) }
4468}
4469
4470/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4471///
4472/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_ps&expand=2726)
4473#[inline]
4474#[target_feature(enable = "avx512f")]
4475#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4476#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4477pub fn _mm512_maskz_fnmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
4478    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), _mm512_setzero_ps()) }
4479}
4480
4481/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4482///
4483/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_ps&expand=2725)
4484#[inline]
4485#[target_feature(enable = "avx512f")]
4486#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4487#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4488pub fn _mm512_mask3_fnmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
4489    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), c) }
4490}
4491
4492/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4493///
4494/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmadd_ps&expand=2720)
4495#[inline]
4496#[target_feature(enable = "avx512f,avx512vl")]
4497#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4498#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4499pub fn _mm256_mask_fnmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
4500    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), a) }
4501}
4502
4503/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4504///
4505/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmadd_ps&expand=2722)
4506#[inline]
4507#[target_feature(enable = "avx512f,avx512vl")]
4508#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4509#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4510pub fn _mm256_maskz_fnmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
4511    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), _mm256_setzero_ps()) }
4512}
4513
4514/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4515///
4516/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmadd_ps&expand=2721)
4517#[inline]
4518#[target_feature(enable = "avx512f,avx512vl")]
4519#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4520#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4521pub fn _mm256_mask3_fnmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
4522    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), c) }
4523}
4524
4525/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4526///
4527/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmadd_ps&expand=2716)
4528#[inline]
4529#[target_feature(enable = "avx512f,avx512vl")]
4530#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4531#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4532pub fn _mm_mask_fnmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
4533    unsafe { simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), a) }
4534}
4535
4536/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4537///
4538/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmadd_ps&expand=2718)
4539#[inline]
4540#[target_feature(enable = "avx512f,avx512vl")]
4541#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4542#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4543pub fn _mm_maskz_fnmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
4544    unsafe { simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), _mm_setzero_ps()) }
4545}
4546
4547/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4548///
4549/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmadd_ps&expand=2717)
4550#[inline]
4551#[target_feature(enable = "avx512f,avx512vl")]
4552#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4553#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4554pub fn _mm_mask3_fnmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
4555    unsafe { simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), c) }
4556}
4557
4558/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.
4559///
4560/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_pd&expand=2711)
4561#[inline]
4562#[target_feature(enable = "avx512f")]
4563#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4564#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4565pub fn _mm512_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4566    unsafe { simd_fma(simd_neg(a), b, c) }
4567}
4568
4569/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4570///
4571/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_pd&expand=2712)
4572#[inline]
4573#[target_feature(enable = "avx512f")]
4574#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4575#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4576pub fn _mm512_mask_fnmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
4577    unsafe { simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), a) }
4578}
4579
4580/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4581///
4582/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_pd&expand=2714)
4583#[inline]
4584#[target_feature(enable = "avx512f")]
4585#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4586#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4587pub fn _mm512_maskz_fnmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4588    unsafe { simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), _mm512_setzero_pd()) }
4589}
4590
4591/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4592///
4593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_pd&expand=2713)
4594#[inline]
4595#[target_feature(enable = "avx512f")]
4596#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4597#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4598pub fn _mm512_mask3_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
4599    unsafe { simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), c) }
4600}
4601
4602/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4603///
4604/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmadd_pd&expand=2708)
4605#[inline]
4606#[target_feature(enable = "avx512f,avx512vl")]
4607#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4608#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4609pub fn _mm256_mask_fnmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
4610    unsafe { simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), a) }
4611}
4612
4613/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4614///
4615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmadd_pd&expand=2710)
4616#[inline]
4617#[target_feature(enable = "avx512f,avx512vl")]
4618#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4619#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4620pub fn _mm256_maskz_fnmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
4621    unsafe { simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), _mm256_setzero_pd()) }
4622}
4623
4624/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4625///
4626/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmadd_pd&expand=2709)
4627#[inline]
4628#[target_feature(enable = "avx512f,avx512vl")]
4629#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4630#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4631pub fn _mm256_mask3_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
4632    unsafe { simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), c) }
4633}
4634
4635/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4636///
4637/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmadd_pd&expand=2704)
4638#[inline]
4639#[target_feature(enable = "avx512f,avx512vl")]
4640#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4641#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4642pub fn _mm_mask_fnmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
4643    unsafe { simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), a) }
4644}
4645
4646/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4647///
4648/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmadd_pd&expand=2706)
4649#[inline]
4650#[target_feature(enable = "avx512f,avx512vl")]
4651#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4652#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4653pub fn _mm_maskz_fnmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
4654    unsafe { simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), _mm_setzero_pd()) }
4655}
4656
4657/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4658///
4659/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmadd_pd&expand=2705)
4660#[inline]
4661#[target_feature(enable = "avx512f,avx512vl")]
4662#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4663#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4664pub fn _mm_mask3_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
4665    unsafe { simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), c) }
4666}
4667
4668/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.
4669///
4670/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_ps&expand=2771)
4671#[inline]
4672#[target_feature(enable = "avx512f")]
4673#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4674#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4675pub fn _mm512_fnmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
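    // Both negations are exact, so (-a) * b + (-c) equals -(a * b) - c with a single fused rounding.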
4676    unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
4677}
4678
4679/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4680///
4681/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_ps&expand=2772)
4682#[inline]
4683#[target_feature(enable = "avx512f")]
4684#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4685#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4686pub fn _mm512_mask_fnmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
4687    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), a) }
4688}
4689
4690/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4691///
4692/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_ps&expand=2774)
4693#[inline]
4694#[target_feature(enable = "avx512f")]
4695#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4696#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4697pub fn _mm512_maskz_fnmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
4698    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), _mm512_setzero_ps()) }
4699}
4700
4701/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4702///
4703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_ps&expand=2773)
4704#[inline]
4705#[target_feature(enable = "avx512f")]
4706#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4707#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4708pub fn _mm512_mask3_fnmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
4709    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), c) }
4710}
4711
4712/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4713///
4714/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmsub_ps&expand=2768)
4715#[inline]
4716#[target_feature(enable = "avx512f,avx512vl")]
4717#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4718#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4719pub fn _mm256_mask_fnmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
4720    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), a) }
4721}
4722
4723/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4724///
4725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmsub_ps&expand=2770)
4726#[inline]
4727#[target_feature(enable = "avx512f,avx512vl")]
4728#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4729#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4730pub fn _mm256_maskz_fnmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
4731    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), _mm256_setzero_ps()) }
4732}
4733
4734/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4735///
4736/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmsub_ps&expand=2769)
4737#[inline]
4738#[target_feature(enable = "avx512f,avx512vl")]
4739#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4740#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4741pub fn _mm256_mask3_fnmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
4742    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), c) }
4743}
4744
4745/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4746///
4747/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmsub_ps&expand=2764)
4748#[inline]
4749#[target_feature(enable = "avx512f,avx512vl")]
4750#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4751#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4752pub fn _mm_mask_fnmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
4753    unsafe { simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), a) }
4754}
4755
4756/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4757///
4758/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmsub_ps&expand=2766)
4759#[inline]
4760#[target_feature(enable = "avx512f,avx512vl")]
4761#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4762#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4763pub fn _mm_maskz_fnmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
4764    unsafe { simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), _mm_setzero_ps()) }
4765}
4766
4767/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4768///
4769/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmsub_ps&expand=2765)
4770#[inline]
4771#[target_feature(enable = "avx512f,avx512vl")]
4772#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4773#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4774pub fn _mm_mask3_fnmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
4775    unsafe { simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), c) }
4776}
4777
4778/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.
4779///
4780/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_pd&expand=2759)
4781#[inline]
4782#[target_feature(enable = "avx512f")]
4783#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4784#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4785pub fn _mm512_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4786    unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
4787}
4788
4789/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4790///
4791/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_pd&expand=2760)
4792#[inline]
4793#[target_feature(enable = "avx512f")]
4794#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4795#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4796pub fn _mm512_mask_fnmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
4797    unsafe { simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), a) }
4798}
4799
4800/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4801///
4802/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_pd&expand=2762)
4803#[inline]
4804#[target_feature(enable = "avx512f")]
4805#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4806#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4807pub fn _mm512_maskz_fnmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4808    unsafe { simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), _mm512_setzero_pd()) }
4809}
4810
4811/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4812///
4813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_pd&expand=2761)
4814#[inline]
4815#[target_feature(enable = "avx512f")]
4816#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4817#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4818pub fn _mm512_mask3_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
4819    unsafe { simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), c) }
4820}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmsub_pd&expand=2756)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
pub fn _mm256_mask_fnmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), a) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmsub_pd&expand=2758)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
pub fn _mm256_maskz_fnmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), _mm256_setzero_pd()) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmsub_pd&expand=2757)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
pub fn _mm256_mask3_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), c) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmsub_pd&expand=2752)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
pub fn _mm_mask_fnmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), a) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmsub_pd&expand=2754)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
pub fn _mm_maskz_fnmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), _mm_setzero_pd()) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmsub_pd&expand=2753)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
pub fn _mm_mask3_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), c) }
}

/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rcp14_ps&expand=4502)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub fn _mm512_rcp14_ps(a: __m512) -> __m512 {
    unsafe { transmute(vrcp14ps(a.as_f32x16(), f32x16::ZERO, 0b11111111_11111111)) }
}
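
// Rough usage sketch (illustrative only; lane values are assumptions, not from the
// upstream docs). Because the hardware returns an approximation, compare against a
// tolerance rather than exact equality:
//
//     let a = _mm512_set1_ps(4.0);
//     let r = _mm512_rcp14_ps(a);
//     // every lane is ~0.25, with relative error below 2^-14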

/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rcp14_ps&expand=4500)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub fn _mm512_mask_rcp14_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe { transmute(vrcp14ps(a.as_f32x16(), src.as_f32x16(), k)) }
}

/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rcp14_ps&expand=4501)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub fn _mm512_maskz_rcp14_ps(k: __mmask16, a: __m512) -> __m512 {
    unsafe { transmute(vrcp14ps(a.as_f32x16(), f32x16::ZERO, k)) }
}
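
// Sketch of the zeromask behaviour (illustrative; mask and lane values are assumed):
//
//     let a = _mm512_set1_ps(2.0);
//     // lanes 0..8 are ~0.5, lanes 8..16 are zeroed because their mask bits are clear
//     let r = _mm512_maskz_rcp14_ps(0b00000000_11111111, a);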

/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rcp14_ps&expand=4499)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub fn _mm256_rcp14_ps(a: __m256) -> __m256 {
    unsafe { transmute(vrcp14ps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) }
}

/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rcp14_ps&expand=4497)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub fn _mm256_mask_rcp14_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vrcp14ps256(a.as_f32x8(), src.as_f32x8(), k)) }
}

/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rcp14_ps&expand=4498)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub fn _mm256_maskz_rcp14_ps(k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vrcp14ps256(a.as_f32x8(), f32x8::ZERO, k)) }
}

/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rcp14_ps&expand=4496)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub fn _mm_rcp14_ps(a: __m128) -> __m128 {
    unsafe { transmute(vrcp14ps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) }
}

/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rcp14_ps&expand=4494)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub fn _mm_mask_rcp14_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe { transmute(vrcp14ps128(a.as_f32x4(), src.as_f32x4(), k)) }
}

/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rcp14_ps&expand=4495)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub fn _mm_maskz_rcp14_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe { transmute(vrcp14ps128(a.as_f32x4(), f32x4::ZERO, k)) }
}

/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rcp14_pd&expand=4493)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub fn _mm512_rcp14_pd(a: __m512d) -> __m512d {
    unsafe { transmute(vrcp14pd(a.as_f64x8(), f64x8::ZERO, 0b11111111)) }
}

/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rcp14_pd&expand=4491)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub fn _mm512_mask_rcp14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    unsafe { transmute(vrcp14pd(a.as_f64x8(), src.as_f64x8(), k)) }
}

/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rcp14_pd&expand=4492)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub fn _mm512_maskz_rcp14_pd(k: __mmask8, a: __m512d) -> __m512d {
    unsafe { transmute(vrcp14pd(a.as_f64x8(), f64x8::ZERO, k)) }
}

/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rcp14_pd&expand=4490)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub fn _mm256_rcp14_pd(a: __m256d) -> __m256d {
    unsafe { transmute(vrcp14pd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) }
}

/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rcp14_pd&expand=4488)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub fn _mm256_mask_rcp14_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vrcp14pd256(a.as_f64x4(), src.as_f64x4(), k)) }
}

/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rcp14_pd&expand=4489)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub fn _mm256_maskz_rcp14_pd(k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vrcp14pd256(a.as_f64x4(), f64x4::ZERO, k)) }
}

/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rcp14_pd&expand=4487)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub fn _mm_rcp14_pd(a: __m128d) -> __m128d {
    unsafe { transmute(vrcp14pd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) }
}

/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rcp14_pd&expand=4485)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub fn _mm_mask_rcp14_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vrcp14pd128(a.as_f64x2(), src.as_f64x2(), k)) }
}

/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rcp14_pd&expand=4486)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub fn _mm_maskz_rcp14_pd(k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vrcp14pd128(a.as_f64x2(), f64x2::ZERO, k)) }
}

/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rsqrt14_ps&expand=4819)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vrsqrt14ps))]
pub fn _mm512_rsqrt14_ps(a: __m512) -> __m512 {
    unsafe { transmute(vrsqrt14ps(a.as_f32x16(), f32x16::ZERO, 0b11111111_11111111)) }
}
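
// Illustrative sketch of the reciprocal square root approximation (lane values are
// assumptions for the example, not from the upstream docs):
//
//     let a = _mm512_set1_ps(16.0);
//     let r = _mm512_rsqrt14_ps(a);
//     // every lane is ~1.0 / sqrt(16.0) = 0.25, with relative error below 2^-14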

/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rsqrt14_ps&expand=4817)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vrsqrt14ps))]
pub fn _mm512_mask_rsqrt14_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe { transmute(vrsqrt14ps(a.as_f32x16(), src.as_f32x16(), k)) }
}

/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rsqrt14_ps&expand=4818)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vrsqrt14ps))]
pub fn _mm512_maskz_rsqrt14_ps(k: __mmask16, a: __m512) -> __m512 {
    unsafe { transmute(vrsqrt14ps(a.as_f32x16(), f32x16::ZERO, k)) }
}

/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rsqrt14_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vrsqrt14ps))]
pub fn _mm256_rsqrt14_ps(a: __m256) -> __m256 {
    unsafe { transmute(vrsqrt14ps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) }
}

/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rsqrt14_ps&expand=4815)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vrsqrt14ps))]
pub fn _mm256_mask_rsqrt14_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vrsqrt14ps256(a.as_f32x8(), src.as_f32x8(), k)) }
}

/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rsqrt14_ps&expand=4816)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vrsqrt14ps))]
pub fn _mm256_maskz_rsqrt14_ps(k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vrsqrt14ps256(a.as_f32x8(), f32x8::ZERO, k)) }
}

/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rsqrt14_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vrsqrt14ps))]
pub fn _mm_rsqrt14_ps(a: __m128) -> __m128 {
    unsafe { transmute(vrsqrt14ps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) }
}

/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rsqrt14_ps&expand=4813)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vrsqrt14ps))]
pub fn _mm_mask_rsqrt14_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe { transmute(vrsqrt14ps128(a.as_f32x4(), src.as_f32x4(), k)) }
}

/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rsqrt14_ps&expand=4814)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vrsqrt14ps))]
pub fn _mm_maskz_rsqrt14_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe { transmute(vrsqrt14ps128(a.as_f32x4(), f32x4::ZERO, k)) }
}

/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rsqrt14_pd&expand=4812)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vrsqrt14pd))]
pub fn _mm512_rsqrt14_pd(a: __m512d) -> __m512d {
    unsafe { transmute(vrsqrt14pd(a.as_f64x8(), f64x8::ZERO, 0b11111111)) }
}

/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rsqrt14_pd&expand=4810)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vrsqrt14pd))]
pub fn _mm512_mask_rsqrt14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    unsafe { transmute(vrsqrt14pd(a.as_f64x8(), src.as_f64x8(), k)) }
}

/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rsqrt14_pd&expand=4811)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vrsqrt14pd))]
pub fn _mm512_maskz_rsqrt14_pd(k: __mmask8, a: __m512d) -> __m512d {
    unsafe { transmute(vrsqrt14pd(a.as_f64x8(), f64x8::ZERO, k)) }
}

/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rsqrt14_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vrsqrt14pd))]
pub fn _mm256_rsqrt14_pd(a: __m256d) -> __m256d {
    unsafe { transmute(vrsqrt14pd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) }
}

/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rsqrt14_pd&expand=4808)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vrsqrt14pd))]
pub fn _mm256_mask_rsqrt14_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vrsqrt14pd256(a.as_f64x4(), src.as_f64x4(), k)) }
}

/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rsqrt14_pd&expand=4809)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vrsqrt14pd))]
pub fn _mm256_maskz_rsqrt14_pd(k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vrsqrt14pd256(a.as_f64x4(), f64x4::ZERO, k)) }
}

/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rsqrt14_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vrsqrt14pd))]
pub fn _mm_rsqrt14_pd(a: __m128d) -> __m128d {
    unsafe { transmute(vrsqrt14pd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) }
}

/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rsqrt14_pd&expand=4806)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vrsqrt14pd))]
pub fn _mm_mask_rsqrt14_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vrsqrt14pd128(a.as_f64x2(), src.as_f64x2(), k)) }
}

/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rsqrt14_pd&expand=4807)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vrsqrt14pd))]
pub fn _mm_maskz_rsqrt14_pd(k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vrsqrt14pd128(a.as_f64x2(), f64x2::ZERO, k)) }
}

/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_ps&expand=2844)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgetexpps))]
pub fn _mm512_getexp_ps(a: __m512) -> __m512 {
    unsafe {
        transmute(vgetexpps(
            a.as_f32x16(),
            f32x16::ZERO,
            0b11111111_11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
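
// Illustrative sketch of the floor(log2(x)) behaviour (lane values are assumptions for
// the example): getexp returns the unbiased exponent of each lane as a float.
//
//     let a = _mm512_set1_ps(10.0);
//     let r = _mm512_getexp_ps(a);
//     // every lane is 3.0, since 10.0 = 1.25 * 2^3 and floor(log2(10.0)) == 3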

/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_ps&expand=2845)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgetexpps))]
pub fn _mm512_mask_getexp_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        transmute(vgetexpps(
            a.as_f32x16(),
            src.as_f32x16(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_ps&expand=2846)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgetexpps))]
pub fn _mm512_maskz_getexp_ps(k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        transmute(vgetexpps(
            a.as_f32x16(),
            f32x16::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getexp_ps&expand=2841)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgetexpps))]
pub fn _mm256_getexp_ps(a: __m256) -> __m256 {
    unsafe { transmute(vgetexpps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) }
}

/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getexp_ps&expand=2842)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgetexpps))]
pub fn _mm256_mask_getexp_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vgetexpps256(a.as_f32x8(), src.as_f32x8(), k)) }
}

/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getexp_ps&expand=2843)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgetexpps))]
pub fn _mm256_maskz_getexp_ps(k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vgetexpps256(a.as_f32x8(), f32x8::ZERO, k)) }
}

/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getexp_ps&expand=2838)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgetexpps))]
pub fn _mm_getexp_ps(a: __m128) -> __m128 {
    unsafe { transmute(vgetexpps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) }
}

/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getexp_ps&expand=2839)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgetexpps))]
pub fn _mm_mask_getexp_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe { transmute(vgetexpps128(a.as_f32x4(), src.as_f32x4(), k)) }
}

/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getexp_ps&expand=2840)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgetexpps))]
pub fn _mm_maskz_getexp_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe { transmute(vgetexpps128(a.as_f32x4(), f32x4::ZERO, k)) }
}

/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_pd&expand=2835)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgetexppd))]
pub fn _mm512_getexp_pd(a: __m512d) -> __m512d {
    unsafe {
        transmute(vgetexppd(
            a.as_f64x8(),
            f64x8::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_pd&expand=2836)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgetexppd))]
pub fn _mm512_mask_getexp_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    unsafe {
        transmute(vgetexppd(
            a.as_f64x8(),
            src.as_f64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_pd&expand=2837)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgetexppd))]
pub fn _mm512_maskz_getexp_pd(k: __mmask8, a: __m512d) -> __m512d {
    unsafe {
        transmute(vgetexppd(
            a.as_f64x8(),
            f64x8::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getexp_pd&expand=2832)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgetexppd))]
pub fn _mm256_getexp_pd(a: __m256d) -> __m256d {
    unsafe { transmute(vgetexppd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) }
}

/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getexp_pd&expand=2833)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgetexppd))]
pub fn _mm256_mask_getexp_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vgetexppd256(a.as_f64x4(), src.as_f64x4(), k)) }
}

/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getexp_pd&expand=2834)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgetexppd))]
pub fn _mm256_maskz_getexp_pd(k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vgetexppd256(a.as_f64x4(), f64x4::ZERO, k)) }
}

/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getexp_pd&expand=2829)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgetexppd))]
pub fn _mm_getexp_pd(a: __m128d) -> __m128d {
    unsafe { transmute(vgetexppd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) }
}

/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getexp_pd&expand=2830)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgetexppd))]
pub fn _mm_mask_getexp_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vgetexppd128(a.as_f64x2(), src.as_f64x2(), k)) }
}

/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getexp_pd&expand=2831)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgetexppd))]
pub fn _mm_maskz_getexp_pd(k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vgetexppd128(a.as_f64x2(), f64x2::ZERO, k)) }
}

/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_ps&expand=4784)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_roundscale_ps<const IMM8: i32>(a: __m512) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x16();
        let r = vrndscaleps(
            a,
            IMM8,
            f32x16::ZERO,
            0b11111111_11111111,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
    }
}
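
// Illustrative sketch (input values are assumptions, not from the upstream docs). Besides
// the rounding mode in IMM8[2:0] described above, IMM8[7:4] selects how many fraction
// bits are kept, so the result is round(x * 2^M) / 2^M for M = IMM8[7:4]:
//
//     let a = _mm512_set1_ps(1.3);
//     // keep 0 fraction bits, round to nearest: every lane becomes 1.0
//     let r0 = _mm512_roundscale_ps::<_MM_FROUND_TO_NEAREST_INT>(a);
//     // IMM8 = 0b0001_0000 keeps 1 fraction bit: rounds to the nearest 0.5, giving 1.5
//     let r1 = _mm512_roundscale_ps::<0b0001_0000>(a);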
5552
5553/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5554/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5555/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5556/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5557/// * [`_MM_FROUND_TO_POS_INF`] : round up
5558/// * [`_MM_FROUND_TO_ZERO`] : truncate
5559/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5560///
5561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_ps&expand=4782)
5562#[inline]
5563#[target_feature(enable = "avx512f")]
5564#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5565#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5566#[rustc_legacy_const_generics(3)]
5567pub fn _mm512_mask_roundscale_ps<const IMM8: i32>(src: __m512, k: __mmask16, a: __m512) -> __m512 {
5568    unsafe {
5569        static_assert_uimm_bits!(IMM8, 8);
5570        let a = a.as_f32x16();
5571        let src = src.as_f32x16();
5572        let r = vrndscaleps(a, IMM8, src, k, _MM_FROUND_CUR_DIRECTION);
5573        transmute(r)
5574    }
5575}
5576
5577/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5578/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5579/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5580/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5581/// * [`_MM_FROUND_TO_POS_INF`] : round up
5582/// * [`_MM_FROUND_TO_ZERO`] : truncate
5583/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5584///
5585/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_ps&expand=4783)
5586#[inline]
5587#[target_feature(enable = "avx512f")]
5588#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5589#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5590#[rustc_legacy_const_generics(2)]
5591pub fn _mm512_maskz_roundscale_ps<const IMM8: i32>(k: __mmask16, a: __m512) -> __m512 {
5592    unsafe {
5593        static_assert_uimm_bits!(IMM8, 8);
5594        let a = a.as_f32x16();
5595        let r = vrndscaleps(a, IMM8, f32x16::ZERO, k, _MM_FROUND_CUR_DIRECTION);
5596        transmute(r)
5597    }
5598}
5599
5600/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5601/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5602/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5603/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5604/// * [`_MM_FROUND_TO_POS_INF`] : round up
5605/// * [`_MM_FROUND_TO_ZERO`] : truncate
5606/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5607///
5608/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_roundscale_ps&expand=4781)
5609#[inline]
5610#[target_feature(enable = "avx512f,avx512vl")]
5611#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5612#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 250))]
5613#[rustc_legacy_const_generics(1)]
5614pub fn _mm256_roundscale_ps<const IMM8: i32>(a: __m256) -> __m256 {
5615    unsafe {
5616        static_assert_uimm_bits!(IMM8, 8);
5617        let a = a.as_f32x8();
5618        let r = vrndscaleps256(a, IMM8, f32x8::ZERO, 0b11111111);
5619        transmute(r)
5620    }
5621}
5622
5623/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5624/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5625/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5626/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5627/// * [`_MM_FROUND_TO_POS_INF`] : round up
5628/// * [`_MM_FROUND_TO_ZERO`] : truncate
5629/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5630///
5631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_roundscale_ps&expand=4779)
5632#[inline]
5633#[target_feature(enable = "avx512f,avx512vl")]
5634#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5635#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5636#[rustc_legacy_const_generics(3)]
5637pub fn _mm256_mask_roundscale_ps<const IMM8: i32>(src: __m256, k: __mmask8, a: __m256) -> __m256 {
5638    unsafe {
5639        static_assert_uimm_bits!(IMM8, 8);
5640        let a = a.as_f32x8();
5641        let src = src.as_f32x8();
5642        let r = vrndscaleps256(a, IMM8, src, k);
5643        transmute(r)
5644    }
5645}
5646
5647/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5648/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5649/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5650/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5651/// * [`_MM_FROUND_TO_POS_INF`] : round up
5652/// * [`_MM_FROUND_TO_ZERO`] : truncate
5653/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5654///
5655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_roundscale_ps&expand=4780)
5656#[inline]
5657#[target_feature(enable = "avx512f,avx512vl")]
5658#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5659#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5660#[rustc_legacy_const_generics(2)]
5661pub fn _mm256_maskz_roundscale_ps<const IMM8: i32>(k: __mmask8, a: __m256) -> __m256 {
5662    unsafe {
5663        static_assert_uimm_bits!(IMM8, 8);
5664        let a = a.as_f32x8();
5665        let r = vrndscaleps256(a, IMM8, f32x8::ZERO, k);
5666        transmute(r)
5667    }
5668}
5669
5670/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5671/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5672/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5673/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5674/// * [`_MM_FROUND_TO_POS_INF`] : round up
5675/// * [`_MM_FROUND_TO_ZERO`] : truncate
5676/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5677///
5678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_roundscale_ps&expand=4778)
5679#[inline]
5680#[target_feature(enable = "avx512f,avx512vl")]
5681#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5682#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 250))]
5683#[rustc_legacy_const_generics(1)]
5684pub fn _mm_roundscale_ps<const IMM8: i32>(a: __m128) -> __m128 {
5685    unsafe {
5686        static_assert_uimm_bits!(IMM8, 8);
5687        let a = a.as_f32x4();
5688        let r = vrndscaleps128(a, IMM8, f32x4::ZERO, 0b00001111);
5689        transmute(r)
5690    }
5691}
5692
5693/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5694/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5695/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5696/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5697/// * [`_MM_FROUND_TO_POS_INF`] : round up
5698/// * [`_MM_FROUND_TO_ZERO`] : truncate
5699/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5700///
5701/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_roundscale_ps&expand=4776)
5702#[inline]
5703#[target_feature(enable = "avx512f,avx512vl")]
5704#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5705#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5706#[rustc_legacy_const_generics(3)]
5707pub fn _mm_mask_roundscale_ps<const IMM8: i32>(src: __m128, k: __mmask8, a: __m128) -> __m128 {
5708    unsafe {
5709        static_assert_uimm_bits!(IMM8, 8);
5710        let a = a.as_f32x4();
5711        let src = src.as_f32x4();
5712        let r = vrndscaleps128(a, IMM8, src, k);
5713        transmute(r)
5714    }
5715}
5716
5717/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5718/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5719/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5720/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5721/// * [`_MM_FROUND_TO_POS_INF`] : round up
5722/// * [`_MM_FROUND_TO_ZERO`] : truncate
5723/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5724///
5725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_roundscale_ps&expand=4777)
5726#[inline]
5727#[target_feature(enable = "avx512f,avx512vl")]
5728#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5729#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5730#[rustc_legacy_const_generics(2)]
5731pub fn _mm_maskz_roundscale_ps<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128 {
5732    unsafe {
5733        static_assert_uimm_bits!(IMM8, 8);
5734        let a = a.as_f32x4();
5735        let r = vrndscaleps128(a, IMM8, f32x4::ZERO, k);
5736        transmute(r)
5737    }
5738}
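// Editor's note: an illustrative usage sketch for the 128-bit roundscale family
// above; it is not part of the upstream crate. With `IMM8 = 0` the low nibble
// selects round-to-nearest and the high nibble requests zero extra fraction
// bits, so every selected lane is rounded to a whole number. The mask values
// below are arbitrary examples.
//
//     let a = _mm_set1_ps(1.75);
//     let src = _mm_set1_ps(-9.0);
//     let all = _mm_roundscale_ps::<0>(a);                      // every lane becomes 2.0
//     let merged = _mm_mask_roundscale_ps::<0>(src, 0b0101, a); // lanes 0 and 2 -> 2.0, lanes 1 and 3 copied from src (-9.0)
//     let zeroed = _mm_maskz_roundscale_ps::<0>(0b0101, a);     // lanes 0 and 2 -> 2.0, lanes 1 and 3 -> 0.0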
5739
5740/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5741/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5742/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5743/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5744/// * [`_MM_FROUND_TO_POS_INF`] : round up
5745/// * [`_MM_FROUND_TO_ZERO`] : truncate
5746/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5747///
5748/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_pd&expand=4775)
5749#[inline]
5750#[target_feature(enable = "avx512f")]
5751#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5752#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5753#[rustc_legacy_const_generics(1)]
5754pub fn _mm512_roundscale_pd<const IMM8: i32>(a: __m512d) -> __m512d {
5755    unsafe {
5756        static_assert_uimm_bits!(IMM8, 8);
5757        let a = a.as_f64x8();
5758        let r = vrndscalepd(a, IMM8, f64x8::ZERO, 0b11111111, _MM_FROUND_CUR_DIRECTION);
5759        transmute(r)
5760    }
5761}
5762
5763/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5764/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5765/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5766/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5767/// * [`_MM_FROUND_TO_POS_INF`] : round up
5768/// * [`_MM_FROUND_TO_ZERO`] : truncate
5769/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5770///
5771/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_pd&expand=4773)
5772#[inline]
5773#[target_feature(enable = "avx512f")]
5774#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5775#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5776#[rustc_legacy_const_generics(3)]
5777pub fn _mm512_mask_roundscale_pd<const IMM8: i32>(
5778    src: __m512d,
5779    k: __mmask8,
5780    a: __m512d,
5781) -> __m512d {
5782    unsafe {
5783        static_assert_uimm_bits!(IMM8, 8);
5784        let a = a.as_f64x8();
5785        let src = src.as_f64x8();
5786        let r = vrndscalepd(a, IMM8, src, k, _MM_FROUND_CUR_DIRECTION);
5787        transmute(r)
5788    }
5789}
5790
5791/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5792/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5793/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5794/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5795/// * [`_MM_FROUND_TO_POS_INF`] : round up
5796/// * [`_MM_FROUND_TO_ZERO`] : truncate
5797/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5798///
5799/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_pd&expand=4774)
5800#[inline]
5801#[target_feature(enable = "avx512f")]
5802#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5803#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5804#[rustc_legacy_const_generics(2)]
5805pub fn _mm512_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m512d {
5806    unsafe {
5807        static_assert_uimm_bits!(IMM8, 8);
5808        let a = a.as_f64x8();
5809        let r = vrndscalepd(a, IMM8, f64x8::ZERO, k, _MM_FROUND_CUR_DIRECTION);
5810        transmute(r)
5811    }
5812}
5813
5814/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5815/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5816/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5817/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5818/// * [`_MM_FROUND_TO_POS_INF`] : round up
5819/// * [`_MM_FROUND_TO_ZERO`] : truncate
5820/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5821///
5822/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_roundscale_pd&expand=4772)
5823#[inline]
5824#[target_feature(enable = "avx512f,avx512vl")]
5825#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5826#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5827#[rustc_legacy_const_generics(1)]
5828pub fn _mm256_roundscale_pd<const IMM8: i32>(a: __m256d) -> __m256d {
5829    unsafe {
5830        static_assert_uimm_bits!(IMM8, 8);
5831        let a = a.as_f64x4();
5832        let r = vrndscalepd256(a, IMM8, f64x4::ZERO, 0b00001111);
5833        transmute(r)
5834    }
5835}
5836
5837/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5838/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5839/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5840/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5841/// * [`_MM_FROUND_TO_POS_INF`] : round up
5842/// * [`_MM_FROUND_TO_ZERO`] : truncate
5843/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5844///
5845/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_roundscale_pd&expand=4770)
5846#[inline]
5847#[target_feature(enable = "avx512f,avx512vl")]
5848#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5849#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5850#[rustc_legacy_const_generics(3)]
5851pub fn _mm256_mask_roundscale_pd<const IMM8: i32>(
5852    src: __m256d,
5853    k: __mmask8,
5854    a: __m256d,
5855) -> __m256d {
5856    unsafe {
5857        static_assert_uimm_bits!(IMM8, 8);
5858        let a = a.as_f64x4();
5859        let src = src.as_f64x4();
5860        let r = vrndscalepd256(a, IMM8, src, k);
5861        transmute(r)
5862    }
5863}
5864
5865/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5866/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5867/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5868/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5869/// * [`_MM_FROUND_TO_POS_INF`] : round up
5870/// * [`_MM_FROUND_TO_ZERO`] : truncate
5871/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5872///
5873/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_roundscale_pd&expand=4771)
5874#[inline]
5875#[target_feature(enable = "avx512f,avx512vl")]
5876#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5877#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5878#[rustc_legacy_const_generics(2)]
5879pub fn _mm256_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m256d) -> __m256d {
5880    unsafe {
5881        static_assert_uimm_bits!(IMM8, 8);
5882        let a = a.as_f64x4();
5883        let r = vrndscalepd256(a, IMM8, f64x4::ZERO, k);
5884        transmute(r)
5885    }
5886}
5887
5888/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5889/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5890/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5891/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5892/// * [`_MM_FROUND_TO_POS_INF`] : round up
5893/// * [`_MM_FROUND_TO_ZERO`] : truncate
5894/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5895///
5896/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_roundscale_pd&expand=4769)
5897#[inline]
5898#[target_feature(enable = "avx512f,avx512vl")]
5899#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5900#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5901#[rustc_legacy_const_generics(1)]
5902pub fn _mm_roundscale_pd<const IMM8: i32>(a: __m128d) -> __m128d {
5903    unsafe {
5904        static_assert_uimm_bits!(IMM8, 8);
5905        let a = a.as_f64x2();
5906        let r = vrndscalepd128(a, IMM8, f64x2::ZERO, 0b00000011);
5907        transmute(r)
5908    }
5909}
5910
5911/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5912/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5913/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5914/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5915/// * [`_MM_FROUND_TO_POS_INF`] : round up
5916/// * [`_MM_FROUND_TO_ZERO`] : truncate
5917/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5918///
5919/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_roundscale_pd&expand=4767)
5920#[inline]
5921#[target_feature(enable = "avx512f,avx512vl")]
5922#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5923#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5924#[rustc_legacy_const_generics(3)]
5925pub fn _mm_mask_roundscale_pd<const IMM8: i32>(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
5926    unsafe {
5927        static_assert_uimm_bits!(IMM8, 8);
5928        let a = a.as_f64x2();
5929        let src = src.as_f64x2();
5930        let r = vrndscalepd128(a, IMM8, src, k);
5931        transmute(r)
5932    }
5933}
5934
5935/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5936/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5937/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5938/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5939/// * [`_MM_FROUND_TO_POS_INF`] : round up
5940/// * [`_MM_FROUND_TO_ZERO`] : truncate
5941/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5942///
5943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_roundscale_pd&expand=4768)
5944#[inline]
5945#[target_feature(enable = "avx512f,avx512vl")]
5946#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5947#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5948#[rustc_legacy_const_generics(2)]
5949pub fn _mm_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m128d) -> __m128d {
5950    unsafe {
5951        static_assert_uimm_bits!(IMM8, 8);
5952        let a = a.as_f64x2();
5953        let r = vrndscalepd128(a, IMM8, f64x2::ZERO, k);
5954        transmute(r)
5955    }
5956}
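// Editor's note: a hedged sketch (not from the upstream crate) of how the two
// halves of IMM8 combine for the roundscale intrinsics. Bits [7:4] give M, the
// number of fraction bits to keep, and bits [3:0] pick the rounding mode, so
// the result is round(x * 2^M) / 2^M.
//
//     // M = 1, round to nearest: values are rounded to the nearest multiple of 0.5.
//     let a = _mm_set1_pd(2.8);
//     let nearest_half = _mm_roundscale_pd::<0x10>(a); // 2.8 * 2 = 5.6 -> 6 -> 3.0
//     // M = 1, truncate (_MM_FROUND_TO_ZERO = 0x03 in the low nibble).
//     let trunc_half = _mm_roundscale_pd::<0x13>(a);   // 2.8 * 2 = 5.6 -> 5 -> 2.5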
5957
5958/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
5959///
5960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_ps&expand=4883)
5961#[inline]
5962#[target_feature(enable = "avx512f")]
5963#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5964#[cfg_attr(test, assert_instr(vscalefps))]
5965pub fn _mm512_scalef_ps(a: __m512, b: __m512) -> __m512 {
5966    unsafe {
5967        transmute(vscalefps(
5968            a.as_f32x16(),
5969            b.as_f32x16(),
5970            f32x16::ZERO,
5971            0b11111111_11111111,
5972            _MM_FROUND_CUR_DIRECTION,
5973        ))
5974    }
5975}
5976
5977/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5978///
5979/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_ps&expand=4881)
5980#[inline]
5981#[target_feature(enable = "avx512f")]
5982#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5983#[cfg_attr(test, assert_instr(vscalefps))]
5984pub fn _mm512_mask_scalef_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
5985    unsafe {
5986        transmute(vscalefps(
5987            a.as_f32x16(),
5988            b.as_f32x16(),
5989            src.as_f32x16(),
5990            k,
5991            _MM_FROUND_CUR_DIRECTION,
5992        ))
5993    }
5994}
5995
5996/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5997///
5998/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_ps&expand=4882)
5999#[inline]
6000#[target_feature(enable = "avx512f")]
6001#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6002#[cfg_attr(test, assert_instr(vscalefps))]
6003pub fn _mm512_maskz_scalef_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
6004    unsafe {
6005        transmute(vscalefps(
6006            a.as_f32x16(),
6007            b.as_f32x16(),
6008            f32x16::ZERO,
6009            k,
6010            _MM_FROUND_CUR_DIRECTION,
6011        ))
6012    }
6013}
6014
6015/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
6016///
6017/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_scalef_ps&expand=4880)
6018#[inline]
6019#[target_feature(enable = "avx512f,avx512vl")]
6020#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6021#[cfg_attr(test, assert_instr(vscalefps))]
6022pub fn _mm256_scalef_ps(a: __m256, b: __m256) -> __m256 {
6023    unsafe {
6024        transmute(vscalefps256(
6025            a.as_f32x8(),
6026            b.as_f32x8(),
6027            f32x8::ZERO,
6028            0b11111111,
6029        ))
6030    }
6031}
6032
6033/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6034///
6035/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_scalef_ps&expand=4878)
6036#[inline]
6037#[target_feature(enable = "avx512f,avx512vl")]
6038#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6039#[cfg_attr(test, assert_instr(vscalefps))]
6040pub fn _mm256_mask_scalef_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
6041    unsafe { transmute(vscalefps256(a.as_f32x8(), b.as_f32x8(), src.as_f32x8(), k)) }
6042}
6043
6044/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6045///
6046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_scalef_ps&expand=4879)
6047#[inline]
6048#[target_feature(enable = "avx512f,avx512vl")]
6049#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6050#[cfg_attr(test, assert_instr(vscalefps))]
6051pub fn _mm256_maskz_scalef_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
6052    unsafe { transmute(vscalefps256(a.as_f32x8(), b.as_f32x8(), f32x8::ZERO, k)) }
6053}
6054
6055/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
6056///
6057/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_scalef_ps&expand=4877)
6058#[inline]
6059#[target_feature(enable = "avx512f,avx512vl")]
6060#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6061#[cfg_attr(test, assert_instr(vscalefps))]
6062pub fn _mm_scalef_ps(a: __m128, b: __m128) -> __m128 {
6063    unsafe {
6064        transmute(vscalefps128(
6065            a.as_f32x4(),
6066            b.as_f32x4(),
6067            f32x4::ZERO,
6068            0b00001111,
6069        ))
6070    }
6071}
6072
6073/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6074///
6075/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_scalef_ps&expand=4875)
6076#[inline]
6077#[target_feature(enable = "avx512f,avx512vl")]
6078#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6079#[cfg_attr(test, assert_instr(vscalefps))]
6080pub fn _mm_mask_scalef_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
6081    unsafe { transmute(vscalefps128(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k)) }
6082}
6083
6084/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6085///
6086/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_scalef_ps&expand=4876)
6087#[inline]
6088#[target_feature(enable = "avx512f,avx512vl")]
6089#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6090#[cfg_attr(test, assert_instr(vscalefps))]
6091pub fn _mm_maskz_scalef_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
6092    unsafe { transmute(vscalefps128(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k)) }
6093}
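// Editor's note: illustrative sketch only (not part of the upstream crate).
// `scalef` multiplies each lane of `a` by 2 raised to the floor of the
// corresponding lane of `b`, i.e. dst[i] = a[i] * 2^floor(b[i]).
//
//     let a = _mm_set_ps(1.0, 3.0, -0.5, 8.0);
//     let b = _mm_set_ps(0.0, 2.0, 3.9, -1.0);
//     let r = _mm_scalef_ps(a, b); // per argument pair: 1.0*2^0, 3.0*2^2, -0.5*2^3, 8.0*2^-1
//                                  //                  = 1.0, 12.0, -4.0, 4.0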
6094
6095/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
6096///
6097/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_pd&expand=4874)
6098#[inline]
6099#[target_feature(enable = "avx512f")]
6100#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6101#[cfg_attr(test, assert_instr(vscalefpd))]
6102pub fn _mm512_scalef_pd(a: __m512d, b: __m512d) -> __m512d {
6103    unsafe {
6104        transmute(vscalefpd(
6105            a.as_f64x8(),
6106            b.as_f64x8(),
6107            f64x8::ZERO,
6108            0b11111111,
6109            _MM_FROUND_CUR_DIRECTION,
6110        ))
6111    }
6112}
6113
6114/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6115///
6116/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_pd&expand=4872)
6117#[inline]
6118#[target_feature(enable = "avx512f")]
6119#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6120#[cfg_attr(test, assert_instr(vscalefpd))]
6121pub fn _mm512_mask_scalef_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
6122    unsafe {
6123        transmute(vscalefpd(
6124            a.as_f64x8(),
6125            b.as_f64x8(),
6126            src.as_f64x8(),
6127            k,
6128            _MM_FROUND_CUR_DIRECTION,
6129        ))
6130    }
6131}
6132
6133/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6134///
6135/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_pd&expand=4873)
6136#[inline]
6137#[target_feature(enable = "avx512f")]
6138#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6139#[cfg_attr(test, assert_instr(vscalefpd))]
6140pub fn _mm512_maskz_scalef_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
6141    unsafe {
6142        transmute(vscalefpd(
6143            a.as_f64x8(),
6144            b.as_f64x8(),
6145            f64x8::ZERO,
6146            k,
6147            _MM_FROUND_CUR_DIRECTION,
6148        ))
6149    }
6150}
6151
6152/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
6153///
6154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_scalef_pd&expand=4871)
6155#[inline]
6156#[target_feature(enable = "avx512f,avx512vl")]
6157#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6158#[cfg_attr(test, assert_instr(vscalefpd))]
6159pub fn _mm256_scalef_pd(a: __m256d, b: __m256d) -> __m256d {
6160    unsafe {
6161        transmute(vscalefpd256(
6162            a.as_f64x4(),
6163            b.as_f64x4(),
6164            f64x4::ZERO,
6165            0b00001111,
6166        ))
6167    }
6168}
6169
6170/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6171///
6172/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_scalef_pd&expand=4869)
6173#[inline]
6174#[target_feature(enable = "avx512f,avx512vl")]
6175#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6176#[cfg_attr(test, assert_instr(vscalefpd))]
6177pub fn _mm256_mask_scalef_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
6178    unsafe { transmute(vscalefpd256(a.as_f64x4(), b.as_f64x4(), src.as_f64x4(), k)) }
6179}
6180
6181/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6182///
6183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_scalef_pd&expand=4870)
6184#[inline]
6185#[target_feature(enable = "avx512f,avx512vl")]
6186#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6187#[cfg_attr(test, assert_instr(vscalefpd))]
6188pub fn _mm256_maskz_scalef_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
6189    unsafe { transmute(vscalefpd256(a.as_f64x4(), b.as_f64x4(), f64x4::ZERO, k)) }
6190}
6191
6192/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
6193///
6194/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_scalef_pd&expand=4868)
6195#[inline]
6196#[target_feature(enable = "avx512f,avx512vl")]
6197#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6198#[cfg_attr(test, assert_instr(vscalefpd))]
6199pub fn _mm_scalef_pd(a: __m128d, b: __m128d) -> __m128d {
6200    unsafe {
6201        transmute(vscalefpd128(
6202            a.as_f64x2(),
6203            b.as_f64x2(),
6204            f64x2::ZERO,
6205            0b00000011,
6206        ))
6207    }
6208}
6209
6210/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6211///
6212/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_scalef_pd&expand=4866)
6213#[inline]
6214#[target_feature(enable = "avx512f,avx512vl")]
6215#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6216#[cfg_attr(test, assert_instr(vscalefpd))]
6217pub fn _mm_mask_scalef_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
6218    unsafe { transmute(vscalefpd128(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k)) }
6219}
6220
6221/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6222///
6223/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_scalef_pd&expand=4867)
6224#[inline]
6225#[target_feature(enable = "avx512f,avx512vl")]
6226#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6227#[cfg_attr(test, assert_instr(vscalefpd))]
6228pub fn _mm_maskz_scalef_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
6229    unsafe { transmute(vscalefpd128(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k)) }
6230}
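// Editor's note: a small sketch (not in the upstream crate) of the write-mask
// and zero-mask behaviour shared by the scalef intrinsics. Only lanes whose
// mask bit is set are computed; the remaining lanes come from `src` or become
// zero.
//
//     let a = _mm_set1_pd(3.0);
//     let b = _mm_set1_pd(1.5);              // floor(1.5) = 1, so each lane is scaled by 2
//     let src = _mm_set1_pd(100.0);
//     let merged = _mm_mask_scalef_pd(src, 0b01, a, b); // lane 0 = 6.0, lane 1 = 100.0
//     let zeroed = _mm_maskz_scalef_pd(0b01, a, b);     // lane 0 = 6.0, lane 1 = 0.0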
6231
6232/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6233///
6234/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_ps&expand=2499)
6235#[inline]
6236#[target_feature(enable = "avx512f")]
6237#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6238#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6239#[rustc_legacy_const_generics(3)]
6240pub fn _mm512_fixupimm_ps<const IMM8: i32>(a: __m512, b: __m512, c: __m512i) -> __m512 {
6241    unsafe {
6242        static_assert_uimm_bits!(IMM8, 8);
6243        let a = a.as_f32x16();
6244        let b = b.as_f32x16();
6245        let c = c.as_i32x16();
6246        let r = vfixupimmps(a, b, c, IMM8, 0b11111111_11111111, _MM_FROUND_CUR_DIRECTION);
6247        transmute(r)
6248    }
6249}
6250
6251/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6252///
6253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_ps&expand=2500)
6254#[inline]
6255#[target_feature(enable = "avx512f")]
6256#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6257#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6258#[rustc_legacy_const_generics(4)]
6259pub fn _mm512_mask_fixupimm_ps<const IMM8: i32>(
6260    a: __m512,
6261    k: __mmask16,
6262    b: __m512,
6263    c: __m512i,
6264) -> __m512 {
6265    unsafe {
6266        static_assert_uimm_bits!(IMM8, 8);
6267        let a = a.as_f32x16();
6268        let b = b.as_f32x16();
6269        let c = c.as_i32x16();
6270        let r = vfixupimmps(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
6271        transmute(r)
6272    }
6273}
6274
6275/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6276///
6277/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_ps&expand=2501)
6278#[inline]
6279#[target_feature(enable = "avx512f")]
6280#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6281#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6282#[rustc_legacy_const_generics(4)]
6283pub fn _mm512_maskz_fixupimm_ps<const IMM8: i32>(
6284    k: __mmask16,
6285    a: __m512,
6286    b: __m512,
6287    c: __m512i,
6288) -> __m512 {
6289    unsafe {
6290        static_assert_uimm_bits!(IMM8, 8);
6291        let a = a.as_f32x16();
6292        let b = b.as_f32x16();
6293        let c = c.as_i32x16();
6294        let r = vfixupimmpsz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
6295        transmute(r)
6296    }
6297}
6298
6299/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6300///
6301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fixupimm_ps&expand=2496)
6302#[inline]
6303#[target_feature(enable = "avx512f,avx512vl")]
6304#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6305#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6306#[rustc_legacy_const_generics(3)]
6307pub fn _mm256_fixupimm_ps<const IMM8: i32>(a: __m256, b: __m256, c: __m256i) -> __m256 {
6308    unsafe {
6309        static_assert_uimm_bits!(IMM8, 8);
6310        let a = a.as_f32x8();
6311        let b = b.as_f32x8();
6312        let c = c.as_i32x8();
6313        let r = vfixupimmps256(a, b, c, IMM8, 0b11111111);
6314        transmute(r)
6315    }
6316}
6317
6318/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6319///
6320/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fixupimm_ps&expand=2497)
6321#[inline]
6322#[target_feature(enable = "avx512f,avx512vl")]
6323#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6324#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6325#[rustc_legacy_const_generics(4)]
6326pub fn _mm256_mask_fixupimm_ps<const IMM8: i32>(
6327    a: __m256,
6328    k: __mmask8,
6329    b: __m256,
6330    c: __m256i,
6331) -> __m256 {
6332    unsafe {
6333        static_assert_uimm_bits!(IMM8, 8);
6334        let a = a.as_f32x8();
6335        let b = b.as_f32x8();
6336        let c = c.as_i32x8();
6337        let r = vfixupimmps256(a, b, c, IMM8, k);
6338        transmute(r)
6339    }
6340}
6341
6342/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6343///
6344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fixupimm_ps&expand=2498)
6345#[inline]
6346#[target_feature(enable = "avx512f,avx512vl")]
6347#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6348#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6349#[rustc_legacy_const_generics(4)]
6350pub fn _mm256_maskz_fixupimm_ps<const IMM8: i32>(
6351    k: __mmask8,
6352    a: __m256,
6353    b: __m256,
6354    c: __m256i,
6355) -> __m256 {
6356    unsafe {
6357        static_assert_uimm_bits!(IMM8, 8);
6358        let a = a.as_f32x8();
6359        let b = b.as_f32x8();
6360        let c = c.as_i32x8();
6361        let r = vfixupimmpsz256(a, b, c, IMM8, k);
6362        transmute(r)
6363    }
6364}
6365
6366/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6367///
6368/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fixupimm_ps&expand=2493)
6369#[inline]
6370#[target_feature(enable = "avx512f,avx512vl")]
6371#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6372#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6373#[rustc_legacy_const_generics(3)]
6374pub fn _mm_fixupimm_ps<const IMM8: i32>(a: __m128, b: __m128, c: __m128i) -> __m128 {
6375    unsafe {
6376        static_assert_uimm_bits!(IMM8, 8);
6377        let a = a.as_f32x4();
6378        let b = b.as_f32x4();
6379        let c = c.as_i32x4();
6380        let r = vfixupimmps128(a, b, c, IMM8, 0b00001111);
6381        transmute(r)
6382    }
6383}
6384
6385/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6386///
6387/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fixupimm_ps&expand=2494)
6388#[inline]
6389#[target_feature(enable = "avx512f,avx512vl")]
6390#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6391#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6392#[rustc_legacy_const_generics(4)]
6393pub fn _mm_mask_fixupimm_ps<const IMM8: i32>(
6394    a: __m128,
6395    k: __mmask8,
6396    b: __m128,
6397    c: __m128i,
6398) -> __m128 {
6399    unsafe {
6400        static_assert_uimm_bits!(IMM8, 8);
6401        let a = a.as_f32x4();
6402        let b = b.as_f32x4();
6403        let c = c.as_i32x4();
6404        let r = vfixupimmps128(a, b, c, IMM8, k);
6405        transmute(r)
6406    }
6407}
6408
6409/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6410///
6411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fixupimm_ps&expand=2495)
6412#[inline]
6413#[target_feature(enable = "avx512f,avx512vl")]
6414#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6415#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6416#[rustc_legacy_const_generics(4)]
6417pub fn _mm_maskz_fixupimm_ps<const IMM8: i32>(
6418    k: __mmask8,
6419    a: __m128,
6420    b: __m128,
6421    c: __m128i,
6422) -> __m128 {
6423    unsafe {
6424        static_assert_uimm_bits!(IMM8, 8);
6425        let a = a.as_f32x4();
6426        let b = b.as_f32x4();
6427        let c = c.as_i32x4();
6428        let r = vfixupimmpsz128(a, b, c, IMM8, k);
6429        transmute(r)
6430    }
6431}
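// Editor's note: a hedged sketch (not part of the upstream crate) of how the
// fixupimm intrinsics are driven. Each lane of `b` is classified (NaN, zero,
// one, infinity, ...), the matching 4-bit field of the same lane of `c` picks a
// "token" that selects the output (keep the destination lane, keep `b`, or one
// of several fixed constants), and IMM8 chooses which floating-point exceptions
// may be reported. With an all-zero table every token is 0, which keeps `a`:
//
//     let a = _mm_set1_ps(1.5);
//     let b = _mm_set1_ps(f32::NAN);
//     let table = _mm_setzero_si128();
//     let r = _mm_fixupimm_ps::<0>(a, b, table); // all tokens are 0 -> r == a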
6432
6433/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6434///
6435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_pd&expand=2490)
6436#[inline]
6437#[target_feature(enable = "avx512f")]
6438#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6439#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6440#[rustc_legacy_const_generics(3)]
6441pub fn _mm512_fixupimm_pd<const IMM8: i32>(a: __m512d, b: __m512d, c: __m512i) -> __m512d {
6442    unsafe {
6443        static_assert_uimm_bits!(IMM8, 8);
6444        let a = a.as_f64x8();
6445        let b = b.as_f64x8();
6446        let c = c.as_i64x8();
6447        let r = vfixupimmpd(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION);
6448        transmute(r)
6449    }
6450}
6451
6452/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6453///
6454/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_pd&expand=2491)
6455#[inline]
6456#[target_feature(enable = "avx512f")]
6457#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6458#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6459#[rustc_legacy_const_generics(4)]
6460pub fn _mm512_mask_fixupimm_pd<const IMM8: i32>(
6461    a: __m512d,
6462    k: __mmask8,
6463    b: __m512d,
6464    c: __m512i,
6465) -> __m512d {
6466    unsafe {
6467        static_assert_uimm_bits!(IMM8, 8);
6468        let a = a.as_f64x8();
6469        let b = b.as_f64x8();
6470        let c = c.as_i64x8();
6471        let r = vfixupimmpd(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
6472        transmute(r)
6473    }
6474}
6475
6476/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6477///
6478/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_pd&expand=2492)
6479#[inline]
6480#[target_feature(enable = "avx512f")]
6481#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6482#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6483#[rustc_legacy_const_generics(4)]
6484pub fn _mm512_maskz_fixupimm_pd<const IMM8: i32>(
6485    k: __mmask8,
6486    a: __m512d,
6487    b: __m512d,
6488    c: __m512i,
6489) -> __m512d {
6490    unsafe {
6491        static_assert_uimm_bits!(IMM8, 8);
6492        let a = a.as_f64x8();
6493        let b = b.as_f64x8();
6494        let c = c.as_i64x8();
6495        let r = vfixupimmpdz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
6496        transmute(r)
6497    }
6498}
6499
6500/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6501///
6502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fixupimm_pd&expand=2487)
6503#[inline]
6504#[target_feature(enable = "avx512f,avx512vl")]
6505#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6506#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6507#[rustc_legacy_const_generics(3)]
6508pub fn _mm256_fixupimm_pd<const IMM8: i32>(a: __m256d, b: __m256d, c: __m256i) -> __m256d {
6509    unsafe {
6510        static_assert_uimm_bits!(IMM8, 8);
6511        let a = a.as_f64x4();
6512        let b = b.as_f64x4();
6513        let c = c.as_i64x4();
6514        let r = vfixupimmpd256(a, b, c, IMM8, 0b00001111);
6515        transmute(r)
6516    }
6517}
6518
6519/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6520///
6521/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fixupimm_pd&expand=2488)
6522#[inline]
6523#[target_feature(enable = "avx512f,avx512vl")]
6524#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6525#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6526#[rustc_legacy_const_generics(4)]
6527pub fn _mm256_mask_fixupimm_pd<const IMM8: i32>(
6528    a: __m256d,
6529    k: __mmask8,
6530    b: __m256d,
6531    c: __m256i,
6532) -> __m256d {
6533    unsafe {
6534        static_assert_uimm_bits!(IMM8, 8);
6535        let a = a.as_f64x4();
6536        let b = b.as_f64x4();
6537        let c = c.as_i64x4();
6538        let r = vfixupimmpd256(a, b, c, IMM8, k);
6539        transmute(r)
6540    }
6541}
6542
6543/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6544///
6545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fixupimm_pd&expand=2489)
6546#[inline]
6547#[target_feature(enable = "avx512f,avx512vl")]
6548#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6549#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6550#[rustc_legacy_const_generics(4)]
6551pub fn _mm256_maskz_fixupimm_pd<const IMM8: i32>(
6552    k: __mmask8,
6553    a: __m256d,
6554    b: __m256d,
6555    c: __m256i,
6556) -> __m256d {
6557    unsafe {
6558        static_assert_uimm_bits!(IMM8, 8);
6559        let a = a.as_f64x4();
6560        let b = b.as_f64x4();
6561        let c = c.as_i64x4();
6562        let r = vfixupimmpdz256(a, b, c, IMM8, k);
6563        transmute(r)
6564    }
6565}
6566
6567/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6568///
6569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fixupimm_pd&expand=2484)
6570#[inline]
6571#[target_feature(enable = "avx512f,avx512vl")]
6572#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6573#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6574#[rustc_legacy_const_generics(3)]
6575pub fn _mm_fixupimm_pd<const IMM8: i32>(a: __m128d, b: __m128d, c: __m128i) -> __m128d {
6576    unsafe {
6577        static_assert_uimm_bits!(IMM8, 8);
6578        let a = a.as_f64x2();
6579        let b = b.as_f64x2();
6580        let c = c.as_i64x2();
6581        let r = vfixupimmpd128(a, b, c, IMM8, 0b00000011);
6582        transmute(r)
6583    }
6584}
6585
6586/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6587///
6588/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fixupimm_pd&expand=2485)
6589#[inline]
6590#[target_feature(enable = "avx512f,avx512vl")]
6591#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6592#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6593#[rustc_legacy_const_generics(4)]
6594pub fn _mm_mask_fixupimm_pd<const IMM8: i32>(
6595    a: __m128d,
6596    k: __mmask8,
6597    b: __m128d,
6598    c: __m128i,
6599) -> __m128d {
6600    unsafe {
6601        static_assert_uimm_bits!(IMM8, 8);
6602        let a = a.as_f64x2();
6603        let b = b.as_f64x2();
6604        let c = c.as_i64x2();
6605        let r = vfixupimmpd128(a, b, c, IMM8, k);
6606        transmute(r)
6607    }
6608}
6609
6610/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6611///
6612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fixupimm_pd&expand=2486)
6613#[inline]
6614#[target_feature(enable = "avx512f,avx512vl")]
6615#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6616#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6617#[rustc_legacy_const_generics(4)]
6618pub fn _mm_maskz_fixupimm_pd<const IMM8: i32>(
6619    k: __mmask8,
6620    a: __m128d,
6621    b: __m128d,
6622    c: __m128i,
6623) -> __m128d {
6624    unsafe {
6625        static_assert_uimm_bits!(IMM8, 8);
6626        let a = a.as_f64x2();
6627        let b = b.as_f64x2();
6628        let c = c.as_i64x2();
6629        let r = vfixupimmpdz128(a, b, c, IMM8, k);
6630        transmute(r)
6631    }
6632}
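// Editor's note: the double-precision fixup family works exactly like the
// single-precision one sketched earlier, except that the per-lane token table
// `c` is a vector of packed 64-bit integers. Illustrative only, not part of the
// upstream crate:
//
//     let a = _mm_set1_pd(1.5);
//     let b = _mm_set1_pd(f64::NAN);
//     let table = _mm_setzero_si128();
//     let r = _mm_fixupimm_pd::<0>(a, b, table); // all-zero table keeps the lanes of `a`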
6633
6634/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6635///
6636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ternarylogic_epi32&expand=5867)
6637#[inline]
6638#[target_feature(enable = "avx512f")]
6639#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6640#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6641#[rustc_legacy_const_generics(3)]
6642pub fn _mm512_ternarylogic_epi32<const IMM8: i32>(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
6643    unsafe {
6644        static_assert_uimm_bits!(IMM8, 8);
6645        let a = a.as_i32x16();
6646        let b = b.as_i32x16();
6647        let c = c.as_i32x16();
6648        let r = vpternlogd(a, b, c, IMM8);
6649        transmute(r)
6650    }
6651}
6652
6653/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from src, a, and b are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
6654///
6655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ternarylogic_epi32&expand=5865)
6656#[inline]
6657#[target_feature(enable = "avx512f")]
6658#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6659#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6660#[rustc_legacy_const_generics(4)]
6661pub fn _mm512_mask_ternarylogic_epi32<const IMM8: i32>(
6662    src: __m512i,
6663    k: __mmask16,
6664    a: __m512i,
6665    b: __m512i,
6666) -> __m512i {
6667    unsafe {
6668        static_assert_uimm_bits!(IMM8, 8);
6669        let src = src.as_i32x16();
6670        let a = a.as_i32x16();
6671        let b = b.as_i32x16();
6672        let r = vpternlogd(src, a, b, IMM8);
6673        transmute(simd_select_bitmask(k, r, src))
6674    }
6675}
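// Editor's note: illustrative sketch, not part of the upstream crate. In the
// write-masked form the three truth-table inputs are `src`, `a` and `b` (there
// is no separate `c`), and lanes whose mask bit is clear keep their value from
// `src`. For example, IMM8 = 0x96 computes the three-way XOR src ^ a ^ b:
//
//     let src = _mm512_set1_epi32(0b1100);
//     let a = _mm512_set1_epi32(0b1010);
//     let b = _mm512_set1_epi32(0b0110);
//     // Lanes 0..7: 0b1100 ^ 0b1010 ^ 0b0110 = 0; lanes 8..15 keep 0b1100 from src.
//     let r = _mm512_mask_ternarylogic_epi32::<0x96>(src, 0x00FF, a, b);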
6676
6677/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
6678///
6679/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ternarylogic_epi32&expand=5866)
6680#[inline]
6681#[target_feature(enable = "avx512f")]
6682#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6683#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6684#[rustc_legacy_const_generics(4)]
6685pub fn _mm512_maskz_ternarylogic_epi32<const IMM8: i32>(
6686    k: __mmask16,
6687    a: __m512i,
6688    b: __m512i,
6689    c: __m512i,
6690) -> __m512i {
6691    unsafe {
6692        static_assert_uimm_bits!(IMM8, 8);
6693        let a = a.as_i32x16();
6694        let b = b.as_i32x16();
6695        let c = c.as_i32x16();
6696        let r = vpternlogd(a, b, c, IMM8);
6697        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
6698    }
6699}
6700
6701/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6702///
6703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ternarylogic_epi32&expand=5864)
6704#[inline]
6705#[target_feature(enable = "avx512f,avx512vl")]
6706#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6707#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6708#[rustc_legacy_const_generics(3)]
6709pub fn _mm256_ternarylogic_epi32<const IMM8: i32>(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
6710    unsafe {
6711        static_assert_uimm_bits!(IMM8, 8);
6712        let a = a.as_i32x8();
6713        let b = b.as_i32x8();
6714        let c = c.as_i32x8();
6715        let r = vpternlogd256(a, b, c, IMM8);
6716        transmute(r)
6717    }
6718}
6719
6720/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from src, a, and b are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
6721///
6722/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ternarylogic_epi32&expand=5862)
6723#[inline]
6724#[target_feature(enable = "avx512f,avx512vl")]
6725#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6726#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6727#[rustc_legacy_const_generics(4)]
6728pub fn _mm256_mask_ternarylogic_epi32<const IMM8: i32>(
6729    src: __m256i,
6730    k: __mmask8,
6731    a: __m256i,
6732    b: __m256i,
6733) -> __m256i {
6734    unsafe {
6735        static_assert_uimm_bits!(IMM8, 8);
6736        let src = src.as_i32x8();
6737        let a = a.as_i32x8();
6738        let b = b.as_i32x8();
6739        let r = vpternlogd256(src, a, b, IMM8);
6740        transmute(simd_select_bitmask(k, r, src))
6741    }
6742}
6743
6744/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
6745///
6746/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ternarylogic_epi32&expand=5863)
6747#[inline]
6748#[target_feature(enable = "avx512f,avx512vl")]
6749#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6750#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6751#[rustc_legacy_const_generics(4)]
6752pub fn _mm256_maskz_ternarylogic_epi32<const IMM8: i32>(
6753    k: __mmask8,
6754    a: __m256i,
6755    b: __m256i,
6756    c: __m256i,
6757) -> __m256i {
6758    unsafe {
6759        static_assert_uimm_bits!(IMM8, 8);
6760        let a = a.as_i32x8();
6761        let b = b.as_i32x8();
6762        let c = c.as_i32x8();
6763        let r = vpternlogd256(a, b, c, IMM8);
6764        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
6765    }
6766}
6767
6768/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6769///
6770/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ternarylogic_epi32&expand=5861)
6771#[inline]
6772#[target_feature(enable = "avx512f,avx512vl")]
6773#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6774#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6775#[rustc_legacy_const_generics(3)]
6776pub fn _mm_ternarylogic_epi32<const IMM8: i32>(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
6777    unsafe {
6778        static_assert_uimm_bits!(IMM8, 8);
6779        let a = a.as_i32x4();
6780        let b = b.as_i32x4();
6781        let c = c.as_i32x4();
6782        let r = vpternlogd128(a, b, c, IMM8);
6783        transmute(r)
6784    }
6785}
6786
6787/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from src, a, and b are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
6788///
6789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ternarylogic_epi32&expand=5859)
6790#[inline]
6791#[target_feature(enable = "avx512f,avx512vl")]
6792#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6793#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6794#[rustc_legacy_const_generics(4)]
6795pub fn _mm_mask_ternarylogic_epi32<const IMM8: i32>(
6796    src: __m128i,
6797    k: __mmask8,
6798    a: __m128i,
6799    b: __m128i,
6800) -> __m128i {
6801    unsafe {
6802        static_assert_uimm_bits!(IMM8, 8);
6803        let src = src.as_i32x4();
6804        let a = a.as_i32x4();
6805        let b = b.as_i32x4();
6806        let r = vpternlogd128(src, a, b, IMM8);
6807        transmute(simd_select_bitmask(k, r, src))
6808    }
6809}
6810
6811/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
6812///
6813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ternarylogic_epi32&expand=5860)
6814#[inline]
6815#[target_feature(enable = "avx512f,avx512vl")]
6816#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6817#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6818#[rustc_legacy_const_generics(4)]
6819pub fn _mm_maskz_ternarylogic_epi32<const IMM8: i32>(
6820    k: __mmask8,
6821    a: __m128i,
6822    b: __m128i,
6823    c: __m128i,
6824) -> __m128i {
6825    unsafe {
6826        static_assert_uimm_bits!(IMM8, 8);
6827        let a = a.as_i32x4();
6828        let b = b.as_i32x4();
6829        let c = c.as_i32x4();
6830        let r = vpternlogd128(a, b, c, IMM8);
6831        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
6832    }
6833}
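// Editor's note: a few classic IMM8 truth tables for the ternary-logic
// intrinsics, shown as a sketch for arbitrary `__m128i` values a, b and c (not
// part of the upstream crate). Reading the index as (a << 2) | (b << 1) | c,
// bit i of IMM8 is the output for input pattern i, so:
//
//     let x = _mm_ternarylogic_epi32::<0x96>(a, b, c); // a ^ b ^ c (parity)
//     let m = _mm_ternarylogic_epi32::<0xE8>(a, b, c); // majority(a, b, c)
//     let s = _mm_ternarylogic_epi32::<0xCA>(a, b, c); // bitwise select: a ? b : c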
6834
6835/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6836///
6837/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ternarylogic_epi64&expand=5876)
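///
/// Illustrative sketch (the wrapper is an assumption): bit `(a << 2) | (b << 1) | c` of
/// `IMM8` holds the output for that input combination, so `0xE8` (`0b1110_1000`) implements
/// the three-input majority function `(a & b) | (a & c) | (b & c)`.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Each result bit is 1 exactly when at least two of the corresponding bits
/// // of `a`, `b` and `c` are 1.
/// #[target_feature(enable = "avx512f")]
/// unsafe fn majority(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
///     _mm512_ternarylogic_epi64::<0xE8>(a, b, c)
/// }
/// ```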
6838#[inline]
6839#[target_feature(enable = "avx512f")]
6840#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6841#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6842#[rustc_legacy_const_generics(3)]
6843pub fn _mm512_ternarylogic_epi64<const IMM8: i32>(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
6844    unsafe {
6845        static_assert_uimm_bits!(IMM8, 8);
6846        let a = a.as_i64x8();
6847        let b = b.as_i64x8();
6848        let c = c.as_i64x8();
6849        let r = vpternlogq(a, b, c, IMM8);
6850        transmute(r)
6851    }
6852}
6853
6854/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from src, a, and b are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
6855///
6856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ternarylogic_epi64&expand=5874)
6857#[inline]
6858#[target_feature(enable = "avx512f")]
6859#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6860#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6861#[rustc_legacy_const_generics(4)]
6862pub fn _mm512_mask_ternarylogic_epi64<const IMM8: i32>(
6863    src: __m512i,
6864    k: __mmask8,
6865    a: __m512i,
6866    b: __m512i,
6867) -> __m512i {
6868    unsafe {
6869        static_assert_uimm_bits!(IMM8, 8);
6870        let src = src.as_i64x8();
6871        let a = a.as_i64x8();
6872        let b = b.as_i64x8();
6873        let r = vpternlogq(src, a, b, IMM8);
6874        transmute(simd_select_bitmask(k, r, src))
6875    }
6876}
6877
6878/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
6879///
6880/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ternarylogic_epi64&expand=5875)
6881#[inline]
6882#[target_feature(enable = "avx512f")]
6883#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6884#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6885#[rustc_legacy_const_generics(4)]
6886pub fn _mm512_maskz_ternarylogic_epi64<const IMM8: i32>(
6887    k: __mmask8,
6888    a: __m512i,
6889    b: __m512i,
6890    c: __m512i,
6891) -> __m512i {
6892    unsafe {
6893        static_assert_uimm_bits!(IMM8, 8);
6894        let a = a.as_i64x8();
6895        let b = b.as_i64x8();
6896        let c = c.as_i64x8();
6897        let r = vpternlogq(a, b, c, IMM8);
6898        transmute(simd_select_bitmask(k, r, i64x8::ZERO))
6899    }
6900}
6901
6902/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6903///
6904/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ternarylogic_epi64&expand=5873)
6905#[inline]
6906#[target_feature(enable = "avx512f,avx512vl")]
6907#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6908#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6909#[rustc_legacy_const_generics(3)]
6910pub fn _mm256_ternarylogic_epi64<const IMM8: i32>(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
6911    unsafe {
6912        static_assert_uimm_bits!(IMM8, 8);
6913        let a = a.as_i64x4();
6914        let b = b.as_i64x4();
6915        let c = c.as_i64x4();
6916        let r = vpternlogq256(a, b, c, IMM8);
6917        transmute(r)
6918    }
6919}
6920
6921/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from src, a, and b are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
6922///
6923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ternarylogic_epi64&expand=5871)
6924#[inline]
6925#[target_feature(enable = "avx512f,avx512vl")]
6926#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6927#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6928#[rustc_legacy_const_generics(4)]
6929pub fn _mm256_mask_ternarylogic_epi64<const IMM8: i32>(
6930    src: __m256i,
6931    k: __mmask8,
6932    a: __m256i,
6933    b: __m256i,
6934) -> __m256i {
6935    unsafe {
6936        static_assert_uimm_bits!(IMM8, 8);
6937        let src = src.as_i64x4();
6938        let a = a.as_i64x4();
6939        let b = b.as_i64x4();
6940        let r = vpternlogq256(src, a, b, IMM8);
6941        transmute(simd_select_bitmask(k, r, src))
6942    }
6943}
6944
6945/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
6946///
6947/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ternarylogic_epi64&expand=5872)
6948#[inline]
6949#[target_feature(enable = "avx512f,avx512vl")]
6950#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6951#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6952#[rustc_legacy_const_generics(4)]
6953pub fn _mm256_maskz_ternarylogic_epi64<const IMM8: i32>(
6954    k: __mmask8,
6955    a: __m256i,
6956    b: __m256i,
6957    c: __m256i,
6958) -> __m256i {
6959    unsafe {
6960        static_assert_uimm_bits!(IMM8, 8);
6961        let a = a.as_i64x4();
6962        let b = b.as_i64x4();
6963        let c = c.as_i64x4();
6964        let r = vpternlogq256(a, b, c, IMM8);
6965        transmute(simd_select_bitmask(k, r, i64x4::ZERO))
6966    }
6967}
6968
6969/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6970///
6971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ternarylogic_epi64&expand=5870)
6972#[inline]
6973#[target_feature(enable = "avx512f,avx512vl")]
6974#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6975#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6976#[rustc_legacy_const_generics(3)]
6977pub fn _mm_ternarylogic_epi64<const IMM8: i32>(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
6978    unsafe {
6979        static_assert_uimm_bits!(IMM8, 8);
6980        let a = a.as_i64x2();
6981        let b = b.as_i64x2();
6982        let c = c.as_i64x2();
6983        let r = vpternlogq128(a, b, c, IMM8);
6984        transmute(r)
6985    }
6986}
6987
6988/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from src, a, and b are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
6989///
6990/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ternarylogic_epi64&expand=5868)
6991#[inline]
6992#[target_feature(enable = "avx512f,avx512vl")]
6993#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6994#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6995#[rustc_legacy_const_generics(4)]
6996pub fn _mm_mask_ternarylogic_epi64<const IMM8: i32>(
6997    src: __m128i,
6998    k: __mmask8,
6999    a: __m128i,
7000    b: __m128i,
7001) -> __m128i {
7002    unsafe {
7003        static_assert_uimm_bits!(IMM8, 8);
7004        let src = src.as_i64x2();
7005        let a = a.as_i64x2();
7006        let b = b.as_i64x2();
7007        let r = vpternlogq128(src, a, b, IMM8);
7008        transmute(simd_select_bitmask(k, r, src))
7009    }
7010}
7011
7012/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
7013///
7014/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ternarylogic_epi64&expand=5869)
7015#[inline]
7016#[target_feature(enable = "avx512f,avx512vl")]
7017#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7018#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
7019#[rustc_legacy_const_generics(4)]
7020pub fn _mm_maskz_ternarylogic_epi64<const IMM8: i32>(
7021    k: __mmask8,
7022    a: __m128i,
7023    b: __m128i,
7024    c: __m128i,
7025) -> __m128i {
7026    unsafe {
7027        static_assert_uimm_bits!(IMM8, 8);
7028        let a = a.as_i64x2();
7029        let b = b.as_i64x2();
7030        let c = c.as_i64x2();
7031        let r = vpternlogq128(a, b, c, IMM8);
7032        transmute(simd_select_bitmask(k, r, i64x2::ZERO))
7033    }
7034}
7035
7036/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7037/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7038///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7039///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7040///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7041///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7042/// The sign is determined by sc which can take the following values:\
7043///    _MM_MANT_SIGN_src     // sign = sign(src)\
7044///    _MM_MANT_SIGN_zero    // sign = 0\
7045///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7046///
7047/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_ps&expand=2880)
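///
/// Illustrative sketch (wrapper name assumed): with `_MM_MANT_NORM_1_2` and
/// `_MM_MANT_SIGN_src` each lane is reduced to its significand in `[1, 2)` with the original
/// sign, e.g. a lane holding `12.0` (= 1.5 * 2^3) becomes `1.5`.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Extract the significand in [1, 2): 12.0 -> 1.5, -12.0 -> -1.5.
/// #[target_feature(enable = "avx512f")]
/// unsafe fn mant_1_2(a: __m512) -> __m512 {
///     _mm512_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_src>(a)
/// }
/// ```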
7048#[inline]
7049#[target_feature(enable = "avx512f")]
7050#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7051#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7052#[rustc_legacy_const_generics(1, 2)]
7053pub fn _mm512_getmant_ps<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7054    a: __m512,
7055) -> __m512 {
7056    unsafe {
7057        static_assert_uimm_bits!(NORM, 4);
7058        static_assert_uimm_bits!(SIGN, 2);
7059        let a = a.as_f32x16();
7060        let zero = f32x16::ZERO;
7061        let r = vgetmantps(
7062            a,
7063            SIGN << 2 | NORM,
7064            zero,
7065            0b11111111_11111111,
7066            _MM_FROUND_CUR_DIRECTION,
7067        );
7068        transmute(r)
7069    }
7070}
7071
7072/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7073/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7074///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7075///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7076///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7077///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7078/// The sign is determined by sc which can take the following values:\
7079///    _MM_MANT_SIGN_src     // sign = sign(src)\
7080///    _MM_MANT_SIGN_zero    // sign = 0\
7081///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7082///
7083/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_ps&expand=2881)
7084#[inline]
7085#[target_feature(enable = "avx512f")]
7086#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7087#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7088#[rustc_legacy_const_generics(3, 4)]
7089pub fn _mm512_mask_getmant_ps<
7090    const NORM: _MM_MANTISSA_NORM_ENUM,
7091    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7092>(
7093    src: __m512,
7094    k: __mmask16,
7095    a: __m512,
7096) -> __m512 {
7097    unsafe {
7098        static_assert_uimm_bits!(NORM, 4);
7099        static_assert_uimm_bits!(SIGN, 2);
7100        let a = a.as_f32x16();
7101        let src = src.as_f32x16();
7102        let r = vgetmantps(a, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
7103        transmute(r)
7104    }
7105}
7106
7107/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7108/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7109///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7110///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7111///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7112///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7113/// The sign is determined by sc which can take the following values:\
7114///    _MM_MANT_SIGN_src     // sign = sign(src)\
7115///    _MM_MANT_SIGN_zero    // sign = 0\
7116///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7117///
7118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_ps&expand=2882)
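///
/// Illustrative sketch (wrapper name assumed): same normalization, but lanes whose bit in
/// `k` is clear come out as `0.0`; with `_MM_MANT_NORM_p5_1` a lane holding `12.0`
/// (= 0.75 * 2^4) becomes `0.75`.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Significand in [0.5, 1) for lanes selected by `k`, 0.0 elsewhere.
/// #[target_feature(enable = "avx512f")]
/// unsafe fn maskz_mant_p5_1(k: __mmask16, a: __m512) -> __m512 {
///     _mm512_maskz_getmant_ps::<_MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src>(k, a)
/// }
/// ```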
7119#[inline]
7120#[target_feature(enable = "avx512f")]
7121#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7122#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7123#[rustc_legacy_const_generics(2, 3)]
7124pub fn _mm512_maskz_getmant_ps<
7125    const NORM: _MM_MANTISSA_NORM_ENUM,
7126    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7127>(
7128    k: __mmask16,
7129    a: __m512,
7130) -> __m512 {
7131    unsafe {
7132        static_assert_uimm_bits!(NORM, 4);
7133        static_assert_uimm_bits!(SIGN, 2);
7134        let a = a.as_f32x16();
7135        let r = vgetmantps(
7136            a,
7137            SIGN << 2 | NORM,
7138            f32x16::ZERO,
7139            k,
7140            _MM_FROUND_CUR_DIRECTION,
7141        );
7142        transmute(r)
7143    }
7144}
7145
7146/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7147/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7148///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7149///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7150///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7151///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7152/// The sign is determined by sc which can take the following values:\
7153///    _MM_MANT_SIGN_src     // sign = sign(src)\
7154///    _MM_MANT_SIGN_zero    // sign = 0\
7155///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7156///
7157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getmant_ps&expand=2877)
7158#[inline]
7159#[target_feature(enable = "avx512f,avx512vl")]
7160#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7161#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7162#[rustc_legacy_const_generics(1, 2)]
7163pub fn _mm256_getmant_ps<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7164    a: __m256,
7165) -> __m256 {
7166    unsafe {
7167        static_assert_uimm_bits!(NORM, 4);
7168        static_assert_uimm_bits!(SIGN, 2);
7169        let a = a.as_f32x8();
7170        let r = vgetmantps256(a, SIGN << 2 | NORM, f32x8::ZERO, 0b11111111);
7171        transmute(r)
7172    }
7173}
7174
7175/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7176/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7177///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7178///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7179///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7180///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7181/// The sign is determined by sc which can take the following values:\
7182///    _MM_MANT_SIGN_src     // sign = sign(src)\
7183///    _MM_MANT_SIGN_zero    // sign = 0\
7184///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7185///
7186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getmant_ps&expand=2878)
7187#[inline]
7188#[target_feature(enable = "avx512f,avx512vl")]
7189#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7190#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7191#[rustc_legacy_const_generics(3, 4)]
7192pub fn _mm256_mask_getmant_ps<
7193    const NORM: _MM_MANTISSA_NORM_ENUM,
7194    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7195>(
7196    src: __m256,
7197    k: __mmask8,
7198    a: __m256,
7199) -> __m256 {
7200    unsafe {
7201        static_assert_uimm_bits!(NORM, 4);
7202        static_assert_uimm_bits!(SIGN, 2);
7203        let a = a.as_f32x8();
7204        let src = src.as_f32x8();
7205        let r = vgetmantps256(a, SIGN << 2 | NORM, src, k);
7206        transmute(r)
7207    }
7208}
7209
7210/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7211/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7212///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7213///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7214///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7215///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7216/// The sign is determined by sc which can take the following values:\
7217///    _MM_MANT_SIGN_src     // sign = sign(src)\
7218///    _MM_MANT_SIGN_zero    // sign = 0\
7219///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7220///
7221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getmant_ps&expand=2879)
7222#[inline]
7223#[target_feature(enable = "avx512f,avx512vl")]
7224#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7225#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7226#[rustc_legacy_const_generics(2, 3)]
7227pub fn _mm256_maskz_getmant_ps<
7228    const NORM: _MM_MANTISSA_NORM_ENUM,
7229    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7230>(
7231    k: __mmask8,
7232    a: __m256,
7233) -> __m256 {
7234    unsafe {
7235        static_assert_uimm_bits!(NORM, 4);
7236        static_assert_uimm_bits!(SIGN, 2);
7237        let a = a.as_f32x8();
7238        let r = vgetmantps256(a, SIGN << 2 | NORM, f32x8::ZERO, k);
7239        transmute(r)
7240    }
7241}
7242
7243/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7244/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7245///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7246///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7247///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7248///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7249/// The sign is determined by sc which can take the following values:\
7250///    _MM_MANT_SIGN_src     // sign = sign(src)\
7251///    _MM_MANT_SIGN_zero    // sign = 0\
7252///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7253///
7254/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getmant_ps&expand=2874)
7255#[inline]
7256#[target_feature(enable = "avx512f,avx512vl")]
7257#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7258#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7259#[rustc_legacy_const_generics(1, 2)]
7260pub fn _mm_getmant_ps<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7261    a: __m128,
7262) -> __m128 {
7263    unsafe {
7264        static_assert_uimm_bits!(NORM, 4);
7265        static_assert_uimm_bits!(SIGN, 2);
7266        let a = a.as_f32x4();
7267        let r = vgetmantps128(a, SIGN << 2 | NORM, f32x4::ZERO, 0b00001111);
7268        transmute(r)
7269    }
7270}
7271
7272/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7273/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7274///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7275///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7276///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7277///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7278/// The sign is determined by sc which can take the following values:\
7279///    _MM_MANT_SIGN_src     // sign = sign(src)\
7280///    _MM_MANT_SIGN_zero    // sign = 0\
7281///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7282///
7283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getmant_ps&expand=2875)
7284#[inline]
7285#[target_feature(enable = "avx512f,avx512vl")]
7286#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7287#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7288#[rustc_legacy_const_generics(3, 4)]
7289pub fn _mm_mask_getmant_ps<
7290    const NORM: _MM_MANTISSA_NORM_ENUM,
7291    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7292>(
7293    src: __m128,
7294    k: __mmask8,
7295    a: __m128,
7296) -> __m128 {
7297    unsafe {
7298        static_assert_uimm_bits!(NORM, 4);
7299        static_assert_uimm_bits!(SIGN, 2);
7300        let a = a.as_f32x4();
7301        let src = src.as_f32x4();
7302        let r = vgetmantps128(a, SIGN << 2 | NORM, src, k);
7303        transmute(r)
7304    }
7305}
7306
7307/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7308/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7309///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7310///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7311///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7312///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7313/// The sign is determined by sc which can take the following values:\
7314///    _MM_MANT_SIGN_src     // sign = sign(src)\
7315///    _MM_MANT_SIGN_zero    // sign = 0\
7316///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7317///
7318/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getmant_ps&expand=2876)
7319#[inline]
7320#[target_feature(enable = "avx512f,avx512vl")]
7321#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7322#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7323#[rustc_legacy_const_generics(2, 3)]
7324pub fn _mm_maskz_getmant_ps<
7325    const NORM: _MM_MANTISSA_NORM_ENUM,
7326    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7327>(
7328    k: __mmask8,
7329    a: __m128,
7330) -> __m128 {
7331    unsafe {
7332        static_assert_uimm_bits!(NORM, 4);
7333        static_assert_uimm_bits!(SIGN, 2);
7334        let a = a.as_f32x4();
7335        let r = vgetmantps128(a, SIGN << 2 | NORM, f32x4::ZERO, k);
7336        transmute(r)
7337    }
7338}
7339
7340/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7341/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7342///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7343///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7344///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7345///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7346/// The sign is determined by sc which can take the following values:\
7347///    _MM_MANT_SIGN_src     // sign = sign(src)\
7348///    _MM_MANT_SIGN_zero    // sign = 0\
7349///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7350///
7351/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_pd&expand=2871)
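///
/// Illustrative sketch (wrapper name assumed): with `_MM_MANT_SIGN_nan` any lane whose input
/// is negative produces NaN, while non-negative lanes get their significand in `[1, 2)`.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // 8.0 -> 1.0 and 12.0 -> 1.5, but -3.0 -> NaN because of _MM_MANT_SIGN_nan.
/// #[target_feature(enable = "avx512f")]
/// unsafe fn mant_nan_on_negative(a: __m512d) -> __m512d {
///     _mm512_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_nan>(a)
/// }
/// ```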
7352#[inline]
7353#[target_feature(enable = "avx512f")]
7354#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7355#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7356#[rustc_legacy_const_generics(1, 2)]
7357pub fn _mm512_getmant_pd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7358    a: __m512d,
7359) -> __m512d {
7360    unsafe {
7361        static_assert_uimm_bits!(NORM, 4);
7362        static_assert_uimm_bits!(SIGN, 2);
7363        let a = a.as_f64x8();
7364        let zero = f64x8::ZERO;
7365        let r = vgetmantpd(
7366            a,
7367            SIGN << 2 | NORM,
7368            zero,
7369            0b11111111,
7370            _MM_FROUND_CUR_DIRECTION,
7371        );
7372        transmute(r)
7373    }
7374}
7375
7376/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7377/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7378///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7379///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7380///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7381///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7382/// The sign is determined by sc which can take the following values:\
7383///    _MM_MANT_SIGN_src     // sign = sign(src)\
7384///    _MM_MANT_SIGN_zero    // sign = 0\
7385///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7386///
7387/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_pd&expand=2872)
7388#[inline]
7389#[target_feature(enable = "avx512f")]
7390#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7391#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7392#[rustc_legacy_const_generics(3, 4)]
7393pub fn _mm512_mask_getmant_pd<
7394    const NORM: _MM_MANTISSA_NORM_ENUM,
7395    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7396>(
7397    src: __m512d,
7398    k: __mmask8,
7399    a: __m512d,
7400) -> __m512d {
7401    unsafe {
7402        static_assert_uimm_bits!(NORM, 4);
7403        static_assert_uimm_bits!(SIGN, 2);
7404        let a = a.as_f64x8();
7405        let src = src.as_f64x8();
7406        let r = vgetmantpd(a, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
7407        transmute(r)
7408    }
7409}
7410
7411/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7412/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7413///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7414///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7415///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7416///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7417/// The sign is determined by sc which can take the following values:\
7418///    _MM_MANT_SIGN_src     // sign = sign(src)\
7419///    _MM_MANT_SIGN_zero    // sign = 0\
7420///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7421///
7422/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_pd&expand=2873)
7423#[inline]
7424#[target_feature(enable = "avx512f")]
7425#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7426#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7427#[rustc_legacy_const_generics(2, 3)]
7428pub fn _mm512_maskz_getmant_pd<
7429    const NORM: _MM_MANTISSA_NORM_ENUM,
7430    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7431>(
7432    k: __mmask8,
7433    a: __m512d,
7434) -> __m512d {
7435    unsafe {
7436        static_assert_uimm_bits!(NORM, 4);
7437        static_assert_uimm_bits!(SIGN, 2);
7438        let a = a.as_f64x8();
7439        let r = vgetmantpd(
7440            a,
7441            SIGN << 2 | NORM,
7442            f64x8::ZERO,
7443            k,
7444            _MM_FROUND_CUR_DIRECTION,
7445        );
7446        transmute(r)
7447    }
7448}
7449
7450/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7451/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7452///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7453///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7454///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7455///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7456/// The sign is determined by sc which can take the following values:\
7457///    _MM_MANT_SIGN_src     // sign = sign(src)\
7458///    _MM_MANT_SIGN_zero    // sign = 0\
7459///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7460///
7461/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getmant_pd&expand=2868)
7462#[inline]
7463#[target_feature(enable = "avx512f,avx512vl")]
7464#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7465#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7466#[rustc_legacy_const_generics(1, 2)]
7467pub fn _mm256_getmant_pd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7468    a: __m256d,
7469) -> __m256d {
7470    unsafe {
7471        static_assert_uimm_bits!(NORM, 4);
7472        static_assert_uimm_bits!(SIGN, 2);
7473        let a = a.as_f64x4();
7474        let r = vgetmantpd256(a, SIGN << 2 | NORM, f64x4::ZERO, 0b00001111);
7475        transmute(r)
7476    }
7477}
7478
7479/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7480/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7481///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7482///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7483///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7484///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7485/// The sign is determined by sc which can take the following values:\
7486///    _MM_MANT_SIGN_src     // sign = sign(src)\
7487///    _MM_MANT_SIGN_zero    // sign = 0\
7488///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7489///
7490/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getmant_pd&expand=2869)
7491#[inline]
7492#[target_feature(enable = "avx512f,avx512vl")]
7493#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7494#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7495#[rustc_legacy_const_generics(3, 4)]
7496pub fn _mm256_mask_getmant_pd<
7497    const NORM: _MM_MANTISSA_NORM_ENUM,
7498    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7499>(
7500    src: __m256d,
7501    k: __mmask8,
7502    a: __m256d,
7503) -> __m256d {
7504    unsafe {
7505        static_assert_uimm_bits!(NORM, 4);
7506        static_assert_uimm_bits!(SIGN, 2);
7507        let a = a.as_f64x4();
7508        let src = src.as_f64x4();
7509        let r = vgetmantpd256(a, SIGN << 2 | NORM, src, k);
7510        transmute(r)
7511    }
7512}
7513
7514/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7515/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7516///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7517///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7518///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7519///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7520/// The sign is determined by sc which can take the following values:\
7521///    _MM_MANT_SIGN_src     // sign = sign(src)\
7522///    _MM_MANT_SIGN_zero    // sign = 0\
7523///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7524///
7525/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getmant_pd&expand=2870)
7526#[inline]
7527#[target_feature(enable = "avx512f,avx512vl")]
7528#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7529#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7530#[rustc_legacy_const_generics(2, 3)]
7531pub fn _mm256_maskz_getmant_pd<
7532    const NORM: _MM_MANTISSA_NORM_ENUM,
7533    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7534>(
7535    k: __mmask8,
7536    a: __m256d,
7537) -> __m256d {
7538    unsafe {
7539        static_assert_uimm_bits!(NORM, 4);
7540        static_assert_uimm_bits!(SIGN, 2);
7541        let a = a.as_f64x4();
7542        let r = vgetmantpd256(a, SIGN << 2 | NORM, f64x4::ZERO, k);
7543        transmute(r)
7544    }
7545}
7546
7547/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7548/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7549///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7550///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7551///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7552///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7553/// The sign is determined by sc which can take the following values:\
7554///    _MM_MANT_SIGN_src     // sign = sign(src)\
7555///    _MM_MANT_SIGN_zero    // sign = 0\
7556///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7557///
7558/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getmant_pd&expand=2865)
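///
/// Illustrative sketch (wrapper name assumed): the 128-bit form additionally requires
/// `avx512vl`; `_MM_MANT_NORM_p75_1p5` keeps the significand in `[0.75, 1.5)`, so `12.0`
/// becomes `0.75` and `10.0` (= 1.25 * 2^3) becomes `1.25`.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Significand in [0.75, 1.5): 12.0 -> 0.75, 10.0 -> 1.25.
/// #[target_feature(enable = "avx512f,avx512vl")]
/// unsafe fn mant_p75_1p5(a: __m128d) -> __m128d {
///     _mm_getmant_pd::<_MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_src>(a)
/// }
/// ```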
7559#[inline]
7560#[target_feature(enable = "avx512f,avx512vl")]
7561#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7562#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7563#[rustc_legacy_const_generics(1, 2)]
7564pub fn _mm_getmant_pd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7565    a: __m128d,
7566) -> __m128d {
7567    unsafe {
7568        static_assert_uimm_bits!(NORM, 4);
7569        static_assert_uimm_bits!(SIGN, 2);
7570        let a = a.as_f64x2();
7571        let r = vgetmantpd128(a, SIGN << 2 | NORM, f64x2::ZERO, 0b00000011);
7572        transmute(r)
7573    }
7574}
7575
7576/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7577/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7578///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7579///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7580///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7581///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7582/// The sign is determined by sc which can take the following values:\
7583///    _MM_MANT_SIGN_src     // sign = sign(src)\
7584///    _MM_MANT_SIGN_zero    // sign = 0\
7585///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7586///
7587/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getmant_pd&expand=2866)
7588#[inline]
7589#[target_feature(enable = "avx512f,avx512vl")]
7590#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7591#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7592#[rustc_legacy_const_generics(3, 4)]
7593pub fn _mm_mask_getmant_pd<
7594    const NORM: _MM_MANTISSA_NORM_ENUM,
7595    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7596>(
7597    src: __m128d,
7598    k: __mmask8,
7599    a: __m128d,
7600) -> __m128d {
7601    unsafe {
7602        static_assert_uimm_bits!(NORM, 4);
7603        static_assert_uimm_bits!(SIGN, 2);
7604        let a = a.as_f64x2();
7605        let src = src.as_f64x2();
7606        let r = vgetmantpd128(a, SIGN << 2 | NORM, src, k);
7607        transmute(r)
7608    }
7609}
7610
7611/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7612/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7613///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7614///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7615///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7616///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7617/// The sign is determined by sc which can take the following values:\
7618///    _MM_MANT_SIGN_src     // sign = sign(src)\
7619///    _MM_MANT_SIGN_zero    // sign = 0\
7620///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7621///
7622/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getmant_pd&expand=2867)
7623#[inline]
7624#[target_feature(enable = "avx512f,avx512vl")]
7625#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7626#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7627#[rustc_legacy_const_generics(2, 3)]
7628pub fn _mm_maskz_getmant_pd<
7629    const NORM: _MM_MANTISSA_NORM_ENUM,
7630    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7631>(
7632    k: __mmask8,
7633    a: __m128d,
7634) -> __m128d {
7635    unsafe {
7636        static_assert_uimm_bits!(NORM, 4);
7637        static_assert_uimm_bits!(SIGN, 2);
7638        let a = a.as_f64x2();
7639        let r = vgetmantpd128(a, SIGN << 2 | NORM, f64x2::ZERO, k);
7640        transmute(r)
7641    }
7642}
7643
7644/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.\
7645///
7646/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7647/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7648/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7649/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7650/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7651/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7652///
7653/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_round_ps&expand=145)
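///
/// Illustrative sketch (wrapper name assumed): performing the same addition rounded toward
/// negative and toward positive infinity brackets the exact elementwise sum, the basic step
/// of interval arithmetic.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Lower and upper bounds of the exact elementwise sum via directed rounding.
/// #[target_feature(enable = "avx512f")]
/// unsafe fn add_bounds(a: __m512, b: __m512) -> (__m512, __m512) {
///     let lo = _mm512_add_round_ps::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a, b);
///     let hi = _mm512_add_round_ps::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a, b);
///     (lo, hi)
/// }
/// ```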
7654#[inline]
7655#[target_feature(enable = "avx512f")]
7656#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7657#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
7658#[rustc_legacy_const_generics(2)]
7659pub fn _mm512_add_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
7660    unsafe {
7661        static_assert_rounding!(ROUNDING);
7662        let a = a.as_f32x16();
7663        let b = b.as_f32x16();
7664        let r = vaddps(a, b, ROUNDING);
7665        transmute(r)
7666    }
7667}
7668
7669/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7670///
7671/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7672/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7673/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7674/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7675/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7676/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7677///
7678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_round_ps&expand=146)
7679#[inline]
7680#[target_feature(enable = "avx512f")]
7681#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7682#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
7683#[rustc_legacy_const_generics(4)]
7684pub fn _mm512_mask_add_round_ps<const ROUNDING: i32>(
7685    src: __m512,
7686    k: __mmask16,
7687    a: __m512,
7688    b: __m512,
7689) -> __m512 {
7690    unsafe {
7691        static_assert_rounding!(ROUNDING);
7692        let a = a.as_f32x16();
7693        let b = b.as_f32x16();
7694        let r = vaddps(a, b, ROUNDING);
7695        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
7696    }
7697}
7698
7699/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7700///
7701/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7702/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7703/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7704/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7705/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7706/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7707///
7708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_round_ps&expand=147)
7709#[inline]
7710#[target_feature(enable = "avx512f")]
7711#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7712#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
7713#[rustc_legacy_const_generics(3)]
7714pub fn _mm512_maskz_add_round_ps<const ROUNDING: i32>(
7715    k: __mmask16,
7716    a: __m512,
7717    b: __m512,
7718) -> __m512 {
7719    unsafe {
7720        static_assert_rounding!(ROUNDING);
7721        let a = a.as_f32x16();
7722        let b = b.as_f32x16();
7723        let r = vaddps(a, b, ROUNDING);
7724        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
7725    }
7726}
7727
7728/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.\
7729///
7730/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7731/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7732/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7733/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7734/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7735/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7736///
7737/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_round_pd&expand=142)
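///
/// Illustrative sketch (wrapper name assumed): `_MM_FROUND_CUR_DIRECTION` defers to the
/// rounding mode currently programmed in `MXCSR.RC`, so the result depends on any preceding
/// `_MM_SET_ROUNDING_MODE` call.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Add with round-down semantics by switching MXCSR, then restore the default mode.
/// #[target_feature(enable = "avx512f")]
/// unsafe fn add_round_down(a: __m512d, b: __m512d) -> __m512d {
///     unsafe { _MM_SET_ROUNDING_MODE(_MM_ROUND_DOWN) };
///     let r = _mm512_add_round_pd::<_MM_FROUND_CUR_DIRECTION>(a, b);
///     unsafe { _MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST) };
///     r
/// }
/// ```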
7738#[inline]
7739#[target_feature(enable = "avx512f")]
7740#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7741#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
7742#[rustc_legacy_const_generics(2)]
7743pub fn _mm512_add_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
7744    unsafe {
7745        static_assert_rounding!(ROUNDING);
7746        let a = a.as_f64x8();
7747        let b = b.as_f64x8();
7748        let r = vaddpd(a, b, ROUNDING);
7749        transmute(r)
7750    }
7751}
7752
7753/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7754///
7755/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7756/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7757/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7758/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7759/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7760/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7761///
7762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_round_pd&expand=143)
7763#[inline]
7764#[target_feature(enable = "avx512f")]
7765#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7766#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
7767#[rustc_legacy_const_generics(4)]
7768pub fn _mm512_mask_add_round_pd<const ROUNDING: i32>(
7769    src: __m512d,
7770    k: __mmask8,
7771    a: __m512d,
7772    b: __m512d,
7773) -> __m512d {
7774    unsafe {
7775        static_assert_rounding!(ROUNDING);
7776        let a = a.as_f64x8();
7777        let b = b.as_f64x8();
7778        let r = vaddpd(a, b, ROUNDING);
7779        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
7780    }
7781}
7782
7783/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7784///
7785/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7786/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7787/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7788/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7789/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7790/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7791///
7792/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_round_pd&expand=144)
7793#[inline]
7794#[target_feature(enable = "avx512f")]
7795#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7796#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
7797#[rustc_legacy_const_generics(3)]
7798pub fn _mm512_maskz_add_round_pd<const ROUNDING: i32>(
7799    k: __mmask8,
7800    a: __m512d,
7801    b: __m512d,
7802) -> __m512d {
7803    unsafe {
7804        static_assert_rounding!(ROUNDING);
7805        let a = a.as_f64x8();
7806        let b = b.as_f64x8();
7807        let r = vaddpd(a, b, ROUNDING);
7808        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
7809    }
7810}
7811
7812/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst.\
7813///
7814/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7815/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7816/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7817/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7818/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7819/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7820///
7821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_round_ps&expand=5739)
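///
/// Illustrative sketch (wrapper name assumed): `_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC`
/// truncates each difference toward zero regardless of the rounding mode in `MXCSR`.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Elementwise a - b, rounded toward zero with exceptions suppressed.
/// #[target_feature(enable = "avx512f")]
/// unsafe fn sub_truncate(a: __m512, b: __m512) -> __m512 {
///     _mm512_sub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b)
/// }
/// ```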
7822#[inline]
7823#[target_feature(enable = "avx512f")]
7824#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7825#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
7826#[rustc_legacy_const_generics(2)]
7827pub fn _mm512_sub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
7828    unsafe {
7829        static_assert_rounding!(ROUNDING);
7830        let a = a.as_f32x16();
7831        let b = b.as_f32x16();
7832        let r = vsubps(a, b, ROUNDING);
7833        transmute(r)
7834    }
7835}
7836
7837/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7838///
7839/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7840/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7841/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7842/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7843/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7844/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7845///
7846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_round_ps&expand=5737)
7847#[inline]
7848#[target_feature(enable = "avx512f")]
7849#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7850#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
7851#[rustc_legacy_const_generics(4)]
7852pub fn _mm512_mask_sub_round_ps<const ROUNDING: i32>(
7853    src: __m512,
7854    k: __mmask16,
7855    a: __m512,
7856    b: __m512,
7857) -> __m512 {
7858    unsafe {
7859        static_assert_rounding!(ROUNDING);
7860        let a = a.as_f32x16();
7861        let b = b.as_f32x16();
7862        let r = vsubps(a, b, ROUNDING);
7863        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
7864    }
7865}
7866
7867/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7868///
7869/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7870/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7871/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7872/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7873/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7874/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7875///
7876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_round_ps&expand=5738)
7877#[inline]
7878#[target_feature(enable = "avx512f")]
7879#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7880#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
7881#[rustc_legacy_const_generics(3)]
7882pub fn _mm512_maskz_sub_round_ps<const ROUNDING: i32>(
7883    k: __mmask16,
7884    a: __m512,
7885    b: __m512,
7886) -> __m512 {
7887    unsafe {
7888        static_assert_rounding!(ROUNDING);
7889        let a = a.as_f32x16();
7890        let b = b.as_f32x16();
7891        let r = vsubps(a, b, ROUNDING);
7892        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
7893    }
7894}
7895
7896/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst.\
7897///
7898/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7899/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7900/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7901/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7902/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7903/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7904///
7905/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_round_pd&expand=5736)
7906#[inline]
7907#[target_feature(enable = "avx512f")]
7908#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7909#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
7910#[rustc_legacy_const_generics(2)]
7911pub fn _mm512_sub_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
7912    unsafe {
7913        static_assert_rounding!(ROUNDING);
7914        let a = a.as_f64x8();
7915        let b = b.as_f64x8();
7916        let r = vsubpd(a, b, ROUNDING);
7917        transmute(r)
7918    }
7919}
7920
7921/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7922///
7923/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7924/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7925/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7926/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7927/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7928/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7929///
7930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_round_pd&expand=5734)
7931#[inline]
7932#[target_feature(enable = "avx512f")]
7933#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7934#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
7935#[rustc_legacy_const_generics(4)]
7936pub fn _mm512_mask_sub_round_pd<const ROUNDING: i32>(
7937    src: __m512d,
7938    k: __mmask8,
7939    a: __m512d,
7940    b: __m512d,
7941) -> __m512d {
7942    unsafe {
7943        static_assert_rounding!(ROUNDING);
7944        let a = a.as_f64x8();
7945        let b = b.as_f64x8();
7946        let r = vsubpd(a, b, ROUNDING);
7947        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
7948    }
7949}
7950
7951/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7952///
7953/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7954/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7955/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7956/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7957/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7958/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7959///
7960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_round_pd&expand=5735)
7961#[inline]
7962#[target_feature(enable = "avx512f")]
7963#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7964#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
7965#[rustc_legacy_const_generics(3)]
7966pub fn _mm512_maskz_sub_round_pd<const ROUNDING: i32>(
7967    k: __mmask8,
7968    a: __m512d,
7969    b: __m512d,
7970) -> __m512d {
7971    unsafe {
7972        static_assert_rounding!(ROUNDING);
7973        let a = a.as_f64x8();
7974        let b = b.as_f64x8();
7975        let r = vsubpd(a, b, ROUNDING);
7976        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
7977    }
7978}
7979
7980/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.\
7981///
7982/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7983/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7984/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7985/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7986/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7987/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7988///
7989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_round_ps&expand=3940)
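///
/// An illustrative sketch (hypothetical helper; nightly with `stdarch_x86_avx512` and
/// `avx512f` assumed) showing how the rounding mode is passed as a const generic:
///
/// ```ignore
/// #[target_feature(enable = "avx512f")]
/// unsafe fn mul_toward_zero(a: __m512, b: __m512) -> __m512 {
///     // Truncate (round toward zero) and suppress exceptions.
///     _mm512_mul_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b)
/// }
/// ```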
7990#[inline]
7991#[target_feature(enable = "avx512f")]
7992#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7993#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
7994#[rustc_legacy_const_generics(2)]
7995pub fn _mm512_mul_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
7996    unsafe {
7997        static_assert_rounding!(ROUNDING);
7998        let a = a.as_f32x16();
7999        let b = b.as_f32x16();
8000        let r = vmulps(a, b, ROUNDING);
8001        transmute(r)
8002    }
8003}
8004
8005/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8006///
8007/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8008/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8009/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8010/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8011/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8012/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8013///
8014/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_round_ps&expand=3938)
8015#[inline]
8016#[target_feature(enable = "avx512f")]
8017#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8018#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
8019#[rustc_legacy_const_generics(4)]
8020pub fn _mm512_mask_mul_round_ps<const ROUNDING: i32>(
8021    src: __m512,
8022    k: __mmask16,
8023    a: __m512,
8024    b: __m512,
8025) -> __m512 {
8026    unsafe {
8027        static_assert_rounding!(ROUNDING);
8028        let a = a.as_f32x16();
8029        let b = b.as_f32x16();
8030        let r = vmulps(a, b, ROUNDING);
8031        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
8032    }
8033}
8034
8035/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8036///
8037/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8038/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8039/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8040/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8041/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8042/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8043///
8044/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_round_ps&expand=3939)
8045#[inline]
8046#[target_feature(enable = "avx512f")]
8047#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8048#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
8049#[rustc_legacy_const_generics(3)]
8050pub fn _mm512_maskz_mul_round_ps<const ROUNDING: i32>(
8051    k: __mmask16,
8052    a: __m512,
8053    b: __m512,
8054) -> __m512 {
8055    unsafe {
8056        static_assert_rounding!(ROUNDING);
8057        let a = a.as_f32x16();
8058        let b = b.as_f32x16();
8059        let r = vmulps(a, b, ROUNDING);
8060        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
8061    }
8062}
8063
8064/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.\
8065///
8066/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8067/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8068/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8069/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8070/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8071/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8072///
8073/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_round_pd&expand=3937)
8074#[inline]
8075#[target_feature(enable = "avx512f")]
8076#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8077#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
8078#[rustc_legacy_const_generics(2)]
8079pub fn _mm512_mul_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
8080    unsafe {
8081        static_assert_rounding!(ROUNDING);
8082        let a = a.as_f64x8();
8083        let b = b.as_f64x8();
8084        let r = vmulpd(a, b, ROUNDING);
8085        transmute(r)
8086    }
8087}
8088
8089/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8090///
8091/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8092/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8093/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8094/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8095/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8096/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8097///
8098/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_round_pd&expand=3935)
8099#[inline]
8100#[target_feature(enable = "avx512f")]
8101#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8102#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
8103#[rustc_legacy_const_generics(4)]
8104pub fn _mm512_mask_mul_round_pd<const ROUNDING: i32>(
8105    src: __m512d,
8106    k: __mmask8,
8107    a: __m512d,
8108    b: __m512d,
8109) -> __m512d {
8110    unsafe {
8111        static_assert_rounding!(ROUNDING);
8112        let a = a.as_f64x8();
8113        let b = b.as_f64x8();
8114        let r = vmulpd(a, b, ROUNDING);
8115        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
8116    }
8117}
8118
8119/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8120///
8121/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8122/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8123/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8124/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8125/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8126/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8127///
8128/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_round_pd&expand=3939)
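///
/// An illustrative sketch (hypothetical helper; nightly with `stdarch_x86_avx512` and
/// `avx512f` assumed) of the zero-masking behaviour:
///
/// ```ignore
/// #[target_feature(enable = "avx512f")]
/// unsafe fn maskz_mul(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
///     // Round toward negative infinity with exceptions suppressed; lanes whose
///     // mask bit is 0 are zeroed rather than multiplied.
///     _mm512_maskz_mul_round_pd::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(k, a, b)
/// }
/// ```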
8129#[inline]
8130#[target_feature(enable = "avx512f")]
8131#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8132#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
8133#[rustc_legacy_const_generics(3)]
8134pub fn _mm512_maskz_mul_round_pd<const ROUNDING: i32>(
8135    k: __mmask8,
8136    a: __m512d,
8137    b: __m512d,
8138) -> __m512d {
8139    unsafe {
8140        static_assert_rounding!(ROUNDING);
8141        let a = a.as_f64x8();
8142        let b = b.as_f64x8();
8143        let r = vmulpd(a, b, ROUNDING);
8144        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
8145    }
8146}
8147
8148/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst.\
8149///
8150/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8151/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8152/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8153/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8154/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8155/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8156///
8157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_round_ps&expand=2168)
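///
/// An illustrative sketch (hypothetical helper; nightly with `stdarch_x86_avx512` and
/// `avx512f` assumed) using the rounding mode currently set in `MXCSR.RC`:
///
/// ```ignore
/// #[target_feature(enable = "avx512f")]
/// unsafe fn div_cur(a: __m512, b: __m512) -> __m512 {
///     // Defer to the rounding mode currently programmed in MXCSR.RC.
///     _mm512_div_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, b)
/// }
/// ```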
8158#[inline]
8159#[target_feature(enable = "avx512f")]
8160#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8161#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
8162#[rustc_legacy_const_generics(2)]
8163pub fn _mm512_div_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
8164    unsafe {
8165        static_assert_rounding!(ROUNDING);
8166        let a = a.as_f32x16();
8167        let b = b.as_f32x16();
8168        let r = vdivps(a, b, ROUNDING);
8169        transmute(r)
8170    }
8171}
8172
8173/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8174///
8175/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8176/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8177/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8178/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8179/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8180/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8181///
8182/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_round_ps&expand=2169)
8183#[inline]
8184#[target_feature(enable = "avx512f")]
8185#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8186#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
8187#[rustc_legacy_const_generics(4)]
8188pub fn _mm512_mask_div_round_ps<const ROUNDING: i32>(
8189    src: __m512,
8190    k: __mmask16,
8191    a: __m512,
8192    b: __m512,
8193) -> __m512 {
8194    unsafe {
8195        static_assert_rounding!(ROUNDING);
8196        let a = a.as_f32x16();
8197        let b = b.as_f32x16();
8198        let r = vdivps(a, b, ROUNDING);
8199        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
8200    }
8201}
8202
8203/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8204///
8205/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8206/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8207/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8208/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8209/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8210/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8211///
8212/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_round_ps&expand=2170)
8213#[inline]
8214#[target_feature(enable = "avx512f")]
8215#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8216#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
8217#[rustc_legacy_const_generics(3)]
8218pub fn _mm512_maskz_div_round_ps<const ROUNDING: i32>(
8219    k: __mmask16,
8220    a: __m512,
8221    b: __m512,
8222) -> __m512 {
8223    unsafe {
8224        static_assert_rounding!(ROUNDING);
8225        let a = a.as_f32x16();
8226        let b = b.as_f32x16();
8227        let r = vdivps(a, b, ROUNDING);
8228        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
8229    }
8230}
8231
8232/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst.\
8233///
8234/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8235/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8236/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8237/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8238/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8239/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8240///
8241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_round_pd&expand=2165)
8242#[inline]
8243#[target_feature(enable = "avx512f")]
8244#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8245#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
8246#[rustc_legacy_const_generics(2)]
8247pub fn _mm512_div_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
8248    unsafe {
8249        static_assert_rounding!(ROUNDING);
8250        let a = a.as_f64x8();
8251        let b = b.as_f64x8();
8252        let r = vdivpd(a, b, ROUNDING);
8253        transmute(r)
8254    }
8255}
8256
8257/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8258///
8259/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8260/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8261/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8262/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8263/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8264/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8265///
8266/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_round_pd&expand=2166)
8267#[inline]
8268#[target_feature(enable = "avx512f")]
8269#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8270#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
8271#[rustc_legacy_const_generics(4)]
8272pub fn _mm512_mask_div_round_pd<const ROUNDING: i32>(
8273    src: __m512d,
8274    k: __mmask8,
8275    a: __m512d,
8276    b: __m512d,
8277) -> __m512d {
8278    unsafe {
8279        static_assert_rounding!(ROUNDING);
8280        let a = a.as_f64x8();
8281        let b = b.as_f64x8();
8282        let r = vdivpd(a, b, ROUNDING);
8283        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
8284    }
8285}
8286
8287/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8288///
8289/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8290/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8291/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8292/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8293/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8294/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8295///
8296/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_round_pd&expand=2167)
8297#[inline]
8298#[target_feature(enable = "avx512f")]
8299#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8300#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
8301#[rustc_legacy_const_generics(3)]
8302pub fn _mm512_maskz_div_round_pd<const ROUNDING: i32>(
8303    k: __mmask8,
8304    a: __m512d,
8305    b: __m512d,
8306) -> __m512d {
8307    unsafe {
8308        static_assert_rounding!(ROUNDING);
8309        let a = a.as_f64x8();
8310        let b = b.as_f64x8();
8311        let r = vdivpd(a, b, ROUNDING);
8312        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
8313    }
8314}
8315
8316/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst.\
8317///
8318/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8319/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8320/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8321/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8322/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8323/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8324///
8325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_round_ps&expand=5377)
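///
/// An illustrative sketch (hypothetical helper; nightly with `stdarch_x86_avx512` and
/// `avx512f` assumed):
///
/// ```ignore
/// #[target_feature(enable = "avx512f")]
/// unsafe fn sqrt_up(a: __m512) -> __m512 {
///     // Round toward positive infinity and suppress exceptions.
///     _mm512_sqrt_round_ps::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a)
/// }
/// ```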
8326#[inline]
8327#[target_feature(enable = "avx512f")]
8328#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8329#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
8330#[rustc_legacy_const_generics(1)]
8331pub fn _mm512_sqrt_round_ps<const ROUNDING: i32>(a: __m512) -> __m512 {
8332    unsafe {
8333        static_assert_rounding!(ROUNDING);
8334        let a = a.as_f32x16();
8335        let r = vsqrtps(a, ROUNDING);
8336        transmute(r)
8337    }
8338}
8339
8340/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8341///
8342/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8343/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8344/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8345/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8346/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8347/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8348///
8349/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_round_ps&expand=5375)
8350#[inline]
8351#[target_feature(enable = "avx512f")]
8352#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8353#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
8354#[rustc_legacy_const_generics(3)]
8355pub fn _mm512_mask_sqrt_round_ps<const ROUNDING: i32>(
8356    src: __m512,
8357    k: __mmask16,
8358    a: __m512,
8359) -> __m512 {
8360    unsafe {
8361        static_assert_rounding!(ROUNDING);
8362        let a = a.as_f32x16();
8363        let r = vsqrtps(a, ROUNDING);
8364        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
8365    }
8366}
8367
8368/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8369///
8370/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8371/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8372/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8373/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8374/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8375/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8376///
8377/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_round_ps&expand=5376)
8378#[inline]
8379#[target_feature(enable = "avx512f")]
8380#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8381#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
8382#[rustc_legacy_const_generics(2)]
8383pub fn _mm512_maskz_sqrt_round_ps<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m512 {
8384    unsafe {
8385        static_assert_rounding!(ROUNDING);
8386        let a = a.as_f32x16();
8387        let r = vsqrtps(a, ROUNDING);
8388        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
8389    }
8390}
8391
8392/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst.\
8393///
8394/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8395/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8396/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8397/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8398/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8399/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8400///
8401/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_round_pd&expand=5374)
8402#[inline]
8403#[target_feature(enable = "avx512f")]
8404#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8405#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
8406#[rustc_legacy_const_generics(1)]
8407pub fn _mm512_sqrt_round_pd<const ROUNDING: i32>(a: __m512d) -> __m512d {
8408    unsafe {
8409        static_assert_rounding!(ROUNDING);
8410        let a = a.as_f64x8();
8411        let r = vsqrtpd(a, ROUNDING);
8412        transmute(r)
8413    }
8414}
8415
8416/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8417///
8418/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8419/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8420/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8421/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8422/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8423/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8424///
8425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_round_pd&expand=5372)
8426#[inline]
8427#[target_feature(enable = "avx512f")]
8428#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8429#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
8430#[rustc_legacy_const_generics(3)]
8431pub fn _mm512_mask_sqrt_round_pd<const ROUNDING: i32>(
8432    src: __m512d,
8433    k: __mmask8,
8434    a: __m512d,
8435) -> __m512d {
8436    unsafe {
8437        static_assert_rounding!(ROUNDING);
8438        let a = a.as_f64x8();
8439        let r = vsqrtpd(a, ROUNDING);
8440        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
8441    }
8442}
8443
8444/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8445///
8446/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8447/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8448/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8449/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8450/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8451/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8452///
8453/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_round_pd&expand=5373)
8454#[inline]
8455#[target_feature(enable = "avx512f")]
8456#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8457#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
8458#[rustc_legacy_const_generics(2)]
8459pub fn _mm512_maskz_sqrt_round_pd<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m512d {
8460    unsafe {
8461        static_assert_rounding!(ROUNDING);
8462        let a = a.as_f64x8();
8463        let r = vsqrtpd(a, ROUNDING);
8464        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
8465    }
8466}
8467
8468/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.\
8469///
8470/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8471/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8472/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8473/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8474/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8475/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8476///
8477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_round_ps&expand=2565)
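///
/// An illustrative sketch (hypothetical helper; nightly with `stdarch_x86_avx512` and
/// `avx512f` assumed) of a fused multiply-add with an explicit rounding mode:
///
/// ```ignore
/// #[target_feature(enable = "avx512f")]
/// unsafe fn fma_round(a: __m512, b: __m512, c: __m512) -> __m512 {
///     // Computes (a * b) + c with a single rounding, to nearest, exceptions suppressed.
///     _mm512_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c)
/// }
/// ```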
8478#[inline]
8479#[target_feature(enable = "avx512f")]
8480#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8481#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8482#[rustc_legacy_const_generics(3)]
8483pub fn _mm512_fmadd_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
8484    unsafe {
8485        static_assert_rounding!(ROUNDING);
8486        vfmadd132psround(a, b, c, ROUNDING)
8487    }
8488}
8489
8490/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8491///
8492/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8493/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8494/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8495/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8496/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8497/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8498///
8499/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_round_ps&expand=2566)
8500#[inline]
8501#[target_feature(enable = "avx512f")]
8502#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8503#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8504#[rustc_legacy_const_generics(4)]
8505pub fn _mm512_mask_fmadd_round_ps<const ROUNDING: i32>(
8506    a: __m512,
8507    k: __mmask16,
8508    b: __m512,
8509    c: __m512,
8510) -> __m512 {
8511    unsafe {
8512        static_assert_rounding!(ROUNDING);
8513        simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), a)
8514    }
8515}
8516
8517/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8518///
8519/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8520/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8521/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8522/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8523/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8524/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8525///
8526/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_round_ps&expand=2568)
8527#[inline]
8528#[target_feature(enable = "avx512f")]
8529#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8530#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8531#[rustc_legacy_const_generics(4)]
8532pub fn _mm512_maskz_fmadd_round_ps<const ROUNDING: i32>(
8533    k: __mmask16,
8534    a: __m512,
8535    b: __m512,
8536    c: __m512,
8537) -> __m512 {
8538    unsafe {
8539        static_assert_rounding!(ROUNDING);
8540        simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), _mm512_setzero_ps())
8541    }
8542}
8543
8544/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8545///
8546/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8547/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8548/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8549/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8550/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8551/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8552///
8553/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_round_ps&expand=2567)
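///
/// An illustrative sketch (hypothetical helper; nightly with `stdarch_x86_avx512` and
/// `avx512f` assumed) of the `mask3` flavour, where unselected lanes keep `c`:
///
/// ```ignore
/// #[target_feature(enable = "avx512f")]
/// unsafe fn fma_mask3(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
///     // Lanes whose mask bit is 0 retain the value from `c`.
///     _mm512_mask3_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c, k)
/// }
/// ```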
8554#[inline]
8555#[target_feature(enable = "avx512f")]
8556#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8557#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8558#[rustc_legacy_const_generics(4)]
8559pub fn _mm512_mask3_fmadd_round_ps<const ROUNDING: i32>(
8560    a: __m512,
8561    b: __m512,
8562    c: __m512,
8563    k: __mmask16,
8564) -> __m512 {
8565    unsafe {
8566        static_assert_rounding!(ROUNDING);
8567        simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), c)
8568    }
8569}
8570
8571/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.\
8572///
8573/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8574/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8575/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8576/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8577/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8578/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8579///
8580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_round_pd&expand=2561)
8581#[inline]
8582#[target_feature(enable = "avx512f")]
8583#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8584#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8585#[rustc_legacy_const_generics(3)]
8586pub fn _mm512_fmadd_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
8587    unsafe {
8588        static_assert_rounding!(ROUNDING);
8589        vfmadd132pdround(a, b, c, ROUNDING)
8590    }
8591}
8592
8593/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8594///
8595/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8596/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8597/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8598/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8599/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8600/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8601///
8602/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_round_pd&expand=2562)
8603#[inline]
8604#[target_feature(enable = "avx512f")]
8605#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8606#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8607#[rustc_legacy_const_generics(4)]
8608pub fn _mm512_mask_fmadd_round_pd<const ROUNDING: i32>(
8609    a: __m512d,
8610    k: __mmask8,
8611    b: __m512d,
8612    c: __m512d,
8613) -> __m512d {
8614    unsafe {
8615        static_assert_rounding!(ROUNDING);
8616        simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), a)
8617    }
8618}
8619
8620/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8621///
8622/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8623/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8624/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8625/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8626/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8627/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8628///
8629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_round_pd&expand=2564)
8630#[inline]
8631#[target_feature(enable = "avx512f")]
8632#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8633#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8634#[rustc_legacy_const_generics(4)]
8635pub fn _mm512_maskz_fmadd_round_pd<const ROUNDING: i32>(
8636    k: __mmask8,
8637    a: __m512d,
8638    b: __m512d,
8639    c: __m512d,
8640) -> __m512d {
8641    unsafe {
8642        static_assert_rounding!(ROUNDING);
8643        simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), _mm512_setzero_pd())
8644    }
8645}
8646
8647/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8648///
8649/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8650/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8651/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8652/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8653/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8654/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8655///
8656/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_round_pd&expand=2563)
8657#[inline]
8658#[target_feature(enable = "avx512f")]
8659#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8660#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8661#[rustc_legacy_const_generics(4)]
8662pub fn _mm512_mask3_fmadd_round_pd<const ROUNDING: i32>(
8663    a: __m512d,
8664    b: __m512d,
8665    c: __m512d,
8666    k: __mmask8,
8667) -> __m512d {
8668    unsafe {
8669        static_assert_rounding!(ROUNDING);
8670        simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), c)
8671    }
8672}
8673
8674/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.\
8675///
8676/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8677/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8678/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8679/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8680/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8681/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8682///
8683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_round_ps&expand=2651)
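///
/// An illustrative sketch (hypothetical helper; nightly with `stdarch_x86_avx512` and
/// `avx512f` assumed):
///
/// ```ignore
/// #[target_feature(enable = "avx512f")]
/// unsafe fn fms_round(a: __m512, b: __m512, c: __m512) -> __m512 {
///     // Computes (a * b) - c with a single rounding, toward zero, exceptions suppressed.
///     _mm512_fmsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c)
/// }
/// ```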
8684#[inline]
8685#[target_feature(enable = "avx512f")]
8686#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8687#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
8688#[rustc_legacy_const_generics(3)]
8689pub fn _mm512_fmsub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
8690    unsafe {
8691        static_assert_rounding!(ROUNDING);
8692        vfmadd132psround(a, b, simd_neg(c), ROUNDING)
8693    }
8694}
8695
8696/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8697///
8698/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8699/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8700/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8701/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8702/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8703/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8704///
8705/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_round_ps&expand=2652)
8706#[inline]
8707#[target_feature(enable = "avx512f")]
8708#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8709#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
8710#[rustc_legacy_const_generics(4)]
8711pub fn _mm512_mask_fmsub_round_ps<const ROUNDING: i32>(
8712    a: __m512,
8713    k: __mmask16,
8714    b: __m512,
8715    c: __m512,
8716) -> __m512 {
8717    unsafe {
8718        static_assert_rounding!(ROUNDING);
8719        let r = vfmadd132psround(a, b, simd_neg(c), ROUNDING);
8720        simd_select_bitmask(k, r, a)
8721    }
8722}
8723
8724/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8725///
8726/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8727/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8728/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8729/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8730/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8731/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8732///
8733/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_round_ps&expand=2654)
8734#[inline]
8735#[target_feature(enable = "avx512f")]
8736#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8737#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
8738#[rustc_legacy_const_generics(4)]
8739pub fn _mm512_maskz_fmsub_round_ps<const ROUNDING: i32>(
8740    k: __mmask16,
8741    a: __m512,
8742    b: __m512,
8743    c: __m512,
8744) -> __m512 {
8745    unsafe {
8746        static_assert_rounding!(ROUNDING);
8747        let r = vfmadd132psround(a, b, simd_neg(c), ROUNDING);
8748        simd_select_bitmask(k, r, _mm512_setzero_ps())
8749    }
8750}
8751
8752/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8753///
8754/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8755/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8756/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8757/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8758/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8759/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8760///
8761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_round_ps&expand=2653)
8762#[inline]
8763#[target_feature(enable = "avx512f")]
8764#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8765#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
8766#[rustc_legacy_const_generics(4)]
8767pub fn _mm512_mask3_fmsub_round_ps<const ROUNDING: i32>(
8768    a: __m512,
8769    b: __m512,
8770    c: __m512,
8771    k: __mmask16,
8772) -> __m512 {
8773    unsafe {
8774        static_assert_rounding!(ROUNDING);
8775        let r = vfmadd132psround(a, b, simd_neg(c), ROUNDING);
8776        simd_select_bitmask(k, r, c)
8777    }
8778}
8779
8780/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.\
8781///
8782/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8783/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8784/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8785/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8786/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8787/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8788///
8789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_round_pd&expand=2647)
8790#[inline]
8791#[target_feature(enable = "avx512f")]
8792#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8793#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
8794#[rustc_legacy_const_generics(3)]
8795pub fn _mm512_fmsub_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
8796    unsafe {
8797        static_assert_rounding!(ROUNDING);
8798        vfmadd132pdround(a, b, simd_neg(c), ROUNDING)
8799    }
8800}
8801
8802/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8803///
8804/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8805/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8806/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8807/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8808/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8809/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8810///
8811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_round_pd&expand=2648)
8812#[inline]
8813#[target_feature(enable = "avx512f")]
8814#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8815#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
8816#[rustc_legacy_const_generics(4)]
8817pub fn _mm512_mask_fmsub_round_pd<const ROUNDING: i32>(
8818    a: __m512d,
8819    k: __mmask8,
8820    b: __m512d,
8821    c: __m512d,
8822) -> __m512d {
8823    unsafe {
8824        static_assert_rounding!(ROUNDING);
8825        let r = vfmadd132pdround(a, b, simd_neg(c), ROUNDING);
8826        simd_select_bitmask(k, r, a)
8827    }
8828}
8829
8830/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8831///
8832/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8833/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8834/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8835/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8836/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8837/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8838///
8839/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_round_pd&expand=2650)
8840#[inline]
8841#[target_feature(enable = "avx512f")]
8842#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8843#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
8844#[rustc_legacy_const_generics(4)]
8845pub fn _mm512_maskz_fmsub_round_pd<const ROUNDING: i32>(
8846    k: __mmask8,
8847    a: __m512d,
8848    b: __m512d,
8849    c: __m512d,
8850) -> __m512d {
8851    unsafe {
8852        static_assert_rounding!(ROUNDING);
8853        let r = vfmadd132pdround(a, b, simd_neg(c), ROUNDING);
8854        simd_select_bitmask(k, r, _mm512_setzero_pd())
8855    }
8856}
8857
8858/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8859///
8860/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8861/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8862/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8863/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8864/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8865/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8866///
8867/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_round_pd&expand=2649)
8868#[inline]
8869#[target_feature(enable = "avx512f")]
8870#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8871#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
8872#[rustc_legacy_const_generics(4)]
8873pub fn _mm512_mask3_fmsub_round_pd<const ROUNDING: i32>(
8874    a: __m512d,
8875    b: __m512d,
8876    c: __m512d,
8877    k: __mmask8,
8878) -> __m512d {
8879    unsafe {
8880        static_assert_rounding!(ROUNDING);
8881        let r = vfmadd132pdround(a, b, simd_neg(c), ROUNDING);
8882        simd_select_bitmask(k, r, c)
8883    }
8884}
8885
8886/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.\
8887///
8888/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8889/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8890/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8891/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8892/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8893/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8894///
8895/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_round_ps&expand=2619)
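///
/// # Examples
///
/// A minimal sketch (not from the upstream docs) showing the add/subtract alternation. It
/// assumes nightly with `stdarch_x86_avx512` and AVX-512F hardware, and uses other
/// `core::arch` helpers, so it is marked `ignore`:
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set1_ps(1.0);
///     let b = _mm512_set1_ps(4.0);
///     let c = _mm512_set1_ps(0.5);
///     // Even-indexed lanes compute a * b - c = 3.5, odd-indexed lanes a * b + c = 4.5.
///     let r = _mm512_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
///     let mut out = [0.0f32; 16];
///     unsafe { _mm512_storeu_ps(out.as_mut_ptr(), r) };
///     assert_eq!(out[0], 3.5);
///     assert_eq!(out[1], 4.5);
/// }
/// ```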
8896#[inline]
8897#[target_feature(enable = "avx512f")]
8898#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8899#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
8900#[rustc_legacy_const_generics(3)]
8901pub fn _mm512_fmaddsub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
8902    unsafe {
8903        static_assert_rounding!(ROUNDING);
8904        vfmaddsubpsround(a, b, c, ROUNDING)
8905    }
8906}
8907
8908/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8909///
8910/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8911/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8912/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8913/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8914/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8915/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8916///
8917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_round_ps&expand=2620)
8918#[inline]
8919#[target_feature(enable = "avx512f")]
8920#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8921#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
8922#[rustc_legacy_const_generics(4)]
8923pub fn _mm512_mask_fmaddsub_round_ps<const ROUNDING: i32>(
8924    a: __m512,
8925    k: __mmask16,
8926    b: __m512,
8927    c: __m512,
8928) -> __m512 {
8929    unsafe {
8930        static_assert_rounding!(ROUNDING);
8931        simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), a)
8932    }
8933}
8934
8935/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8936///
8937/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8938/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8939/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8940/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8941/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8942/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8943///
8944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_round_ps&expand=2622)
8945#[inline]
8946#[target_feature(enable = "avx512f")]
8947#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8948#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
8949#[rustc_legacy_const_generics(4)]
8950pub fn _mm512_maskz_fmaddsub_round_ps<const ROUNDING: i32>(
8951    k: __mmask16,
8952    a: __m512,
8953    b: __m512,
8954    c: __m512,
8955) -> __m512 {
8956    unsafe {
8957        static_assert_rounding!(ROUNDING);
8958        simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), _mm512_setzero_ps())
8959    }
8960}
8961
8962/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8963///
8964/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8965/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8966/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8967/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8968/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8969/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8970///
8971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_round_ps&expand=2621)
8972#[inline]
8973#[target_feature(enable = "avx512f")]
8974#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8975#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
8976#[rustc_legacy_const_generics(4)]
8977pub fn _mm512_mask3_fmaddsub_round_ps<const ROUNDING: i32>(
8978    a: __m512,
8979    b: __m512,
8980    c: __m512,
8981    k: __mmask16,
8982) -> __m512 {
8983    unsafe {
8984        static_assert_rounding!(ROUNDING);
8985        simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), c)
8986    }
8987}
8988
8989/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.\
8990///
8991/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8992/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8993/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8994/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8995/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8996/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8997///
8998/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_round_pd&expand=2615)
8999#[inline]
9000#[target_feature(enable = "avx512f")]
9001#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9002#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
9003#[rustc_legacy_const_generics(3)]
9004pub fn _mm512_fmaddsub_round_pd<const ROUNDING: i32>(
9005    a: __m512d,
9006    b: __m512d,
9007    c: __m512d,
9008) -> __m512d {
9009    unsafe {
9010        static_assert_rounding!(ROUNDING);
9011        vfmaddsubpdround(a, b, c, ROUNDING)
9012    }
9013}
9014
9015/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9016///
9017/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9018/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9019/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9020/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9021/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9022/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9023///
9024/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_round_pd&expand=2616)
9025#[inline]
9026#[target_feature(enable = "avx512f")]
9027#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9028#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
9029#[rustc_legacy_const_generics(4)]
9030pub fn _mm512_mask_fmaddsub_round_pd<const ROUNDING: i32>(
9031    a: __m512d,
9032    k: __mmask8,
9033    b: __m512d,
9034    c: __m512d,
9035) -> __m512d {
9036    unsafe {
9037        static_assert_rounding!(ROUNDING);
9038        simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), a)
9039    }
9040}
9041
9042/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9043///
9044/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9045/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9046/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9047/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9048/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9049/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9050///
9051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_round_pd&expand=2618)
9052#[inline]
9053#[target_feature(enable = "avx512f")]
9054#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9055#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
9056#[rustc_legacy_const_generics(4)]
9057pub fn _mm512_maskz_fmaddsub_round_pd<const ROUNDING: i32>(
9058    k: __mmask8,
9059    a: __m512d,
9060    b: __m512d,
9061    c: __m512d,
9062) -> __m512d {
9063    unsafe {
9064        static_assert_rounding!(ROUNDING);
9065        simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), _mm512_setzero_pd())
9066    }
9067}
9068
9069/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9070///
9071/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9072/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9073/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9074/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9075/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9076/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9077///
9078/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_round_pd&expand=2617)
9079#[inline]
9080#[target_feature(enable = "avx512f")]
9081#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9082#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
9083#[rustc_legacy_const_generics(4)]
9084pub fn _mm512_mask3_fmaddsub_round_pd<const ROUNDING: i32>(
9085    a: __m512d,
9086    b: __m512d,
9087    c: __m512d,
9088    k: __mmask8,
9089) -> __m512d {
9090    unsafe {
9091        static_assert_rounding!(ROUNDING);
9092        simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), c)
9093    }
9094}
9095
9096/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.\
9097///
9098/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9099/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9100/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9101/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9102/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9103/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9104///
9105/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_round_ps&expand=2699)
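///
/// # Examples
///
/// A minimal sketch (not from the upstream docs) showing that the alternation is the
/// opposite of `_mm512_fmaddsub_round_ps`. It assumes nightly with `stdarch_x86_avx512` and
/// AVX-512F hardware, and uses other `core::arch` helpers, so it is marked `ignore`:
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set1_ps(1.0);
///     let b = _mm512_set1_ps(4.0);
///     let c = _mm512_set1_ps(0.5);
///     // Even-indexed lanes compute a * b + c = 4.5, odd-indexed lanes a * b - c = 3.5.
///     let r = _mm512_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
///     let mut out = [0.0f32; 16];
///     unsafe { _mm512_storeu_ps(out.as_mut_ptr(), r) };
///     assert_eq!(out[0], 4.5);
///     assert_eq!(out[1], 3.5);
/// }
/// ```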
9106#[inline]
9107#[target_feature(enable = "avx512f")]
9108#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9109#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9110#[rustc_legacy_const_generics(3)]
9111pub fn _mm512_fmsubadd_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
9112    unsafe {
9113        static_assert_rounding!(ROUNDING);
9114        vfmaddsubpsround(a, b, simd_neg(c), ROUNDING)
9115    }
9116}
9117
9118/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9119///
9120/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9121/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9122/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9123/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9124/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9125/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9126///
9127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_round_ps&expand=2700)
9128#[inline]
9129#[target_feature(enable = "avx512f")]
9130#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9131#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9132#[rustc_legacy_const_generics(4)]
9133pub fn _mm512_mask_fmsubadd_round_ps<const ROUNDING: i32>(
9134    a: __m512,
9135    k: __mmask16,
9136    b: __m512,
9137    c: __m512,
9138) -> __m512 {
9139    unsafe {
9140        static_assert_rounding!(ROUNDING);
9141        let r = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING);
9142        simd_select_bitmask(k, r, a)
9143    }
9144}
9145
9146/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9147///
9148/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9149/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9150/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9151/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9152/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9153/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9154///
9155/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_round_ps&expand=2702)
9156#[inline]
9157#[target_feature(enable = "avx512f")]
9158#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9159#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9160#[rustc_legacy_const_generics(4)]
9161pub fn _mm512_maskz_fmsubadd_round_ps<const ROUNDING: i32>(
9162    k: __mmask16,
9163    a: __m512,
9164    b: __m512,
9165    c: __m512,
9166) -> __m512 {
9167    unsafe {
9168        static_assert_rounding!(ROUNDING);
9169        let r = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING);
9170        simd_select_bitmask(k, r, _mm512_setzero_ps())
9171    }
9172}
9173
9174/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9175///
9176/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9177/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9178/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9179/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9180/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9181/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9182///
9183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_round_ps&expand=2701)
9184#[inline]
9185#[target_feature(enable = "avx512f")]
9186#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9187#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9188#[rustc_legacy_const_generics(4)]
9189pub fn _mm512_mask3_fmsubadd_round_ps<const ROUNDING: i32>(
9190    a: __m512,
9191    b: __m512,
9192    c: __m512,
9193    k: __mmask16,
9194) -> __m512 {
9195    unsafe {
9196        static_assert_rounding!(ROUNDING);
9197        let r = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING);
9198        simd_select_bitmask(k, r, c)
9199    }
9200}
9201
9202/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.\
9203///
9204/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9205/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9206/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9207/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9208/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9209/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9210///
9211/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_round_pd&expand=2695)
9212#[inline]
9213#[target_feature(enable = "avx512f")]
9214#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9215#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9216#[rustc_legacy_const_generics(3)]
9217pub fn _mm512_fmsubadd_round_pd<const ROUNDING: i32>(
9218    a: __m512d,
9219    b: __m512d,
9220    c: __m512d,
9221) -> __m512d {
9222    unsafe {
9223        static_assert_rounding!(ROUNDING);
9224        vfmaddsubpdround(a, b, simd_neg(c), ROUNDING)
9225    }
9226}
9227
9228/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9229///
9230/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9231/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9232/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9233/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9234/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9235/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9236///
9237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_round_pd&expand=2696)
9238#[inline]
9239#[target_feature(enable = "avx512f")]
9240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9241#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9242#[rustc_legacy_const_generics(4)]
9243pub fn _mm512_mask_fmsubadd_round_pd<const ROUNDING: i32>(
9244    a: __m512d,
9245    k: __mmask8,
9246    b: __m512d,
9247    c: __m512d,
9248) -> __m512d {
9249    unsafe {
9250        static_assert_rounding!(ROUNDING);
9251        let r = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING);
9252        simd_select_bitmask(k, r, a)
9253    }
9254}
9255
9256/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9257///
9258/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9259/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9260/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9261/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9262/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9263/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9264///
9265/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_round_pd&expand=2698)
9266#[inline]
9267#[target_feature(enable = "avx512f")]
9268#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9269#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9270#[rustc_legacy_const_generics(4)]
9271pub fn _mm512_maskz_fmsubadd_round_pd<const ROUNDING: i32>(
9272    k: __mmask8,
9273    a: __m512d,
9274    b: __m512d,
9275    c: __m512d,
9276) -> __m512d {
9277    unsafe {
9278        static_assert_rounding!(ROUNDING);
9279        let r = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING);
9280        simd_select_bitmask(k, r, _mm512_setzero_pd())
9281    }
9282}
9283
9284/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9285///
9286/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9287/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9288/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9289/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9290/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9291/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9292///
9293/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_round_pd&expand=2697)
9294#[inline]
9295#[target_feature(enable = "avx512f")]
9296#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9297#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9298#[rustc_legacy_const_generics(4)]
9299pub fn _mm512_mask3_fmsubadd_round_pd<const ROUNDING: i32>(
9300    a: __m512d,
9301    b: __m512d,
9302    c: __m512d,
9303    k: __mmask8,
9304) -> __m512d {
9305    unsafe {
9306        static_assert_rounding!(ROUNDING);
9307        let r = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING);
9308        simd_select_bitmask(k, r, c)
9309    }
9310}
9311
9312/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.\
9313///
9314/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9315/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9316/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9317/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9318/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9319/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9320///
9321/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_round_ps&expand=2731)
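///
/// # Examples
///
/// A minimal sketch (not from the upstream docs) of the negated multiply-add. It assumes
/// nightly with `stdarch_x86_avx512` and AVX-512F hardware, and uses other `core::arch`
/// helpers, so it is marked `ignore`:
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(3.0);
///     let c = _mm512_set1_ps(10.0);
///     // Each lane computes -(a * b) + c = -6.0 + 10.0 = 4.0.
///     let r = _mm512_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
///     let mut out = [0.0f32; 16];
///     unsafe { _mm512_storeu_ps(out.as_mut_ptr(), r) };
///     assert!(out.iter().all(|&x| x == 4.0));
/// }
/// ```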
9322#[inline]
9323#[target_feature(enable = "avx512f")]
9324#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9325#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9326#[rustc_legacy_const_generics(3)]
9327pub fn _mm512_fnmadd_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
9328    unsafe {
9329        static_assert_rounding!(ROUNDING);
9330        vfmadd132psround(simd_neg(a), b, c, ROUNDING)
9331    }
9332}
9333
9334/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9335///
9336/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9337/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9338/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9339/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9340/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9341/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9342///
9343/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_round_ps&expand=2732)
9344#[inline]
9345#[target_feature(enable = "avx512f")]
9346#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9347#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9348#[rustc_legacy_const_generics(4)]
9349pub fn _mm512_mask_fnmadd_round_ps<const ROUNDING: i32>(
9350    a: __m512,
9351    k: __mmask16,
9352    b: __m512,
9353    c: __m512,
9354) -> __m512 {
9355    unsafe {
9356        static_assert_rounding!(ROUNDING);
9357        let r = vfmadd132psround(simd_neg(a), b, c, ROUNDING);
9358        simd_select_bitmask(k, r, a)
9359    }
9360}
9361
9362/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9363///
9364/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9365/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9366/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9367/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9368/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9369/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9370///
9371/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_round_ps&expand=2734)
9372#[inline]
9373#[target_feature(enable = "avx512f")]
9374#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9375#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9376#[rustc_legacy_const_generics(4)]
9377pub fn _mm512_maskz_fnmadd_round_ps<const ROUNDING: i32>(
9378    k: __mmask16,
9379    a: __m512,
9380    b: __m512,
9381    c: __m512,
9382) -> __m512 {
9383    unsafe {
9384        static_assert_rounding!(ROUNDING);
9385        let r = vfmadd132psround(simd_neg(a), b, c, ROUNDING);
9386        simd_select_bitmask(k, r, _mm512_setzero_ps())
9387    }
9388}
9389
9390/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9391///
9392/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9393/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9394/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9395/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9396/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9397/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9398///
9399/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_round_ps&expand=2733)
9400#[inline]
9401#[target_feature(enable = "avx512f")]
9402#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9403#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9404#[rustc_legacy_const_generics(4)]
9405pub fn _mm512_mask3_fnmadd_round_ps<const ROUNDING: i32>(
9406    a: __m512,
9407    b: __m512,
9408    c: __m512,
9409    k: __mmask16,
9410) -> __m512 {
9411    unsafe {
9412        static_assert_rounding!(ROUNDING);
9413        let r = vfmadd132psround(simd_neg(a), b, c, ROUNDING);
9414        simd_select_bitmask(k, r, c)
9415    }
9416}
9417
9418/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.\
9419///
9420/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9421/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9422/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9423/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9424/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9425/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9426///
9427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_round_pd&expand=2711)
9428#[inline]
9429#[target_feature(enable = "avx512f")]
9430#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9431#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9432#[rustc_legacy_const_generics(3)]
9433pub fn _mm512_fnmadd_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
9434    unsafe {
9435        static_assert_rounding!(ROUNDING);
9436        vfmadd132pdround(simd_neg(a), b, c, ROUNDING)
9437    }
9438}
9439
9440/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9441///
9442/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9443/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9444/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9445/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9446/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9447/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9448///
9449/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_round_pd&expand=2728)
9450#[inline]
9451#[target_feature(enable = "avx512f")]
9452#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9453#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9454#[rustc_legacy_const_generics(4)]
9455pub fn _mm512_mask_fnmadd_round_pd<const ROUNDING: i32>(
9456    a: __m512d,
9457    k: __mmask8,
9458    b: __m512d,
9459    c: __m512d,
9460) -> __m512d {
9461    unsafe {
9462        static_assert_rounding!(ROUNDING);
9463        let r = vfmadd132pdround(simd_neg(a), b, c, ROUNDING);
9464        simd_select_bitmask(k, r, a)
9465    }
9466}
9467
9468/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9469///
9470/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9471/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9472/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9473/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9474/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9475/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9476///
9477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_round_pd&expand=2730)
9478#[inline]
9479#[target_feature(enable = "avx512f")]
9480#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9481#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9482#[rustc_legacy_const_generics(4)]
9483pub fn _mm512_maskz_fnmadd_round_pd<const ROUNDING: i32>(
9484    k: __mmask8,
9485    a: __m512d,
9486    b: __m512d,
9487    c: __m512d,
9488) -> __m512d {
9489    unsafe {
9490        static_assert_rounding!(ROUNDING);
9491        let r = vfmadd132pdround(simd_neg(a), b, c, ROUNDING);
9492        simd_select_bitmask(k, r, _mm512_setzero_pd())
9493    }
9494}
9495
9496/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9497///
9498/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9499/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9500/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9501/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9502/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9503/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9504///
9505/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_round_pd&expand=2729)
9506#[inline]
9507#[target_feature(enable = "avx512f")]
9508#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9509#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9510#[rustc_legacy_const_generics(4)]
9511pub fn _mm512_mask3_fnmadd_round_pd<const ROUNDING: i32>(
9512    a: __m512d,
9513    b: __m512d,
9514    c: __m512d,
9515    k: __mmask8,
9516) -> __m512d {
9517    unsafe {
9518        static_assert_rounding!(ROUNDING);
9519        let r = vfmadd132pdround(simd_neg(a), b, c, ROUNDING);
9520        simd_select_bitmask(k, r, c)
9521    }
9522}
9523
9524/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.\
9525///
9526/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9527/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9528/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9529/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9530/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9531/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9532///
9533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_round_ps&expand=2779)
9534#[inline]
9535#[target_feature(enable = "avx512f")]
9536#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9537#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9538#[rustc_legacy_const_generics(3)]
9539pub fn _mm512_fnmsub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
9540    unsafe {
9541        static_assert_rounding!(ROUNDING);
9542        vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING)
9543    }
9544}
9545
9546/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9547///
9548/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9549/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9550/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9551/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9552/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9553/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9554///
9555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_round_ps&expand=2780)
9556#[inline]
9557#[target_feature(enable = "avx512f")]
9558#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9559#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9560#[rustc_legacy_const_generics(4)]
9561pub fn _mm512_mask_fnmsub_round_ps<const ROUNDING: i32>(
9562    a: __m512,
9563    k: __mmask16,
9564    b: __m512,
9565    c: __m512,
9566) -> __m512 {
9567    unsafe {
9568        static_assert_rounding!(ROUNDING);
9569        let r = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING);
9570        simd_select_bitmask(k, r, a)
9571    }
9572}
9573
9574/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9575///
9576/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9577/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9578/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9579/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9580/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9581/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9582///
9583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_round_ps&expand=2782)
9584#[inline]
9585#[target_feature(enable = "avx512f")]
9586#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9587#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9588#[rustc_legacy_const_generics(4)]
9589pub fn _mm512_maskz_fnmsub_round_ps<const ROUNDING: i32>(
9590    k: __mmask16,
9591    a: __m512,
9592    b: __m512,
9593    c: __m512,
9594) -> __m512 {
9595    unsafe {
9596        static_assert_rounding!(ROUNDING);
9597        let r = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING);
9598        simd_select_bitmask(k, r, _mm512_setzero_ps())
9599    }
9600}
9601
9602/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9603///
9604/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9605/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9606/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9607/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9608/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9609/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9610///
9611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_round_ps&expand=2781)
9612#[inline]
9613#[target_feature(enable = "avx512f")]
9614#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9615#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9616#[rustc_legacy_const_generics(4)]
9617pub fn _mm512_mask3_fnmsub_round_ps<const ROUNDING: i32>(
9618    a: __m512,
9619    b: __m512,
9620    c: __m512,
9621    k: __mmask16,
9622) -> __m512 {
9623    unsafe {
9624        static_assert_rounding!(ROUNDING);
9625        let r = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING);
9626        simd_select_bitmask(k, r, c)
9627    }
9628}
9629
9630/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.\
9631///
9632/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9633/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9634/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9635/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9636/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9637/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9638///
9639/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_round_pd&expand=2775)
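///
/// # Examples
///
/// A minimal sketch (not from the upstream docs) of the negated multiply-subtract. It
/// assumes nightly with `stdarch_x86_avx512` and AVX-512F hardware, and uses other
/// `core::arch` helpers, so it is marked `ignore`:
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set1_pd(2.0);
///     let b = _mm512_set1_pd(3.0);
///     let c = _mm512_set1_pd(1.0);
///     // Each lane computes -(a * b) - c = -6.0 - 1.0 = -7.0.
///     let r = _mm512_fnmsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
///     let mut out = [0.0f64; 8];
///     unsafe { _mm512_storeu_pd(out.as_mut_ptr(), r) };
///     assert!(out.iter().all(|&x| x == -7.0));
/// }
/// ```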
9640#[inline]
9641#[target_feature(enable = "avx512f")]
9642#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9643#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
9644#[rustc_legacy_const_generics(3)]
9645pub fn _mm512_fnmsub_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
9646    unsafe {
9647        static_assert_rounding!(ROUNDING);
9648        vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING)
9649    }
9650}
9651
9652/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9653///
9654/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9655/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9656/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9657/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9658/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9659/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9660///
9661/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_round_pd&expand=2776)
9662#[inline]
9663#[target_feature(enable = "avx512f")]
9664#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9665#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
9666#[rustc_legacy_const_generics(4)]
9667pub fn _mm512_mask_fnmsub_round_pd<const ROUNDING: i32>(
9668    a: __m512d,
9669    k: __mmask8,
9670    b: __m512d,
9671    c: __m512d,
9672) -> __m512d {
9673    unsafe {
9674        static_assert_rounding!(ROUNDING);
9675        let r = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING);
9676        simd_select_bitmask(k, r, a)
9677    }
9678}
9679
9680/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9681///
9682/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9683/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9684/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9685/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9686/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9687/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9688///
9689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_round_pd&expand=2778)
9690#[inline]
9691#[target_feature(enable = "avx512f")]
9692#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9693#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
9694#[rustc_legacy_const_generics(4)]
9695pub fn _mm512_maskz_fnmsub_round_pd<const ROUNDING: i32>(
9696    k: __mmask8,
9697    a: __m512d,
9698    b: __m512d,
9699    c: __m512d,
9700) -> __m512d {
9701    unsafe {
9702        static_assert_rounding!(ROUNDING);
9703        let r = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING);
9704        simd_select_bitmask(k, r, _mm512_setzero_pd())
9705    }
9706}
9707
9708/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9709///
9710/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9711/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9712/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9713/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9714/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9715/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9716///
9717/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_round_pd&expand=2777)
9718#[inline]
9719#[target_feature(enable = "avx512f")]
9720#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9721#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
9722#[rustc_legacy_const_generics(4)]
9723pub fn _mm512_mask3_fnmsub_round_pd<const ROUNDING: i32>(
9724    a: __m512d,
9725    b: __m512d,
9726    c: __m512d,
9727    k: __mmask8,
9728) -> __m512d {
9729    unsafe {
9730        static_assert_rounding!(ROUNDING);
9731        let r = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING);
9732        simd_select_bitmask(k, r, c)
9733    }
9734}
9735
9736/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst.\
9737/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9738///
9739/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_round_ps&expand=3662)
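///
/// # Examples
///
/// A minimal sketch (not from the upstream docs) of the SAE-controlled maximum. It assumes
/// nightly with `stdarch_x86_avx512` and AVX-512F hardware, and uses other `core::arch`
/// helpers, so it is marked `ignore`:
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set1_ps(1.0);
///     let b = _mm512_set1_ps(2.5);
///     // Lane-wise maximum; `_MM_FROUND_NO_EXC` suppresses floating-point exceptions.
///     let r = _mm512_max_round_ps::<_MM_FROUND_NO_EXC>(a, b);
///     let mut out = [0.0f32; 16];
///     unsafe { _mm512_storeu_ps(out.as_mut_ptr(), r) };
///     assert!(out.iter().all(|&x| x == 2.5));
/// }
/// ```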
9740#[inline]
9741#[target_feature(enable = "avx512f")]
9742#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9743#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
9744#[rustc_legacy_const_generics(2)]
9745pub fn _mm512_max_round_ps<const SAE: i32>(a: __m512, b: __m512) -> __m512 {
9746    unsafe {
9747        static_assert_sae!(SAE);
9748        let a = a.as_f32x16();
9749        let b = b.as_f32x16();
9750        let r = vmaxps(a, b, SAE);
9751        transmute(r)
9752    }
9753}
9754
9755/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
9756/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9757///
9758/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_round_ps&expand=3660)
9759#[inline]
9760#[target_feature(enable = "avx512f")]
9761#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9762#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
9763#[rustc_legacy_const_generics(4)]
9764pub fn _mm512_mask_max_round_ps<const SAE: i32>(
9765    src: __m512,
9766    k: __mmask16,
9767    a: __m512,
9768    b: __m512,
9769) -> __m512 {
9770    unsafe {
9771        static_assert_sae!(SAE);
9772        let a = a.as_f32x16();
9773        let b = b.as_f32x16();
9774        let r = vmaxps(a, b, SAE);
9775        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
9776    }
9777}
9778
9779/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9780/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9781///
9782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_round_ps&expand=3661)
9783#[inline]
9784#[target_feature(enable = "avx512f")]
9785#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9786#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
9787#[rustc_legacy_const_generics(3)]
9788pub fn _mm512_maskz_max_round_ps<const SAE: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
9789    unsafe {
9790        static_assert_sae!(SAE);
9791        let a = a.as_f32x16();
9792        let b = b.as_f32x16();
9793        let r = vmaxps(a, b, SAE);
9794        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
9795    }
9796}
9797
9798/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst.\
9799/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9800///
9801/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_round_pd&expand=3659)
9802#[inline]
9803#[target_feature(enable = "avx512f")]
9804#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9805#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
9806#[rustc_legacy_const_generics(2)]
9807pub fn _mm512_max_round_pd<const SAE: i32>(a: __m512d, b: __m512d) -> __m512d {
9808    unsafe {
9809        static_assert_sae!(SAE);
9810        let a = a.as_f64x8();
9811        let b = b.as_f64x8();
9812        let r = vmaxpd(a, b, SAE);
9813        transmute(r)
9814    }
9815}
9816
9817/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
9818/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9819///
9820/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_round_pd&expand=3657)
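///
/// # Examples
///
/// A minimal sketch (not from the upstream docs) showing how the writemask falls back to
/// `src`. It assumes nightly with `stdarch_x86_avx512` and AVX-512F hardware, and uses other
/// `core::arch` helpers, so it is marked `ignore`:
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let src = _mm512_set1_pd(-1.0);
///     let a = _mm512_set1_pd(1.0);
///     let b = _mm512_set1_pd(2.0);
///     // Lanes 0-3 receive max(a, b) = 2.0; lanes 4-7 are copied from `src` because their
///     // mask bits are clear.
///     let r = _mm512_mask_max_round_pd::<_MM_FROUND_NO_EXC>(src, 0b0000_1111, a, b);
///     let mut out = [0.0f64; 8];
///     unsafe { _mm512_storeu_pd(out.as_mut_ptr(), r) };
///     assert_eq!(out, [2.0, 2.0, 2.0, 2.0, -1.0, -1.0, -1.0, -1.0]);
/// }
/// ```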
9821#[inline]
9822#[target_feature(enable = "avx512f")]
9823#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9824#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
9825#[rustc_legacy_const_generics(4)]
9826pub fn _mm512_mask_max_round_pd<const SAE: i32>(
9827    src: __m512d,
9828    k: __mmask8,
9829    a: __m512d,
9830    b: __m512d,
9831) -> __m512d {
9832    unsafe {
9833        static_assert_sae!(SAE);
9834        let a = a.as_f64x8();
9835        let b = b.as_f64x8();
9836        let r = vmaxpd(a, b, SAE);
9837        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
9838    }
9839}
9840
9841/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9842/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9843///
9844/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_round_pd&expand=3658)
9845#[inline]
9846#[target_feature(enable = "avx512f")]
9847#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9848#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
9849#[rustc_legacy_const_generics(3)]
9850pub fn _mm512_maskz_max_round_pd<const SAE: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
9851    unsafe {
9852        static_assert_sae!(SAE);
9853        let a = a.as_f64x8();
9854        let b = b.as_f64x8();
9855        let r = vmaxpd(a, b, SAE);
9856        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
9857    }
9858}
9859
9860/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst.\
9861/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9862///
9863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_round_ps&expand=3776)
9864#[inline]
9865#[target_feature(enable = "avx512f")]
9866#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9867#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
9868#[rustc_legacy_const_generics(2)]
9869pub fn _mm512_min_round_ps<const SAE: i32>(a: __m512, b: __m512) -> __m512 {
9870    unsafe {
9871        static_assert_sae!(SAE);
9872        let a = a.as_f32x16();
9873        let b = b.as_f32x16();
9874        let r = vminps(a, b, SAE);
9875        transmute(r)
9876    }
9877}
9878
9879/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
9880/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9881///
9882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_round_ps&expand=3774)
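///
/// A minimal illustrative sketch of the writemask behaviour (values are arbitrary,
/// not from Intel's documentation), assuming `avx512f` is enabled:
///
/// ```ignore
/// let src = _mm512_set1_ps(-1.0);
/// let a = _mm512_set1_ps(1.0);
/// let b = _mm512_set1_ps(2.0);
/// // Lanes 0..8 receive min(1.0, 2.0) == 1.0; lanes 8..16 keep -1.0 copied from `src`.
/// let r = _mm512_mask_min_round_ps::<_MM_FROUND_NO_EXC>(src, 0x00ff, a, b);
/// ```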
9883#[inline]
9884#[target_feature(enable = "avx512f")]
9885#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9886#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
9887#[rustc_legacy_const_generics(4)]
9888pub fn _mm512_mask_min_round_ps<const SAE: i32>(
9889    src: __m512,
9890    k: __mmask16,
9891    a: __m512,
9892    b: __m512,
9893) -> __m512 {
9894    unsafe {
9895        static_assert_sae!(SAE);
9896        let a = a.as_f32x16();
9897        let b = b.as_f32x16();
9898        let r = vminps(a, b, SAE);
9899        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
9900    }
9901}
9902
9903/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9904/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9905///
9906/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_round_ps&expand=3775)
9907#[inline]
9908#[target_feature(enable = "avx512f")]
9909#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9910#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
9911#[rustc_legacy_const_generics(3)]
9912pub fn _mm512_maskz_min_round_ps<const SAE: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
9913    unsafe {
9914        static_assert_sae!(SAE);
9915        let a = a.as_f32x16();
9916        let b = b.as_f32x16();
9917        let r = vminps(a, b, SAE);
9918        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
9919    }
9920}
9921
9922/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst.\
9923/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9924///
9925/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_round_pd&expand=3773)
9926#[inline]
9927#[target_feature(enable = "avx512f")]
9928#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9929#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
9930#[rustc_legacy_const_generics(2)]
9931pub fn _mm512_min_round_pd<const SAE: i32>(a: __m512d, b: __m512d) -> __m512d {
9932    unsafe {
9933        static_assert_sae!(SAE);
9934        let a = a.as_f64x8();
9935        let b = b.as_f64x8();
9936        let r = vminpd(a, b, SAE);
9937        transmute(r)
9938    }
9939}
9940
9941/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
9942/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9943///
9944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_round_pd&expand=3771)
9945#[inline]
9946#[target_feature(enable = "avx512f")]
9947#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9948#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
9949#[rustc_legacy_const_generics(4)]
9950pub fn _mm512_mask_min_round_pd<const SAE: i32>(
9951    src: __m512d,
9952    k: __mmask8,
9953    a: __m512d,
9954    b: __m512d,
9955) -> __m512d {
9956    unsafe {
9957        static_assert_sae!(SAE);
9958        let a = a.as_f64x8();
9959        let b = b.as_f64x8();
9960        let r = vminpd(a, b, SAE);
9961        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
9962    }
9963}
9964
9965/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9966/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9967///
9968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_round_pd&expand=3772)
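///
/// A minimal illustrative sketch of the zeromask behaviour (values are arbitrary,
/// not from Intel's documentation), assuming `avx512f` is enabled:
///
/// ```ignore
/// let a = _mm512_set1_pd(-4.0);
/// let b = _mm512_set1_pd(2.0);
/// // Lanes 0..4 receive min(-4.0, 2.0) == -4.0; lanes 4..8 are zeroed.
/// let r = _mm512_maskz_min_round_pd::<_MM_FROUND_NO_EXC>(0b0000_1111, a, b);
/// ```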
9969#[inline]
9970#[target_feature(enable = "avx512f")]
9971#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9972#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
9973#[rustc_legacy_const_generics(3)]
9974pub fn _mm512_maskz_min_round_pd<const SAE: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
9975    unsafe {
9976        static_assert_sae!(SAE);
9977        let a = a.as_f64x8();
9978        let b = b.as_f64x8();
9979        let r = vminpd(a, b, SAE);
9980        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
9981    }
9982}
9983
9984/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.\
9985/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9986///
9987/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_round_ps&expand=2850)
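///
/// A minimal illustrative sketch (values are arbitrary, not from Intel's documentation),
/// assuming `avx512f` is enabled:
///
/// ```ignore
/// let a = _mm512_set1_ps(8.0);
/// // getexp returns floor(log2(|x|)) as a float: every lane of `r` is 3.0.
/// let r = _mm512_getexp_round_ps::<_MM_FROUND_NO_EXC>(a);
/// ```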
9988#[inline]
9989#[target_feature(enable = "avx512f")]
9990#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9991#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
9992#[rustc_legacy_const_generics(1)]
9993pub fn _mm512_getexp_round_ps<const SAE: i32>(a: __m512) -> __m512 {
9994    unsafe {
9995        static_assert_sae!(SAE);
9996        let a = a.as_f32x16();
9997        let r = vgetexpps(a, f32x16::ZERO, 0b11111111_11111111, SAE);
9998        transmute(r)
9999    }
10000}
10001
10002/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
10003/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10004///
10005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_round_ps&expand=2851)
10006#[inline]
10007#[target_feature(enable = "avx512f")]
10008#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10009#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
10010#[rustc_legacy_const_generics(3)]
10011pub fn _mm512_mask_getexp_round_ps<const SAE: i32>(src: __m512, k: __mmask16, a: __m512) -> __m512 {
10012    unsafe {
10013        static_assert_sae!(SAE);
10014        let a = a.as_f32x16();
10015        let src = src.as_f32x16();
10016        let r = vgetexpps(a, src, k, SAE);
10017        transmute(r)
10018    }
10019}
10020
10021/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
10022/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10023///
10024/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_round_ps&expand=2852)
10025#[inline]
10026#[target_feature(enable = "avx512f")]
10027#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10028#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
10029#[rustc_legacy_const_generics(2)]
10030pub fn _mm512_maskz_getexp_round_ps<const SAE: i32>(k: __mmask16, a: __m512) -> __m512 {
10031    unsafe {
10032        static_assert_sae!(SAE);
10033        let a = a.as_f32x16();
10034        let r = vgetexpps(a, f32x16::ZERO, k, SAE);
10035        transmute(r)
10036    }
10037}
10038
10039/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.\
10040/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10041///
10042/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_round_pd&expand=2847)
10043#[inline]
10044#[target_feature(enable = "avx512f")]
10045#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10046#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
10047#[rustc_legacy_const_generics(1)]
10048pub fn _mm512_getexp_round_pd<const SAE: i32>(a: __m512d) -> __m512d {
10049    unsafe {
10050        static_assert_sae!(SAE);
10051        let a = a.as_f64x8();
10052        let r = vgetexppd(a, f64x8::ZERO, 0b11111111, SAE);
10053        transmute(r)
10054    }
10055}
10056
10057/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
10058/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10059///
10060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_round_pd&expand=2848)
10061#[inline]
10062#[target_feature(enable = "avx512f")]
10063#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10064#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
10065#[rustc_legacy_const_generics(3)]
10066pub fn _mm512_mask_getexp_round_pd<const SAE: i32>(
10067    src: __m512d,
10068    k: __mmask8,
10069    a: __m512d,
10070) -> __m512d {
10071    unsafe {
10072        static_assert_sae!(SAE);
10073        let a = a.as_f64x8();
10074        let src = src.as_f64x8();
10075        let r = vgetexppd(a, src, k, SAE);
10076        transmute(r)
10077    }
10078}
10079
10080/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
10081/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10082///
10083/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_round_pd&expand=2849)
10084#[inline]
10085#[target_feature(enable = "avx512f")]
10086#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10087#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
10088#[rustc_legacy_const_generics(2)]
10089pub fn _mm512_maskz_getexp_round_pd<const SAE: i32>(k: __mmask8, a: __m512d) -> __m512d {
10090    unsafe {
10091        static_assert_sae!(SAE);
10092        let a = a.as_f64x8();
10093        let r = vgetexppd(a, f64x8::ZERO, k, SAE);
10094        transmute(r)
10095    }
10096}
10097
10098/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
10099/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10100/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10101/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10102/// * [`_MM_FROUND_TO_POS_INF`] : round up
10103/// * [`_MM_FROUND_TO_ZERO`] : truncate
10104/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10105///
10106/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_round_ps&expand=4790)
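///
/// A minimal illustrative sketch of the `imm8` encoding (values are arbitrary, not from
/// Intel's documentation), assuming `avx512f` is enabled. In the underlying `vrndscaleps`
/// encoding, bits 7:4 of `imm8` select how many fraction bits to keep and bits 2:0 select
/// the rounding mode:
///
/// ```ignore
/// let a = _mm512_set1_ps(2.625);
/// // Keep one fraction bit (imm8[7:4] = 1) and round to nearest (imm8[2:0] = 0):
/// // values are rounded to multiples of 0.5, so every lane of `r` is 2.5.
/// let r = _mm512_roundscale_round_ps::<0b0001_0000, _MM_FROUND_NO_EXC>(a);
/// ```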
10108#[inline]
10109#[target_feature(enable = "avx512f")]
10110#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10111#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
10112#[rustc_legacy_const_generics(1, 2)]
10113pub fn _mm512_roundscale_round_ps<const IMM8: i32, const SAE: i32>(a: __m512) -> __m512 {
10114    unsafe {
10115        static_assert_uimm_bits!(IMM8, 8);
10116        static_assert_mantissas_sae!(SAE);
10117        let a = a.as_f32x16();
10118        let r = vrndscaleps(a, IMM8, f32x16::ZERO, 0b11111111_11111111, SAE);
10119        transmute(r)
10120    }
10121}
10122
10123/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10124/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10125/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10126/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10127/// * [`_MM_FROUND_TO_POS_INF`] : round up
10128/// * [`_MM_FROUND_TO_ZERO`] : truncate
10129/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10130///
10131/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_round_ps&expand=4788)
10133#[inline]
10134#[target_feature(enable = "avx512f")]
10135#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10136#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
10137#[rustc_legacy_const_generics(3, 4)]
10138pub fn _mm512_mask_roundscale_round_ps<const IMM8: i32, const SAE: i32>(
10139    src: __m512,
10140    k: __mmask16,
10141    a: __m512,
10142) -> __m512 {
10143    unsafe {
10144        static_assert_uimm_bits!(IMM8, 8);
10145        static_assert_mantissas_sae!(SAE);
10146        let a = a.as_f32x16();
10147        let src = src.as_f32x16();
10148        let r = vrndscaleps(a, IMM8, src, k, SAE);
10149        transmute(r)
10150    }
10151}
10152
10153/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10154/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10155/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10156/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10157/// * [`_MM_FROUND_TO_POS_INF`] : round up
10158/// * [`_MM_FROUND_TO_ZERO`] : truncate
10159/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10160///
10161/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_round_ps&expand=4789)
10163#[inline]
10164#[target_feature(enable = "avx512f")]
10165#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10166#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
10167#[rustc_legacy_const_generics(2, 3)]
10168pub fn _mm512_maskz_roundscale_round_ps<const IMM8: i32, const SAE: i32>(
10169    k: __mmask16,
10170    a: __m512,
10171) -> __m512 {
10172    unsafe {
10173        static_assert_uimm_bits!(IMM8, 8);
10174        static_assert_mantissas_sae!(SAE);
10175        let a = a.as_f32x16();
10176        let r = vrndscaleps(a, IMM8, f32x16::ZERO, k, SAE);
10177        transmute(r)
10178    }
10179}
10180
10181/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
10182/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10183/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10184/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10185/// * [`_MM_FROUND_TO_POS_INF`] : round up
10186/// * [`_MM_FROUND_TO_ZERO`] : truncate
10187/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10188///
10189/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10190/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_round_pd&expand=4787)
10191#[inline]
10192#[target_feature(enable = "avx512f")]
10193#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10194#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
10195#[rustc_legacy_const_generics(1, 2)]
10196pub fn _mm512_roundscale_round_pd<const IMM8: i32, const SAE: i32>(a: __m512d) -> __m512d {
10197    unsafe {
10198        static_assert_uimm_bits!(IMM8, 8);
10199        static_assert_mantissas_sae!(SAE);
10200        let a = a.as_f64x8();
10201        let r = vrndscalepd(a, IMM8, f64x8::ZERO, 0b11111111, SAE);
10202        transmute(r)
10203    }
10204}
10205
10206/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10207/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10208/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10209/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10210/// * [`_MM_FROUND_TO_POS_INF`] : round up
10211/// * [`_MM_FROUND_TO_ZERO`] : truncate
10212/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10213///
10214/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10215/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_round_pd&expand=4785)
10216#[inline]
10217#[target_feature(enable = "avx512f")]
10218#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10219#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
10220#[rustc_legacy_const_generics(3, 4)]
10221pub fn _mm512_mask_roundscale_round_pd<const IMM8: i32, const SAE: i32>(
10222    src: __m512d,
10223    k: __mmask8,
10224    a: __m512d,
10225) -> __m512d {
10226    unsafe {
10227        static_assert_uimm_bits!(IMM8, 8);
10228        static_assert_mantissas_sae!(SAE);
10229        let a = a.as_f64x8();
10230        let src = src.as_f64x8();
10231        let r = vrndscalepd(a, IMM8, src, k, SAE);
10232        transmute(r)
10233    }
10234}
10235
10236/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10237/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10238/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10239/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10240/// * [`_MM_FROUND_TO_POS_INF`] : round up
10241/// * [`_MM_FROUND_TO_ZERO`] : truncate
10242/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10243///
10244/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10245/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_round_pd&expand=4786)
10246#[inline]
10247#[target_feature(enable = "avx512f")]
10248#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10249#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
10250#[rustc_legacy_const_generics(2, 3)]
10251pub fn _mm512_maskz_roundscale_round_pd<const IMM8: i32, const SAE: i32>(
10252    k: __mmask8,
10253    a: __m512d,
10254) -> __m512d {
10255    unsafe {
10256        static_assert_uimm_bits!(IMM8, 8);
10257        static_assert_mantissas_sae!(SAE);
10258        let a = a.as_f64x8();
10259        let r = vrndscalepd(a, IMM8, f64x8::ZERO, k, SAE);
10260        transmute(r)
10261    }
10262}
10263
10264/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.\
10265///
10266/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10267/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10268/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10269/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10270/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10271/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10272///
10273/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_round_ps&expand=4889)
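///
/// A minimal illustrative sketch (values are arbitrary, not from Intel's documentation),
/// assuming `avx512f` is enabled. The operation computes `a * 2^floor(b)` per lane:
///
/// ```ignore
/// let a = _mm512_set1_ps(3.0);
/// let b = _mm512_set1_ps(2.5);
/// // 3.0 * 2^floor(2.5) == 3.0 * 4.0 == 12.0 in every lane.
/// let r = _mm512_scalef_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, b);
/// ```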
10274#[inline]
10275#[target_feature(enable = "avx512f")]
10276#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10277#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
10278#[rustc_legacy_const_generics(2)]
10279pub fn _mm512_scalef_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
10280    unsafe {
10281        static_assert_rounding!(ROUNDING);
10282        let a = a.as_f32x16();
10283        let b = b.as_f32x16();
10284        let r = vscalefps(a, b, f32x16::ZERO, 0b11111111_11111111, ROUNDING);
10285        transmute(r)
10286    }
10287}
10288
10289/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10290///
10291/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10292/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10293/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10294/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10295/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10296/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10297///
10298/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_round_ps&expand=4887)
10299#[inline]
10300#[target_feature(enable = "avx512f")]
10301#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10302#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
10303#[rustc_legacy_const_generics(4)]
10304pub fn _mm512_mask_scalef_round_ps<const ROUNDING: i32>(
10305    src: __m512,
10306    k: __mmask16,
10307    a: __m512,
10308    b: __m512,
10309) -> __m512 {
10310    unsafe {
10311        static_assert_rounding!(ROUNDING);
10312        let a = a.as_f32x16();
10313        let b = b.as_f32x16();
10314        let src = src.as_f32x16();
10315        let r = vscalefps(a, b, src, k, ROUNDING);
10316        transmute(r)
10317    }
10318}
10319
10320/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10321///
10322/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10323/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10324/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10325/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10326/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10327/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10328///
10329/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_round_ps&expand=4888)
10330#[inline]
10331#[target_feature(enable = "avx512f")]
10332#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10333#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
10334#[rustc_legacy_const_generics(3)]
10335pub fn _mm512_maskz_scalef_round_ps<const ROUNDING: i32>(
10336    k: __mmask16,
10337    a: __m512,
10338    b: __m512,
10339) -> __m512 {
10340    unsafe {
10341        static_assert_rounding!(ROUNDING);
10342        let a = a.as_f32x16();
10343        let b = b.as_f32x16();
10344        let r = vscalefps(a, b, f32x16::ZERO, k, ROUNDING);
10345        transmute(r)
10346    }
10347}
10348
10349/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.\
10350///
10351/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10352/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10353/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10354/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10355/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10356/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10357///
10358/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_round_pd&expand=4886)
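///
/// A minimal illustrative sketch showing a directed rounding mode combined with
/// exception suppression (values are arbitrary, not from Intel's documentation),
/// assuming `avx512f` is enabled:
///
/// ```ignore
/// let a = _mm512_set1_pd(1.0);
/// let b = _mm512_set1_pd(-1.5);
/// // 1.0 * 2^floor(-1.5) == 1.0 * 2^-2 == 0.25 in every lane (the result is exact,
/// // so the chosen rounding mode does not change the value here).
/// let r = _mm512_scalef_round_pd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
/// ```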
10359#[inline]
10360#[target_feature(enable = "avx512f")]
10361#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10362#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
10363#[rustc_legacy_const_generics(2)]
10364pub fn _mm512_scalef_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
10365    unsafe {
10366        static_assert_rounding!(ROUNDING);
10367        let a = a.as_f64x8();
10368        let b = b.as_f64x8();
10369        let r = vscalefpd(a, b, f64x8::ZERO, 0b11111111, ROUNDING);
10370        transmute(r)
10371    }
10372}
10373
10374/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10375///
10376/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10377/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10378/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10379/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10380/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10381/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10382///
10383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_round_pd&expand=4884)
10384#[inline]
10385#[target_feature(enable = "avx512f")]
10386#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10387#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
10388#[rustc_legacy_const_generics(4)]
10389pub fn _mm512_mask_scalef_round_pd<const ROUNDING: i32>(
10390    src: __m512d,
10391    k: __mmask8,
10392    a: __m512d,
10393    b: __m512d,
10394) -> __m512d {
10395    unsafe {
10396        static_assert_rounding!(ROUNDING);
10397        let a = a.as_f64x8();
10398        let b = b.as_f64x8();
10399        let src = src.as_f64x8();
10400        let r = vscalefpd(a, b, src, k, ROUNDING);
10401        transmute(r)
10402    }
10403}
10404
10405/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10406///
10407/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10408/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10409/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10410/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10411/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10412/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10413///
10414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_round_pd&expand=4885)
10415#[inline]
10416#[target_feature(enable = "avx512f")]
10417#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10418#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
10419#[rustc_legacy_const_generics(3)]
10420pub fn _mm512_maskz_scalef_round_pd<const ROUNDING: i32>(
10421    k: __mmask8,
10422    a: __m512d,
10423    b: __m512d,
10424) -> __m512d {
10425    unsafe {
10426        static_assert_rounding!(ROUNDING);
10427        let a = a.as_f64x8();
10428        let b = b.as_f64x8();
10429        let r = vscalefpd(a, b, f64x8::ZERO, k, ROUNDING);
10430        transmute(r)
10431    }
10432}
10433
10434/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.\
10435///
10436/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10437/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_round_ps&expand=2505)
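///
/// A rough illustrative sketch of the per-class fix-up table (based on one reading of
/// the underlying `vfixupimmps` encoding; the values are arbitrary and not from Intel's
/// documentation), assuming `avx512f` is enabled. Each element of `c` holds eight 4-bit
/// responses, one per input class of the corresponding element of `b`:
///
/// ```ignore
/// let a = _mm512_set1_ps(42.0);
/// let b = _mm512_set1_ps(0.0);
/// // Nibble for the ZERO class (bits 11:8) = 0b0101, i.e. "replace with +infinity";
/// // all other classes keep the corresponding element of `a`.
/// let c = _mm512_set1_epi32(0b0101 << 8);
/// // Every lane of `b` classifies as ZERO, so every lane of `r` becomes +infinity.
/// let r = _mm512_fixupimm_round_ps::<0, _MM_FROUND_NO_EXC>(a, b, c);
/// ```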
10438#[inline]
10439#[target_feature(enable = "avx512f")]
10440#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10441#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
10442#[rustc_legacy_const_generics(3, 4)]
10443pub fn _mm512_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
10444    a: __m512,
10445    b: __m512,
10446    c: __m512i,
10447) -> __m512 {
10448    unsafe {
10449        static_assert_uimm_bits!(IMM8, 8);
10450        static_assert_mantissas_sae!(SAE);
10451        let a = a.as_f32x16();
10452        let b = b.as_f32x16();
10453        let c = c.as_i32x16();
10454        let r = vfixupimmps(a, b, c, IMM8, 0b11111111_11111111, SAE);
10455        transmute(r)
10456    }
10457}
10458
10459/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
10460///
10461/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10462/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_round_ps&expand=2506)
10463#[inline]
10464#[target_feature(enable = "avx512f")]
10465#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10466#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
10467#[rustc_legacy_const_generics(4, 5)]
10468pub fn _mm512_mask_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
10469    a: __m512,
10470    k: __mmask16,
10471    b: __m512,
10472    c: __m512i,
10473) -> __m512 {
10474    unsafe {
10475        static_assert_uimm_bits!(IMM8, 8);
10476        static_assert_mantissas_sae!(SAE);
10477        let a = a.as_f32x16();
10478        let b = b.as_f32x16();
10479        let c = c.as_i32x16();
10480        let r = vfixupimmps(a, b, c, IMM8, k, SAE);
10481        transmute(r)
10482    }
10483}
10484
10485/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
10486///
10487/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10488/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_round_ps&expand=2507)
10489#[inline]
10490#[target_feature(enable = "avx512f")]
10491#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10492#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
10493#[rustc_legacy_const_generics(4, 5)]
10494pub fn _mm512_maskz_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
10495    k: __mmask16,
10496    a: __m512,
10497    b: __m512,
10498    c: __m512i,
10499) -> __m512 {
10500    unsafe {
10501        static_assert_uimm_bits!(IMM8, 8);
10502        static_assert_mantissas_sae!(SAE);
10503        let a = a.as_f32x16();
10504        let b = b.as_f32x16();
10505        let c = c.as_i32x16();
10506        let r = vfixupimmpsz(a, b, c, IMM8, k, SAE);
10507        transmute(r)
10508    }
10509}
10510
10511/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.\
10512///
10513/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_round_pd&expand=2502)
10515#[inline]
10516#[target_feature(enable = "avx512f")]
10517#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10518#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
10519#[rustc_legacy_const_generics(3, 4)]
10520pub fn _mm512_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
10521    a: __m512d,
10522    b: __m512d,
10523    c: __m512i,
10524) -> __m512d {
10525    unsafe {
10526        static_assert_uimm_bits!(IMM8, 8);
10527        static_assert_mantissas_sae!(SAE);
10528        let a = a.as_f64x8();
10529        let b = b.as_f64x8();
10530        let c = c.as_i64x8();
10531        let r = vfixupimmpd(a, b, c, IMM8, 0b11111111, SAE);
10532        transmute(r)
10533    }
10534}
10535
10536/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
10537///
10538/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10539/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_round_pd&expand=2503)
10540#[inline]
10541#[target_feature(enable = "avx512f")]
10542#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10543#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
10544#[rustc_legacy_const_generics(4, 5)]
10545pub fn _mm512_mask_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
10546    a: __m512d,
10547    k: __mmask8,
10548    b: __m512d,
10549    c: __m512i,
10550) -> __m512d {
10551    unsafe {
10552        static_assert_uimm_bits!(IMM8, 8);
10553        static_assert_mantissas_sae!(SAE);
10554        let a = a.as_f64x8();
10555        let b = b.as_f64x8();
10556        let c = c.as_i64x8();
10557        let r = vfixupimmpd(a, b, c, IMM8, k, SAE);
10558        transmute(r)
10559    }
10560}
10561
10562/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
10563///
10564/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10565/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_round_pd&expand=2504)
10566#[inline]
10567#[target_feature(enable = "avx512f")]
10568#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10569#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
10570#[rustc_legacy_const_generics(4, 5)]
10571pub fn _mm512_maskz_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
10572    k: __mmask8,
10573    a: __m512d,
10574    b: __m512d,
10575    c: __m512i,
10576) -> __m512d {
10577    unsafe {
10578        static_assert_uimm_bits!(IMM8, 8);
10579        static_assert_mantissas_sae!(SAE);
10580        let a = a.as_f64x8();
10581        let b = b.as_f64x8();
10582        let c = c.as_i64x8();
10583        let r = vfixupimmpdz(a, b, c, IMM8, k, SAE);
10584        transmute(r)
10585    }
10586}
10587
10588/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10589/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10590///    _MM_MANT_NORM_1_2     // interval [1, 2)\
10591///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
10592///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
10593///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10594/// The sign is determined by sc which can take the following values:\
10595///    _MM_MANT_SIGN_src     // sign = sign(src)\
10596///    _MM_MANT_SIGN_zero    // sign = 0\
10597///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
10598/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10599///
10600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_round_ps&expand=2886)
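///
/// A minimal illustrative sketch (values are arbitrary, not from Intel's documentation),
/// assuming `avx512f` is enabled and using the mantissa-interval and sign constants
/// defined elsewhere in this module:
///
/// ```ignore
/// let a = _mm512_set1_ps(12.0);
/// // 12.0 == 1.5 * 2^3, so normalizing the mantissa to [1, 2) yields 1.5 in every lane.
/// let r = _mm512_getmant_round_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC, _MM_FROUND_NO_EXC>(a);
/// ```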
10601#[inline]
10602#[target_feature(enable = "avx512f")]
10603#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10604#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
10605#[rustc_legacy_const_generics(1, 2, 3)]
10606pub fn _mm512_getmant_round_ps<
10607    const NORM: _MM_MANTISSA_NORM_ENUM,
10608    const SIGN: _MM_MANTISSA_SIGN_ENUM,
10609    const SAE: i32,
10610>(
10611    a: __m512,
10612) -> __m512 {
10613    unsafe {
10614        static_assert_uimm_bits!(NORM, 4);
10615        static_assert_uimm_bits!(SIGN, 2);
10616        static_assert_mantissas_sae!(SAE);
10617        let a = a.as_f32x16();
10618        let r = vgetmantps(a, SIGN << 2 | NORM, f32x16::ZERO, 0b11111111_11111111, SAE);
10619        transmute(r)
10620    }
10621}
10622
10623/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10624/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10625///    _MM_MANT_NORM_1_2     // interval [1, 2)\
10626///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
10627///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
10628///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10629/// The sign is determined by sc which can take the following values:\
10630///    _MM_MANT_SIGN_src     // sign = sign(src)\
10631///    _MM_MANT_SIGN_zero    // sign = 0\
10632///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
10633/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10634///
10635/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_round_ps&expand=2887)
10636#[inline]
10637#[target_feature(enable = "avx512f")]
10638#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10639#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
10640#[rustc_legacy_const_generics(3, 4, 5)]
10641pub fn _mm512_mask_getmant_round_ps<
10642    const NORM: _MM_MANTISSA_NORM_ENUM,
10643    const SIGN: _MM_MANTISSA_SIGN_ENUM,
10644    const SAE: i32,
10645>(
10646    src: __m512,
10647    k: __mmask16,
10648    a: __m512,
10649) -> __m512 {
10650    unsafe {
10651        static_assert_uimm_bits!(NORM, 4);
10652        static_assert_uimm_bits!(SIGN, 2);
10653        static_assert_mantissas_sae!(SAE);
10654        let a = a.as_f32x16();
10655        let src = src.as_f32x16();
10656        let r = vgetmantps(a, SIGN << 2 | NORM, src, k, SAE);
10657        transmute(r)
10658    }
10659}
10660
10661/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10662/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10663///    _MM_MANT_NORM_1_2     // interval [1, 2)\
10664///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
10665///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
10666///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10667/// The sign is determined by sc which can take the following values:\
10668///    _MM_MANT_SIGN_src     // sign = sign(src)\
10669///    _MM_MANT_SIGN_zero    // sign = 0\
10670///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
10671/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10672///
10673/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_round_ps&expand=2888)
10674#[inline]
10675#[target_feature(enable = "avx512f")]
10676#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10677#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
10678#[rustc_legacy_const_generics(2, 3, 4)]
10679pub fn _mm512_maskz_getmant_round_ps<
10680    const NORM: _MM_MANTISSA_NORM_ENUM,
10681    const SIGN: _MM_MANTISSA_SIGN_ENUM,
10682    const SAE: i32,
10683>(
10684    k: __mmask16,
10685    a: __m512,
10686) -> __m512 {
10687    unsafe {
10688        static_assert_uimm_bits!(NORM, 4);
10689        static_assert_uimm_bits!(SIGN, 2);
10690        static_assert_mantissas_sae!(SAE);
10691        let a = a.as_f32x16();
10692        let r = vgetmantps(a, SIGN << 2 | NORM, f32x16::ZERO, k, SAE);
10693        transmute(r)
10694    }
10695}
10696
10697/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10698/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10699///    _MM_MANT_NORM_1_2     // interval [1, 2)\
10700///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
10701///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
10702///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10703/// The sign is determined by sc which can take the following values:\
10704///    _MM_MANT_SIGN_src     // sign = sign(src)\
10705///    _MM_MANT_SIGN_zero    // sign = 0\
10706///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
10707/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10708///
10709/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_round_pd&expand=2883)
10710#[inline]
10711#[target_feature(enable = "avx512f")]
10712#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10713#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
10714#[rustc_legacy_const_generics(1, 2, 3)]
10715pub fn _mm512_getmant_round_pd<
10716    const NORM: _MM_MANTISSA_NORM_ENUM,
10717    const SIGN: _MM_MANTISSA_SIGN_ENUM,
10718    const SAE: i32,
10719>(
10720    a: __m512d,
10721) -> __m512d {
10722    unsafe {
10723        static_assert_uimm_bits!(NORM, 4);
10724        static_assert_uimm_bits!(SIGN, 2);
10725        static_assert_mantissas_sae!(SAE);
10726        let a = a.as_f64x8();
10727        let r = vgetmantpd(a, SIGN << 2 | NORM, f64x8::ZERO, 0b11111111, SAE);
10728        transmute(r)
10729    }
10730}
10731
10732/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10733/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10734///    _MM_MANT_NORM_1_2     // interval [1, 2)\
10735///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
10736///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
10737///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10738/// The sign is determined by sc which can take the following values:\
10739///    _MM_MANT_SIGN_src     // sign = sign(src)\
10740///    _MM_MANT_SIGN_zero    // sign = 0\
10741///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
10742/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10743///
10744/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_round_pd&expand=2884)
10745#[inline]
10746#[target_feature(enable = "avx512f")]
10747#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10748#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
10749#[rustc_legacy_const_generics(3, 4, 5)]
10750pub fn _mm512_mask_getmant_round_pd<
10751    const NORM: _MM_MANTISSA_NORM_ENUM,
10752    const SIGN: _MM_MANTISSA_SIGN_ENUM,
10753    const SAE: i32,
10754>(
10755    src: __m512d,
10756    k: __mmask8,
10757    a: __m512d,
10758) -> __m512d {
10759    unsafe {
10760        static_assert_uimm_bits!(NORM, 4);
10761        static_assert_uimm_bits!(SIGN, 2);
10762        static_assert_mantissas_sae!(SAE);
10763        let a = a.as_f64x8();
10764        let src = src.as_f64x8();
10765        let r = vgetmantpd(a, SIGN << 2 | NORM, src, k, SAE);
10766        transmute(r)
10767    }
10768}
10769
10770/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10771/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10772///    _MM_MANT_NORM_1_2     // interval [1, 2)\
10773///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
10774///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
10775///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10776/// The sign is determined by sc which can take the following values:\
10777///    _MM_MANT_SIGN_src     // sign = sign(src)\
10778///    _MM_MANT_SIGN_zero    // sign = 0\
10779///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
10780/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10781///
10782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_round_pd&expand=2885)
10783#[inline]
10784#[target_feature(enable = "avx512f")]
10785#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10786#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
10787#[rustc_legacy_const_generics(2, 3, 4)]
10788pub fn _mm512_maskz_getmant_round_pd<
10789    const NORM: _MM_MANTISSA_NORM_ENUM,
10790    const SIGN: _MM_MANTISSA_SIGN_ENUM,
10791    const SAE: i32,
10792>(
10793    k: __mmask8,
10794    a: __m512d,
10795) -> __m512d {
10796    unsafe {
10797        static_assert_uimm_bits!(NORM, 4);
10798        static_assert_uimm_bits!(SIGN, 2);
10799        static_assert_mantissas_sae!(SAE);
10800        let a = a.as_f64x8();
10801        let r = vgetmantpd(a, SIGN << 2 | NORM, f64x8::ZERO, k, SAE);
10802        transmute(r)
10803    }
10804}
10805
10806/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
10807///
10808/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epi32&expand=1737)
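///
/// A minimal illustrative sketch (values are arbitrary, not from Intel's documentation),
/// assuming `avx512f` is enabled and the default MXCSR rounding mode
/// (round-to-nearest-even):
///
/// ```ignore
/// let a = _mm512_set1_ps(2.5);
/// // Round-to-nearest-even converts 2.5 to 2 in every lane.
/// let r = _mm512_cvtps_epi32(a);
/// ```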
10809#[inline]
10810#[target_feature(enable = "avx512f")]
10811#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10812#[cfg_attr(test, assert_instr(vcvtps2dq))]
10813pub fn _mm512_cvtps_epi32(a: __m512) -> __m512i {
10814    unsafe {
10815        transmute(vcvtps2dq(
10816            a.as_f32x16(),
10817            i32x16::ZERO,
10818            0b11111111_11111111,
10819            _MM_FROUND_CUR_DIRECTION,
10820        ))
10821    }
10822}
10823
10824/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10825///
10826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epi32&expand=1738)
10827#[inline]
10828#[target_feature(enable = "avx512f")]
10829#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10830#[cfg_attr(test, assert_instr(vcvtps2dq))]
10831pub fn _mm512_mask_cvtps_epi32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
10832    unsafe {
10833        transmute(vcvtps2dq(
10834            a.as_f32x16(),
10835            src.as_i32x16(),
10836            k,
10837            _MM_FROUND_CUR_DIRECTION,
10838        ))
10839    }
10840}
10841
10842/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10843///
10844/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epi32&expand=1739)
10845#[inline]
10846#[target_feature(enable = "avx512f")]
10847#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10848#[cfg_attr(test, assert_instr(vcvtps2dq))]
10849pub fn _mm512_maskz_cvtps_epi32(k: __mmask16, a: __m512) -> __m512i {
10850    unsafe {
10851        transmute(vcvtps2dq(
10852            a.as_f32x16(),
10853            i32x16::ZERO,
10854            k,
10855            _MM_FROUND_CUR_DIRECTION,
10856        ))
10857    }
10858}
10859
10860/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10861///
10862/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epi32&expand=1735)
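///
/// A minimal illustrative sketch of the masked 256-bit form (values are arbitrary,
/// not from Intel's documentation), assuming `avx512f` and `avx512vl` are enabled:
///
/// ```ignore
/// let src = _mm256_set1_epi32(-1);
/// let a = _mm256_set1_ps(7.25);
/// // Lanes 0..4 are converted (7.25 -> 7); lanes 4..8 keep -1 copied from `src`.
/// let r = _mm256_mask_cvtps_epi32(src, 0b0000_1111, a);
/// ```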
10863#[inline]
10864#[target_feature(enable = "avx512f,avx512vl")]
10865#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10866#[cfg_attr(test, assert_instr(vcvtps2dq))]
10867pub fn _mm256_mask_cvtps_epi32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
10868    unsafe {
10869        let convert = _mm256_cvtps_epi32(a);
10870        transmute(simd_select_bitmask(k, convert.as_i32x8(), src.as_i32x8()))
10871    }
10872}
10873
10874/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10875///
10876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epi32&expand=1736)
10877#[inline]
10878#[target_feature(enable = "avx512f,avx512vl")]
10879#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10880#[cfg_attr(test, assert_instr(vcvtps2dq))]
10881pub fn _mm256_maskz_cvtps_epi32(k: __mmask8, a: __m256) -> __m256i {
10882    unsafe {
10883        let convert = _mm256_cvtps_epi32(a);
10884        transmute(simd_select_bitmask(k, convert.as_i32x8(), i32x8::ZERO))
10885    }
10886}
10887
10888/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10889///
10890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epi32&expand=1732)
10891#[inline]
10892#[target_feature(enable = "avx512f,avx512vl")]
10893#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10894#[cfg_attr(test, assert_instr(vcvtps2dq))]
10895pub fn _mm_mask_cvtps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
10896    unsafe {
10897        let convert = _mm_cvtps_epi32(a);
10898        transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4()))
10899    }
10900}
10901
10902/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10903///
10904/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epi32&expand=1733)
10905#[inline]
10906#[target_feature(enable = "avx512f,avx512vl")]
10907#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10908#[cfg_attr(test, assert_instr(vcvtps2dq))]
10909pub fn _mm_maskz_cvtps_epi32(k: __mmask8, a: __m128) -> __m128i {
10910    unsafe {
10911        let convert = _mm_cvtps_epi32(a);
10912        transmute(simd_select_bitmask(k, convert.as_i32x4(), i32x4::ZERO))
10913    }
10914}
10915
10916/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
10917///
10918/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epu32&expand=1755)
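///
/// A minimal illustrative sketch (values are arbitrary, not from Intel's documentation),
/// assuming `avx512f` is enabled and the default MXCSR rounding mode:
///
/// ```ignore
/// let a = _mm512_set1_ps(3.5);
/// // Round-to-nearest-even converts 3.5 to the unsigned value 4 in every lane.
/// let r = _mm512_cvtps_epu32(a);
/// ```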
10919#[inline]
10920#[target_feature(enable = "avx512f")]
10921#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10922#[cfg_attr(test, assert_instr(vcvtps2udq))]
10923pub fn _mm512_cvtps_epu32(a: __m512) -> __m512i {
10924    unsafe {
10925        transmute(vcvtps2udq(
10926            a.as_f32x16(),
10927            u32x16::ZERO,
10928            0b11111111_11111111,
10929            _MM_FROUND_CUR_DIRECTION,
10930        ))
10931    }
10932}
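
// Sketch of the unsigned conversion (illustrative only; assumes AVX512F): values are
// rounded using the current rounding mode (`_MM_FROUND_CUR_DIRECTION`), then stored as u32.
//
//     let a = _mm512_set1_ps(2.5);
//     let r = _mm512_cvtps_epu32(a);
//     // every lane of `r` holds 2 (2.5 rounds to even under the default rounding mode)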
10933
10934/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10935///
10936/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epu32&expand=1756)
10937#[inline]
10938#[target_feature(enable = "avx512f")]
10939#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10940#[cfg_attr(test, assert_instr(vcvtps2udq))]
10941pub fn _mm512_mask_cvtps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
10942    unsafe {
10943        transmute(vcvtps2udq(
10944            a.as_f32x16(),
10945            src.as_u32x16(),
10946            k,
10947            _MM_FROUND_CUR_DIRECTION,
10948        ))
10949    }
10950}
10951
10952/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10953///
10954/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epu32&expand=1343)
10955#[inline]
10956#[target_feature(enable = "avx512f")]
10957#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10958#[cfg_attr(test, assert_instr(vcvtps2udq))]
10959pub fn _mm512_maskz_cvtps_epu32(k: __mmask16, a: __m512) -> __m512i {
10960    unsafe {
10961        transmute(vcvtps2udq(
10962            a.as_f32x16(),
10963            u32x16::ZERO,
10964            k,
10965            _MM_FROUND_CUR_DIRECTION,
10966        ))
10967    }
10968}
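
// Zeromask sketch (illustrative only): lanes whose bit in `k` is clear become 0 rather
// than being copied from a source vector.
//
//     let a = _mm512_set1_ps(7.0);
//     let r = _mm512_maskz_cvtps_epu32(0b00000000_00000011, a);
//     // lanes 0 and 1 of `r` hold 7, all other lanes are zeroed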
10969
10970/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
10971///
10972/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_epu32&expand=1752)
10973#[inline]
10974#[target_feature(enable = "avx512f,avx512vl")]
10975#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10976#[cfg_attr(test, assert_instr(vcvtps2udq))]
10977pub fn _mm256_cvtps_epu32(a: __m256) -> __m256i {
10978    unsafe { transmute(vcvtps2udq256(a.as_f32x8(), u32x8::ZERO, 0b11111111)) }
10979}
10980
10981/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10982///
10983/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epu32&expand=1753)
10984#[inline]
10985#[target_feature(enable = "avx512f,avx512vl")]
10986#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10987#[cfg_attr(test, assert_instr(vcvtps2udq))]
10988pub fn _mm256_mask_cvtps_epu32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
10989    unsafe { transmute(vcvtps2udq256(a.as_f32x8(), src.as_u32x8(), k)) }
10990}
10991
10992/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10993///
10994/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epu32&expand=1754)
10995#[inline]
10996#[target_feature(enable = "avx512f,avx512vl")]
10997#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10998#[cfg_attr(test, assert_instr(vcvtps2udq))]
10999pub fn _mm256_maskz_cvtps_epu32(k: __mmask8, a: __m256) -> __m256i {
11000    unsafe { transmute(vcvtps2udq256(a.as_f32x8(), u32x8::ZERO, k)) }
11001}
11002
11003/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11004///
11005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epu32&expand=1749)
11006#[inline]
11007#[target_feature(enable = "avx512f,avx512vl")]
11008#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11009#[cfg_attr(test, assert_instr(vcvtps2udq))]
11010pub fn _mm_cvtps_epu32(a: __m128) -> __m128i {
11011    unsafe { transmute(vcvtps2udq128(a.as_f32x4(), u32x4::ZERO, 0b11111111)) }
11012}
11013
11014/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11015///
11016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epu32&expand=1750)
11017#[inline]
11018#[target_feature(enable = "avx512f,avx512vl")]
11019#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11020#[cfg_attr(test, assert_instr(vcvtps2udq))]
11021pub fn _mm_mask_cvtps_epu32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
11022    unsafe { transmute(vcvtps2udq128(a.as_f32x4(), src.as_u32x4(), k)) }
11023}
11024
11025/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11026///
11027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epu32&expand=1751)
11028#[inline]
11029#[target_feature(enable = "avx512f,avx512vl")]
11030#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11031#[cfg_attr(test, assert_instr(vcvtps2udq))]
11032pub fn _mm_maskz_cvtps_epu32(k: __mmask8, a: __m128) -> __m128i {
11033    unsafe { transmute(vcvtps2udq128(a.as_f32x4(), u32x4::ZERO, k)) }
11034}
11035
11036/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
11037///
11038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_pd&expand=1769)
11039#[inline]
11040#[target_feature(enable = "avx512f")]
11041#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11042#[cfg_attr(test, assert_instr(vcvtps2pd))]
11043pub fn _mm512_cvtps_pd(a: __m256) -> __m512d {
11044    unsafe {
11045        transmute(vcvtps2pd(
11046            a.as_f32x8(),
11047            f64x8::ZERO,
11048            0b11111111,
11049            _MM_FROUND_CUR_DIRECTION,
11050        ))
11051    }
11052}
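
// Widening sketch (illustrative only): eight `f32` lanes become eight exact `f64` lanes,
// so no rounding is involved.
//
//     let a = _mm256_set1_ps(0.1);
//     let wide = _mm512_cvtps_pd(a);
//     // each f64 lane equals 0.1f32 converted to f64 (the f32 value, not the f64 literal 0.1)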
11053
11054/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11055///
11056/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_pd&expand=1770)
11057#[inline]
11058#[target_feature(enable = "avx512f")]
11059#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11060#[cfg_attr(test, assert_instr(vcvtps2pd))]
11061pub fn _mm512_mask_cvtps_pd(src: __m512d, k: __mmask8, a: __m256) -> __m512d {
11062    unsafe {
11063        transmute(vcvtps2pd(
11064            a.as_f32x8(),
11065            src.as_f64x8(),
11066            k,
11067            _MM_FROUND_CUR_DIRECTION,
11068        ))
11069    }
11070}
11071
11072/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11073///
11074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_pd&expand=1771)
11075#[inline]
11076#[target_feature(enable = "avx512f")]
11077#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11078#[cfg_attr(test, assert_instr(vcvtps2pd))]
11079pub fn _mm512_maskz_cvtps_pd(k: __mmask8, a: __m256) -> __m512d {
11080    unsafe {
11081        transmute(vcvtps2pd(
11082            a.as_f32x8(),
11083            f64x8::ZERO,
11084            k,
11085            _MM_FROUND_CUR_DIRECTION,
11086        ))
11087    }
11088}
11089
11090/// Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
11091///
11092/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpslo_pd&expand=1784)
11093#[inline]
11094#[target_feature(enable = "avx512f")]
11095#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11096#[cfg_attr(test, assert_instr(vcvtps2pd))]
11097pub fn _mm512_cvtpslo_pd(v2: __m512) -> __m512d {
11098    unsafe {
11099        transmute(vcvtps2pd(
11100            _mm512_castps512_ps256(v2).as_f32x8(),
11101            f64x8::ZERO,
11102            0b11111111,
11103            _MM_FROUND_CUR_DIRECTION,
11104        ))
11105    }
11106}
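
// Lower-half sketch (illustrative only): only the low eight `f32` lanes of `v2` are
// converted; the high eight lanes are ignored.
//
//     let v2 = _mm512_set1_ps(1.5);
//     let r = _mm512_cvtpslo_pd(v2);
//     // `r` holds eight f64 lanes of 1.5, produced from lanes 0..8 of `v2`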
11107
11108/// Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11109///
11110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpslo_pd&expand=1785)
11111#[inline]
11112#[target_feature(enable = "avx512f")]
11113#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11114#[cfg_attr(test, assert_instr(vcvtps2pd))]
11115pub fn _mm512_mask_cvtpslo_pd(src: __m512d, k: __mmask8, v2: __m512) -> __m512d {
11116    unsafe {
11117        transmute(vcvtps2pd(
11118            _mm512_castps512_ps256(v2).as_f32x8(),
11119            src.as_f64x8(),
11120            k,
11121            _MM_FROUND_CUR_DIRECTION,
11122        ))
11123    }
11124}
11125
11126/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
11127///
11128/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_ps&expand=1712)
11129#[inline]
11130#[target_feature(enable = "avx512f")]
11131#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11132#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11133pub fn _mm512_cvtpd_ps(a: __m512d) -> __m256 {
11134    unsafe {
11135        transmute(vcvtpd2ps(
11136            a.as_f64x8(),
11137            f32x8::ZERO,
11138            0b11111111,
11139            _MM_FROUND_CUR_DIRECTION,
11140        ))
11141    }
11142}
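
// Narrowing sketch (illustrative only): each of the eight `f64` lanes is rounded to the
// nearest representable `f32` under the current rounding mode.
//
//     let a = _mm512_set1_pd(1.0000000001);
//     let r = _mm512_cvtpd_ps(a);
//     // `r` is a __m256 whose lanes hold 1.0, since 1.0000000001 is not representable in f32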
11143
11144/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11145///
11146/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_ps&expand=1713)
11147#[inline]
11148#[target_feature(enable = "avx512f")]
11149#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11150#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11151pub fn _mm512_mask_cvtpd_ps(src: __m256, k: __mmask8, a: __m512d) -> __m256 {
11152    unsafe {
11153        transmute(vcvtpd2ps(
11154            a.as_f64x8(),
11155            src.as_f32x8(),
11156            k,
11157            _MM_FROUND_CUR_DIRECTION,
11158        ))
11159    }
11160}
11161
11162/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11163///
11164/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_ps&expand=1714)
11165#[inline]
11166#[target_feature(enable = "avx512f")]
11167#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11168#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11169pub fn _mm512_maskz_cvtpd_ps(k: __mmask8, a: __m512d) -> __m256 {
11170    unsafe {
11171        transmute(vcvtpd2ps(
11172            a.as_f64x8(),
11173            f32x8::ZERO,
11174            k,
11175            _MM_FROUND_CUR_DIRECTION,
11176        ))
11177    }
11178}
11179
11180/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11181///
11182/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_ps&expand=1710)
11183#[inline]
11184#[target_feature(enable = "avx512f,avx512vl")]
11185#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11186#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11187pub fn _mm256_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m256d) -> __m128 {
11188    unsafe {
11189        let convert = _mm256_cvtpd_ps(a);
11190        transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4()))
11191    }
11192}
11193
11194/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11195///
11196/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_ps&expand=1711)
11197#[inline]
11198#[target_feature(enable = "avx512f,avx512vl")]
11199#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11200#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11201pub fn _mm256_maskz_cvtpd_ps(k: __mmask8, a: __m256d) -> __m128 {
11202    unsafe {
11203        let convert = _mm256_cvtpd_ps(a);
11204        transmute(simd_select_bitmask(k, convert.as_f32x4(), f32x4::ZERO))
11205    }
11206}
11207
11208/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11209///
11210/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_ps&expand=1707)
11211#[inline]
11212#[target_feature(enable = "avx512f,avx512vl")]
11213#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11214#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11215pub fn _mm_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m128d) -> __m128 {
11216    unsafe {
11217        let convert = _mm_cvtpd_ps(a);
11218        transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4()))
11219    }
11220}
11221
11222/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11223///
11224/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_ps&expand=1708)
11225#[inline]
11226#[target_feature(enable = "avx512f,avx512vl")]
11227#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11228#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11229pub fn _mm_maskz_cvtpd_ps(k: __mmask8, a: __m128d) -> __m128 {
11230    unsafe {
11231        let convert = _mm_cvtpd_ps(a);
11232        transmute(simd_select_bitmask(k, convert.as_f32x4(), f32x4::ZERO))
11233    }
11234}
11235
11236/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
11237///
11238/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epi32&expand=1675)
11239#[inline]
11240#[target_feature(enable = "avx512f")]
11241#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11242#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11243pub fn _mm512_cvtpd_epi32(a: __m512d) -> __m256i {
11244    unsafe {
11245        transmute(vcvtpd2dq(
11246            a.as_f64x8(),
11247            i32x8::ZERO,
11248            0b11111111,
11249            _MM_FROUND_CUR_DIRECTION,
11250        ))
11251    }
11252}
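
// Sketch (illustrative only): eight `f64` lanes become eight `i32` lanes in a __m256i,
// rounded using the current rounding mode rather than truncated.
//
//     let a = _mm512_set1_pd(-2.5);
//     let r = _mm512_cvtpd_epi32(a);
//     // each lane of `r` holds -2 under the default round-to-nearest-even mode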
11253
11254/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11255///
11256/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epi32&expand=1676)
11257#[inline]
11258#[target_feature(enable = "avx512f")]
11259#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11260#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11261pub fn _mm512_mask_cvtpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
11262    unsafe {
11263        transmute(vcvtpd2dq(
11264            a.as_f64x8(),
11265            src.as_i32x8(),
11266            k,
11267            _MM_FROUND_CUR_DIRECTION,
11268        ))
11269    }
11270}
11271
11272/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11273///
11274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epi32&expand=1677)
11275#[inline]
11276#[target_feature(enable = "avx512f")]
11277#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11278#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11279pub fn _mm512_maskz_cvtpd_epi32(k: __mmask8, a: __m512d) -> __m256i {
11280    unsafe {
11281        transmute(vcvtpd2dq(
11282            a.as_f64x8(),
11283            i32x8::ZERO,
11284            k,
11285            _MM_FROUND_CUR_DIRECTION,
11286        ))
11287    }
11288}
11289
11290/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11291///
11292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epi32&expand=1673)
11293#[inline]
11294#[target_feature(enable = "avx512f,avx512vl")]
11295#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11296#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11297pub fn _mm256_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
11298    unsafe {
11299        let convert = _mm256_cvtpd_epi32(a);
11300        transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4()))
11301    }
11302}
11303
11304/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11305///
11306/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epi32&expand=1674)
11307#[inline]
11308#[target_feature(enable = "avx512f,avx512vl")]
11309#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11310#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11311pub fn _mm256_maskz_cvtpd_epi32(k: __mmask8, a: __m256d) -> __m128i {
11312    unsafe {
11313        let convert = _mm256_cvtpd_epi32(a);
11314        transmute(simd_select_bitmask(k, convert.as_i32x4(), i32x4::ZERO))
11315    }
11316}
11317
11318/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11319///
11320/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epi32&expand=1670)
11321#[inline]
11322#[target_feature(enable = "avx512f,avx512vl")]
11323#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11324#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11325pub fn _mm_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
11326    unsafe {
11327        let convert = _mm_cvtpd_epi32(a);
11328        transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4()))
11329    }
11330}
11331
11332/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11333///
11334/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epi32&expand=1671)
11335#[inline]
11336#[target_feature(enable = "avx512f,avx512vl")]
11337#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11338#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11339pub fn _mm_maskz_cvtpd_epi32(k: __mmask8, a: __m128d) -> __m128i {
11340    unsafe {
11341        let convert = _mm_cvtpd_epi32(a);
11342        transmute(simd_select_bitmask(k, convert.as_i32x4(), i32x4::ZERO))
11343    }
11344}
11345
11346/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11347///
11348/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epu32&expand=1693)
11349#[inline]
11350#[target_feature(enable = "avx512f")]
11351#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11352#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11353pub fn _mm512_cvtpd_epu32(a: __m512d) -> __m256i {
11354    unsafe {
11355        transmute(vcvtpd2udq(
11356            a.as_f64x8(),
11357            u32x8::ZERO,
11358            0b11111111,
11359            _MM_FROUND_CUR_DIRECTION,
11360        ))
11361    }
11362}
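
// Unsigned sketch (illustrative only): in-range, non-negative values convert as expected.
//
//     let a = _mm512_set1_pd(4000000000.0);
//     let r = _mm512_cvtpd_epu32(a);
//     // each u32 lane of `r` holds 4_000_000_000, which would overflow a signed i32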
11363
11364/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11365///
11366/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epu32&expand=1694)
11367#[inline]
11368#[target_feature(enable = "avx512f")]
11369#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11370#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11371pub fn _mm512_mask_cvtpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
11372    unsafe {
11373        transmute(vcvtpd2udq(
11374            a.as_f64x8(),
11375            src.as_u32x8(),
11376            k,
11377            _MM_FROUND_CUR_DIRECTION,
11378        ))
11379    }
11380}
11381
11382/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11383///
11384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epu32&expand=1695)
11385#[inline]
11386#[target_feature(enable = "avx512f")]
11387#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11388#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11389pub fn _mm512_maskz_cvtpd_epu32(k: __mmask8, a: __m512d) -> __m256i {
11390    unsafe {
11391        transmute(vcvtpd2udq(
11392            a.as_f64x8(),
11393            u32x8::ZERO,
11394            k,
11395            _MM_FROUND_CUR_DIRECTION,
11396        ))
11397    }
11398}
11399
11400/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11401///
11402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_epu32&expand=1690)
11403#[inline]
11404#[target_feature(enable = "avx512f,avx512vl")]
11405#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11406#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11407pub fn _mm256_cvtpd_epu32(a: __m256d) -> __m128i {
11408    unsafe { transmute(vcvtpd2udq256(a.as_f64x4(), u32x4::ZERO, 0b11111111)) }
11409}
11410
11411/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11412///
11413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epu32&expand=1691)
11414#[inline]
11415#[target_feature(enable = "avx512f,avx512vl")]
11416#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11417#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11418pub fn _mm256_mask_cvtpd_epu32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
11419    unsafe { transmute(vcvtpd2udq256(a.as_f64x4(), src.as_u32x4(), k)) }
11420}
11421
11422/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11423///
11424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epu32&expand=1692)
11425#[inline]
11426#[target_feature(enable = "avx512f,avx512vl")]
11427#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11428#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11429pub fn _mm256_maskz_cvtpd_epu32(k: __mmask8, a: __m256d) -> __m128i {
11430    unsafe { transmute(vcvtpd2udq256(a.as_f64x4(), u32x4::ZERO, k)) }
11431}
11432
11433/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11434///
11435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epu32&expand=1687)
11436#[inline]
11437#[target_feature(enable = "avx512f,avx512vl")]
11438#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11439#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11440pub fn _mm_cvtpd_epu32(a: __m128d) -> __m128i {
11441    unsafe { transmute(vcvtpd2udq128(a.as_f64x2(), u32x4::ZERO, 0b11111111)) }
11442}
11443
11444/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11445///
11446/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epu32&expand=1688)
11447#[inline]
11448#[target_feature(enable = "avx512f,avx512vl")]
11449#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11450#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11451pub fn _mm_mask_cvtpd_epu32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
11452    unsafe { transmute(vcvtpd2udq128(a.as_f64x2(), src.as_u32x4(), k)) }
11453}
11454
11455/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11456///
11457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epu32&expand=1689)
11458#[inline]
11459#[target_feature(enable = "avx512f,avx512vl")]
11460#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11461#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11462pub fn _mm_maskz_cvtpd_epu32(k: __mmask8, a: __m128d) -> __m128i {
11463    unsafe { transmute(vcvtpd2udq128(a.as_f64x2(), u32x4::ZERO, k)) }
11464}
11465
11466/// Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in v2 to single-precision (32-bit) floating-point elements and stores them in dst. The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0.
11467///
11468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_pslo&expand=1715)
11469#[inline]
11470#[target_feature(enable = "avx512f")]
11471#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11472#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11473pub fn _mm512_cvtpd_pslo(v2: __m512d) -> __m512 {
11474    unsafe {
11475        let r: f32x8 = vcvtpd2ps(
11476            v2.as_f64x8(),
11477            f32x8::ZERO,
11478            0b11111111,
11479            _MM_FROUND_CUR_DIRECTION,
11480        );
11481        simd_shuffle!(
11482            r,
11483            f32x8::ZERO,
11484            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
11485        )
11486    }
11487}
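
// Sketch (illustrative only): the eight converted f32 values land in the low half of the
// 512-bit result and the upper eight lanes are zeroed, matching the shuffle above.
//
//     let v2 = _mm512_set1_pd(2.0);
//     let r = _mm512_cvtpd_pslo(v2);
//     // lanes 0..8 of `r` hold 2.0, lanes 8..16 hold 0.0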
11488
11489/// Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in v2 to single-precision (32-bit) floating-point elements and stores them in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0.
11490///
11491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_pslo&expand=1716)
11492#[inline]
11493#[target_feature(enable = "avx512f")]
11494#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11495#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11496pub fn _mm512_mask_cvtpd_pslo(src: __m512, k: __mmask8, v2: __m512d) -> __m512 {
11497    unsafe {
11498        let r: f32x8 = vcvtpd2ps(
11499            v2.as_f64x8(),
11500            _mm512_castps512_ps256(src).as_f32x8(),
11501            k,
11502            _MM_FROUND_CUR_DIRECTION,
11503        );
11504        simd_shuffle!(
11505            r,
11506            f32x8::ZERO,
11507            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
11508        )
11509    }
11510}
11511
11512/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst.
11513///
11514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi8_epi32&expand=1535)
11515#[inline]
11516#[target_feature(enable = "avx512f")]
11517#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11518#[cfg_attr(test, assert_instr(vpmovsxbd))]
11519pub fn _mm512_cvtepi8_epi32(a: __m128i) -> __m512i {
11520    unsafe {
11521        let a = a.as_i8x16();
11522        transmute::<i32x16, _>(simd_cast(a))
11523    }
11524}
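
// Sign-extension sketch (illustrative only): negative bytes keep their sign in the wider
// lanes.
//
//     let a = _mm_set1_epi8(-5);
//     let r = _mm512_cvtepi8_epi32(a);
//     // all sixteen i32 lanes of `r` hold -5 (0xFFFFFFFB), not 251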
11525
11526/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11527///
11528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi8_epi32&expand=1536)
11529#[inline]
11530#[target_feature(enable = "avx512f")]
11531#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11532#[cfg_attr(test, assert_instr(vpmovsxbd))]
11533pub fn _mm512_mask_cvtepi8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
11534    unsafe {
11535        let convert = _mm512_cvtepi8_epi32(a).as_i32x16();
11536        transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
11537    }
11538}
11539
11540/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11541///
11542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi8_epi32&expand=1537)
11543#[inline]
11544#[target_feature(enable = "avx512f")]
11545#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11546#[cfg_attr(test, assert_instr(vpmovsxbd))]
11547pub fn _mm512_maskz_cvtepi8_epi32(k: __mmask16, a: __m128i) -> __m512i {
11548    unsafe {
11549        let convert = _mm512_cvtepi8_epi32(a).as_i32x16();
11550        transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
11551    }
11552}
11553
11554/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11555///
11556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi8_epi32&expand=1533)
11557#[inline]
11558#[target_feature(enable = "avx512f,avx512vl")]
11559#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11560#[cfg_attr(test, assert_instr(vpmovsxbd))]
11561pub fn _mm256_mask_cvtepi8_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11562    unsafe {
11563        let convert = _mm256_cvtepi8_epi32(a).as_i32x8();
11564        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
11565    }
11566}
11567
11568/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11569///
11570/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi8_epi32&expand=1534)
11571#[inline]
11572#[target_feature(enable = "avx512f,avx512vl")]
11573#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11574#[cfg_attr(test, assert_instr(vpmovsxbd))]
11575pub fn _mm256_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m256i {
11576    unsafe {
11577        let convert = _mm256_cvtepi8_epi32(a).as_i32x8();
11578        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
11579    }
11580}
11581
11582/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11583///
11584/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi8_epi32&expand=1530)
11585#[inline]
11586#[target_feature(enable = "avx512f,avx512vl")]
11587#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11588#[cfg_attr(test, assert_instr(vpmovsxbd))]
11589pub fn _mm_mask_cvtepi8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11590    unsafe {
11591        let convert = _mm_cvtepi8_epi32(a).as_i32x4();
11592        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
11593    }
11594}
11595
11596/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11597///
11598/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi8_epi32&expand=1531)
11599#[inline]
11600#[target_feature(enable = "avx512f,avx512vl")]
11601#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11602#[cfg_attr(test, assert_instr(vpmovsxbd))]
11603pub fn _mm_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m128i {
11604    unsafe {
11605        let convert = _mm_cvtepi8_epi32(a).as_i32x4();
11606        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
11607    }
11608}
11609
11610/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst.
11611///
11612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi8_epi64&expand=1544)
11613#[inline]
11614#[target_feature(enable = "avx512f")]
11615#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11616#[cfg_attr(test, assert_instr(vpmovsxbq))]
11617pub fn _mm512_cvtepi8_epi64(a: __m128i) -> __m512i {
11618    unsafe {
11619        let a = a.as_i8x16();
11620        let v64: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
11621        transmute::<i64x8, _>(simd_cast(v64))
11622    }
11623}
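
// Sketch (illustrative only): only the low 8 bytes of `a` are used, matching the
// `simd_shuffle!` above that selects lanes 0..8 before the cast.
//
//     let a = _mm_set1_epi8(-1);
//     let r = _mm512_cvtepi8_epi64(a);
//     // each of the eight i64 lanes of `r` holds -1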
11624
11625/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11626///
11627/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi8_epi64&expand=1545)
11628#[inline]
11629#[target_feature(enable = "avx512f")]
11630#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11631#[cfg_attr(test, assert_instr(vpmovsxbq))]
11632pub fn _mm512_mask_cvtepi8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
11633    unsafe {
11634        let convert = _mm512_cvtepi8_epi64(a).as_i64x8();
11635        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
11636    }
11637}
11638
11639/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11640///
11641/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi8_epi64&expand=1546)
11642#[inline]
11643#[target_feature(enable = "avx512f")]
11644#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11645#[cfg_attr(test, assert_instr(vpmovsxbq))]
11646pub fn _mm512_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m512i {
11647    unsafe {
11648        let convert = _mm512_cvtepi8_epi64(a).as_i64x8();
11649        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
11650    }
11651}
11652
11653/// Sign extend packed 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11654///
11655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi8_epi64&expand=1542)
11656#[inline]
11657#[target_feature(enable = "avx512f,avx512vl")]
11658#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11659#[cfg_attr(test, assert_instr(vpmovsxbq))]
11660pub fn _mm256_mask_cvtepi8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11661    unsafe {
11662        let convert = _mm256_cvtepi8_epi64(a).as_i64x4();
11663        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
11664    }
11665}
11666
11667/// Sign extend packed 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11668///
11669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi8_epi64&expand=1543)
11670#[inline]
11671#[target_feature(enable = "avx512f,avx512vl")]
11672#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11673#[cfg_attr(test, assert_instr(vpmovsxbq))]
11674pub fn _mm256_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m256i {
11675    unsafe {
11676        let convert = _mm256_cvtepi8_epi64(a).as_i64x4();
11677        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
11678    }
11679}
11680
11681/// Sign extend packed 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11682///
11683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi8_epi64&expand=1539)
11684#[inline]
11685#[target_feature(enable = "avx512f,avx512vl")]
11686#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11687#[cfg_attr(test, assert_instr(vpmovsxbq))]
11688pub fn _mm_mask_cvtepi8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11689    unsafe {
11690        let convert = _mm_cvtepi8_epi64(a).as_i64x2();
11691        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
11692    }
11693}
11694
11695/// Sign extend packed 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11696///
11697/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi8_epi64&expand=1540)
11698#[inline]
11699#[target_feature(enable = "avx512f,avx512vl")]
11700#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11701#[cfg_attr(test, assert_instr(vpmovsxbq))]
11702pub fn _mm_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m128i {
11703    unsafe {
11704        let convert = _mm_cvtepi8_epi64(a).as_i64x2();
11705        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
11706    }
11707}
11708
11709/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst.
11710///
11711/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu8_epi32&expand=1621)
11712#[inline]
11713#[target_feature(enable = "avx512f")]
11714#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11715#[cfg_attr(test, assert_instr(vpmovzxbd))]
11716pub fn _mm512_cvtepu8_epi32(a: __m128i) -> __m512i {
11717    unsafe {
11718        let a = a.as_u8x16();
11719        transmute::<i32x16, _>(simd_cast(a))
11720    }
11721}
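
// Zero-extension sketch (illustrative only): bytes are treated as unsigned, so 0xFF
// becomes 255 rather than -1.
//
//     let a = _mm_set1_epi8(-1); // every byte is 0xFF
//     let r = _mm512_cvtepu8_epi32(a);
//     // all sixteen i32 lanes of `r` hold 255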
11722
11723/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11724///
11725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu8_epi32&expand=1622)
11726#[inline]
11727#[target_feature(enable = "avx512f")]
11728#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11729#[cfg_attr(test, assert_instr(vpmovzxbd))]
11730pub fn _mm512_mask_cvtepu8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
11731    unsafe {
11732        let convert = _mm512_cvtepu8_epi32(a).as_i32x16();
11733        transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
11734    }
11735}
11736
11737/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11738///
11739/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu8_epi32&expand=1623)
11740#[inline]
11741#[target_feature(enable = "avx512f")]
11742#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11743#[cfg_attr(test, assert_instr(vpmovzxbd))]
11744pub fn _mm512_maskz_cvtepu8_epi32(k: __mmask16, a: __m128i) -> __m512i {
11745    unsafe {
11746        let convert = _mm512_cvtepu8_epi32(a).as_i32x16();
11747        transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
11748    }
11749}
11750
11751/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11752///
11753/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu8_epi32&expand=1619)
11754#[inline]
11755#[target_feature(enable = "avx512f,avx512vl")]
11756#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11757#[cfg_attr(test, assert_instr(vpmovzxbd))]
11758pub fn _mm256_mask_cvtepu8_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11759    unsafe {
11760        let convert = _mm256_cvtepu8_epi32(a).as_i32x8();
11761        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
11762    }
11763}
11764
11765/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11766///
11767/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu8_epi32&expand=1620)
11768#[inline]
11769#[target_feature(enable = "avx512f,avx512vl")]
11770#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11771#[cfg_attr(test, assert_instr(vpmovzxbd))]
11772pub fn _mm256_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m256i {
11773    unsafe {
11774        let convert = _mm256_cvtepu8_epi32(a).as_i32x8();
11775        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
11776    }
11777}
11778
11779/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11780///
11781/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu8_epi32&expand=1616)
11782#[inline]
11783#[target_feature(enable = "avx512f,avx512vl")]
11784#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11785#[cfg_attr(test, assert_instr(vpmovzxbd))]
11786pub fn _mm_mask_cvtepu8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11787    unsafe {
11788        let convert = _mm_cvtepu8_epi32(a).as_i32x4();
11789        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
11790    }
11791}
11792
11793/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11794///
11795/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu8_epi32&expand=1617)
11796#[inline]
11797#[target_feature(enable = "avx512f,avx512vl")]
11798#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11799#[cfg_attr(test, assert_instr(vpmovzxbd))]
11800pub fn _mm_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m128i {
11801    unsafe {
11802        let convert = _mm_cvtepu8_epi32(a).as_i32x4();
11803        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
11804    }
11805}
11806
11807/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst.
11808///
11809/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu8_epi64&expand=1630)
11810#[inline]
11811#[target_feature(enable = "avx512f")]
11812#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11813#[cfg_attr(test, assert_instr(vpmovzxbq))]
11814pub fn _mm512_cvtepu8_epi64(a: __m128i) -> __m512i {
11815    unsafe {
11816        let a = a.as_u8x16();
11817        let v64: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
11818        transmute::<i64x8, _>(simd_cast(v64))
11819    }
11820}
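
// Sketch (illustrative only): as with the signed variant above, only the low 8 bytes of
// `a` are widened, here with zero extension.
//
//     let a = _mm_set1_epi8(-128); // every byte is 0x80
//     let r = _mm512_cvtepu8_epi64(a);
//     // each i64 lane of `r` holds 128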
11821
11822/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11823///
11824/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu8_epi64&expand=1631)
11825#[inline]
11826#[target_feature(enable = "avx512f")]
11827#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11828#[cfg_attr(test, assert_instr(vpmovzxbq))]
11829pub fn _mm512_mask_cvtepu8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
11830    unsafe {
11831        let convert = _mm512_cvtepu8_epi64(a).as_i64x8();
11832        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
11833    }
11834}
11835
11836/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11837///
11838/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu8_epi64&expand=1632)
11839#[inline]
11840#[target_feature(enable = "avx512f")]
11841#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11842#[cfg_attr(test, assert_instr(vpmovzxbq))]
11843pub fn _mm512_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m512i {
11844    unsafe {
11845        let convert = _mm512_cvtepu8_epi64(a).as_i64x8();
11846        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
11847    }
11848}
11849
11850/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11851///
11852/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu8_epi64&expand=1628)
11853#[inline]
11854#[target_feature(enable = "avx512f,avx512vl")]
11855#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11856#[cfg_attr(test, assert_instr(vpmovzxbq))]
11857pub fn _mm256_mask_cvtepu8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11858    unsafe {
11859        let convert = _mm256_cvtepu8_epi64(a).as_i64x4();
11860        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
11861    }
11862}
11863
11864/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11865///
11866/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu8_epi64&expand=1629)
11867#[inline]
11868#[target_feature(enable = "avx512f,avx512vl")]
11869#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11870#[cfg_attr(test, assert_instr(vpmovzxbq))]
11871pub fn _mm256_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m256i {
11872    unsafe {
11873        let convert = _mm256_cvtepu8_epi64(a).as_i64x4();
11874        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
11875    }
11876}
11877
11878/// Zero extend packed unsigned 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11879///
11880/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu8_epi64&expand=1625)
11881#[inline]
11882#[target_feature(enable = "avx512f,avx512vl")]
11883#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11884#[cfg_attr(test, assert_instr(vpmovzxbq))]
11885pub fn _mm_mask_cvtepu8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11886    unsafe {
11887        let convert = _mm_cvtepu8_epi64(a).as_i64x2();
11888        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
11889    }
11890}
11891
11892/// Zero extend packed unsigned 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11893///
11894/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu8_epi64&expand=1626)
11895#[inline]
11896#[target_feature(enable = "avx512f,avx512vl")]
11897#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11898#[cfg_attr(test, assert_instr(vpmovzxbq))]
11899pub fn _mm_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m128i {
11900    unsafe {
11901        let convert = _mm_cvtepu8_epi64(a).as_i64x2();
11902        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
11903    }
11904}
11905
11906/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst.
11907///
11908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi16_epi32&expand=1389)
11909#[inline]
11910#[target_feature(enable = "avx512f")]
11911#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11912#[cfg_attr(test, assert_instr(vpmovsxwd))]
11913pub fn _mm512_cvtepi16_epi32(a: __m256i) -> __m512i {
11914    unsafe {
11915        let a = a.as_i16x16();
11916        transmute::<i32x16, _>(simd_cast(a))
11917    }
11918}
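
// Sketch (illustrative only): all sixteen 16-bit lanes of the __m256i argument are
// sign-extended to 32 bits.
//
//     let a = _mm256_set1_epi16(-300);
//     let r = _mm512_cvtepi16_epi32(a);
//     // every i32 lane of `r` holds -300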
11919
11920/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11921///
11922/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_epi32&expand=1390)
11923#[inline]
11924#[target_feature(enable = "avx512f")]
11925#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11926#[cfg_attr(test, assert_instr(vpmovsxwd))]
11927pub fn _mm512_mask_cvtepi16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
11928    unsafe {
11929        let convert = _mm512_cvtepi16_epi32(a).as_i32x16();
11930        transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
11931    }
11932}
11933
11934/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11935///
11936/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi16_epi32&expand=1391)
11937#[inline]
11938#[target_feature(enable = "avx512f")]
11939#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11940#[cfg_attr(test, assert_instr(vpmovsxwd))]
11941pub fn _mm512_maskz_cvtepi16_epi32(k: __mmask16, a: __m256i) -> __m512i {
11942    unsafe {
11943        let convert = _mm512_cvtepi16_epi32(a).as_i32x16();
11944        transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
11945    }
11946}
11947
11948/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11949///
11950/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_epi32&expand=1387)
11951#[inline]
11952#[target_feature(enable = "avx512f,avx512vl")]
11953#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11954#[cfg_attr(test, assert_instr(vpmovsxwd))]
11955pub fn _mm256_mask_cvtepi16_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11956    unsafe {
11957        let convert = _mm256_cvtepi16_epi32(a).as_i32x8();
11958        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
11959    }
11960}
11961
11962/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11963///
11964/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi16_epi32&expand=1388)
11965#[inline]
11966#[target_feature(enable = "avx512f,avx512vl")]
11967#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11968#[cfg_attr(test, assert_instr(vpmovsxwd))]
11969pub fn _mm256_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m256i {
11970    unsafe {
11971        let convert = _mm256_cvtepi16_epi32(a).as_i32x8();
11972        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
11973    }
11974}
11975
11976/// Sign extend packed 16-bit integers in the low 8 bytes of a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11977///
11978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_epi32&expand=1384)
11979#[inline]
11980#[target_feature(enable = "avx512f,avx512vl")]
11981#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11982#[cfg_attr(test, assert_instr(vpmovsxwd))]
11983pub fn _mm_mask_cvtepi16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11984    unsafe {
11985        let convert = _mm_cvtepi16_epi32(a).as_i32x4();
11986        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
11987    }
11988}
11989
11990/// Sign extend packed 16-bit integers in the low 8 bytes of a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11991///
11992/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi16_epi32&expand=1385)
11993#[inline]
11994#[target_feature(enable = "avx512f,avx512vl")]
11995#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11996#[cfg_attr(test, assert_instr(vpmovsxwd))]
11997pub fn _mm_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m128i {
11998    unsafe {
11999        let convert = _mm_cvtepi16_epi32(a).as_i32x4();
12000        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
12001    }
12002}
12003
12004/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst.
12005///
12006/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi16_epi64&expand=1398)
12007#[inline]
12008#[target_feature(enable = "avx512f")]
12009#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12010#[cfg_attr(test, assert_instr(vpmovsxwq))]
12011pub fn _mm512_cvtepi16_epi64(a: __m128i) -> __m512i {
12012    unsafe {
12013        let a = a.as_i16x8();
12014        transmute::<i64x8, _>(simd_cast(a))
12015    }
12016}
12017
12018/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12019///
12020/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_epi64&expand=1399)
12021#[inline]
12022#[target_feature(enable = "avx512f")]
12023#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12024#[cfg_attr(test, assert_instr(vpmovsxwq))]
12025pub fn _mm512_mask_cvtepi16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
12026    unsafe {
12027        let convert = _mm512_cvtepi16_epi64(a).as_i64x8();
12028        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
12029    }
12030}
12031
12032/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12033///
12034/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi16_epi64&expand=1400)
12035#[inline]
12036#[target_feature(enable = "avx512f")]
12037#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12038#[cfg_attr(test, assert_instr(vpmovsxwq))]
12039pub fn _mm512_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m512i {
12040    unsafe {
12041        let convert = _mm512_cvtepi16_epi64(a).as_i64x8();
12042        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
12043    }
12044}
12045
12046/// Sign extend packed 16-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12047///
12048/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_epi64&expand=1396)
12049#[inline]
12050#[target_feature(enable = "avx512f,avx512vl")]
12051#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12052#[cfg_attr(test, assert_instr(vpmovsxwq))]
12053pub fn _mm256_mask_cvtepi16_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12054    unsafe {
12055        let convert = _mm256_cvtepi16_epi64(a).as_i64x4();
12056        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
12057    }
12058}
12059
12060/// Sign extend packed 16-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12061///
12062/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi16_epi64&expand=1397)
12063#[inline]
12064#[target_feature(enable = "avx512f,avx512vl")]
12065#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12066#[cfg_attr(test, assert_instr(vpmovsxwq))]
12067pub fn _mm256_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m256i {
12068    unsafe {
12069        let convert = _mm256_cvtepi16_epi64(a).as_i64x4();
12070        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
12071    }
12072}
12073
12074/// Sign extend packed 16-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12075///
12076/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_epi64&expand=1393)
12077#[inline]
12078#[target_feature(enable = "avx512f,avx512vl")]
12079#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12080#[cfg_attr(test, assert_instr(vpmovsxwq))]
12081pub fn _mm_mask_cvtepi16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12082    unsafe {
12083        let convert = _mm_cvtepi16_epi64(a).as_i64x2();
12084        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
12085    }
12086}
12087
12088/// Sign extend packed 16-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12089///
12090/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi16_epi64&expand=1394)
12091#[inline]
12092#[target_feature(enable = "avx512f,avx512vl")]
12093#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12094#[cfg_attr(test, assert_instr(vpmovsxwq))]
12095pub fn _mm_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m128i {
12096    unsafe {
12097        let convert = _mm_cvtepi16_epi64(a).as_i64x2();
12098        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
12099    }
12100}
12101
12102/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst.
12103///
12104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu16_epi32&expand=1553)
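///
/// A minimal sketch contrasting zero extension with sign extension
/// (illustrative only, not compiled as a doctest), assuming the running CPU
/// supports AVX-512F:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Assumption: AVX-512F support was verified at runtime.
/// assert!(is_x86_feature_detected!("avx512f"));
/// // The bit pattern 0xFFFF is -1 as i16, but zero extension reads it as 65535.
/// let a = _mm256_set1_epi16(-1);
/// let r = unsafe { _mm512_cvtepu16_epi32(a) };
/// assert_eq!(unsafe { _mm512_reduce_add_epi32(r) }, 65535 * 16);
/// ```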
12105#[inline]
12106#[target_feature(enable = "avx512f")]
12107#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12108#[cfg_attr(test, assert_instr(vpmovzxwd))]
12109pub fn _mm512_cvtepu16_epi32(a: __m256i) -> __m512i {
12110    unsafe {
12111        let a = a.as_u16x16();
12112        transmute::<i32x16, _>(simd_cast(a))
12113    }
12114}
12115
12116/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12117///
12118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu16_epi32&expand=1554)
12119#[inline]
12120#[target_feature(enable = "avx512f")]
12121#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12122#[cfg_attr(test, assert_instr(vpmovzxwd))]
12123pub fn _mm512_mask_cvtepu16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
12124    unsafe {
12125        let convert = _mm512_cvtepu16_epi32(a).as_i32x16();
12126        transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
12127    }
12128}
12129
12130/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12131///
12132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu16_epi32&expand=1555)
12133#[inline]
12134#[target_feature(enable = "avx512f")]
12135#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12136#[cfg_attr(test, assert_instr(vpmovzxwd))]
12137pub fn _mm512_maskz_cvtepu16_epi32(k: __mmask16, a: __m256i) -> __m512i {
12138    unsafe {
12139        let convert = _mm512_cvtepu16_epi32(a).as_i32x16();
12140        transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
12141    }
12142}
12143
12144/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12145///
12146/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu16_epi32&expand=1551)
12147#[inline]
12148#[target_feature(enable = "avx512f,avx512vl")]
12149#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12150#[cfg_attr(test, assert_instr(vpmovzxwd))]
12151pub fn _mm256_mask_cvtepu16_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12152    unsafe {
12153        let convert = _mm256_cvtepu16_epi32(a).as_i32x8();
12154        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
12155    }
12156}
12157
12158/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12159///
12160/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu16_epi32&expand=1552)
12161#[inline]
12162#[target_feature(enable = "avx512f,avx512vl")]
12163#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12164#[cfg_attr(test, assert_instr(vpmovzxwd))]
12165pub fn _mm256_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m256i {
12166    unsafe {
12167        let convert = _mm256_cvtepu16_epi32(a).as_i32x8();
12168        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
12169    }
12170}
12171
12172/// Zero extend packed unsigned 16-bit integers in the low 8 bytes of a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12173///
12174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu16_epi32&expand=1548)
12175#[inline]
12176#[target_feature(enable = "avx512f,avx512vl")]
12177#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12178#[cfg_attr(test, assert_instr(vpmovzxwd))]
12179pub fn _mm_mask_cvtepu16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12180    unsafe {
12181        let convert = _mm_cvtepu16_epi32(a).as_i32x4();
12182        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
12183    }
12184}
12185
12186/// Zero extend packed unsigned 16-bit integers in the low 8 bytes of a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12187///
12188/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu16_epi32&expand=1549)
12189#[inline]
12190#[target_feature(enable = "avx512f,avx512vl")]
12191#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12192#[cfg_attr(test, assert_instr(vpmovzxwd))]
12193pub fn _mm_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m128i {
12194    unsafe {
12195        let convert = _mm_cvtepu16_epi32(a).as_i32x4();
12196        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
12197    }
12198}
12199
12200/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst.
12201///
12202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu16_epi64&expand=1562)
12203#[inline]
12204#[target_feature(enable = "avx512f")]
12205#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12206#[cfg_attr(test, assert_instr(vpmovzxwq))]
12207pub fn _mm512_cvtepu16_epi64(a: __m128i) -> __m512i {
12208    unsafe {
12209        let a = a.as_u16x8();
12210        transmute::<i64x8, _>(simd_cast(a))
12211    }
12212}
12213
12214/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12215///
12216/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu16_epi64&expand=1563)
12217#[inline]
12218#[target_feature(enable = "avx512f")]
12219#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12220#[cfg_attr(test, assert_instr(vpmovzxwq))]
12221pub fn _mm512_mask_cvtepu16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
12222    unsafe {
12223        let convert = _mm512_cvtepu16_epi64(a).as_i64x8();
12224        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
12225    }
12226}
12227
12228/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12229///
12230/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu16_epi64&expand=1564)
12231#[inline]
12232#[target_feature(enable = "avx512f")]
12233#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12234#[cfg_attr(test, assert_instr(vpmovzxwq))]
12235pub fn _mm512_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m512i {
12236    unsafe {
12237        let convert = _mm512_cvtepu16_epi64(a).as_i64x8();
12238        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
12239    }
12240}
12241
12242/// Zero extend packed unsigned 16-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12243///
12244/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu16_epi64&expand=1560)
12245#[inline]
12246#[target_feature(enable = "avx512f,avx512vl")]
12247#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12248#[cfg_attr(test, assert_instr(vpmovzxwq))]
12249pub fn _mm256_mask_cvtepu16_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12250    unsafe {
12251        let convert = _mm256_cvtepu16_epi64(a).as_i64x4();
12252        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
12253    }
12254}
12255
12256/// Zero extend packed unsigned 16-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12257///
12258/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu16_epi64&expand=1561)
12259#[inline]
12260#[target_feature(enable = "avx512f,avx512vl")]
12261#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12262#[cfg_attr(test, assert_instr(vpmovzxwq))]
12263pub fn _mm256_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m256i {
12264    unsafe {
12265        let convert = _mm256_cvtepu16_epi64(a).as_i64x4();
12266        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
12267    }
12268}
12269
12270/// Zero extend packed unsigned 16-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12271///
12272/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu16_epi64&expand=1557)
12273#[inline]
12274#[target_feature(enable = "avx512f,avx512vl")]
12275#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12276#[cfg_attr(test, assert_instr(vpmovzxwq))]
12277pub fn _mm_mask_cvtepu16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12278    unsafe {
12279        let convert = _mm_cvtepu16_epi64(a).as_i64x2();
12280        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
12281    }
12282}
12283
12284/// Zero extend packed unsigned 16-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12285///
12286/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu16_epi64&expand=1558)
12287#[inline]
12288#[target_feature(enable = "avx512f,avx512vl")]
12289#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12290#[cfg_attr(test, assert_instr(vpmovzxwq))]
12291pub fn _mm_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m128i {
12292    unsafe {
12293        let convert = _mm_cvtepu16_epi64(a).as_i64x2();
12294        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
12295    }
12296}
12297
12298/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst.
12299///
12300/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi64&expand=1428)
12301#[inline]
12302#[target_feature(enable = "avx512f")]
12303#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12304#[cfg_attr(test, assert_instr(vpmovsxdq))]
12305pub fn _mm512_cvtepi32_epi64(a: __m256i) -> __m512i {
12306    unsafe {
12307        let a = a.as_i32x8();
12308        transmute::<i64x8, _>(simd_cast(a))
12309    }
12310}
12311
12312/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12313///
12314/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi64&expand=1429)
12315#[inline]
12316#[target_feature(enable = "avx512f")]
12317#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12318#[cfg_attr(test, assert_instr(vpmovsxdq))]
12319pub fn _mm512_mask_cvtepi32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
12320    unsafe {
12321        let convert = _mm512_cvtepi32_epi64(a).as_i64x8();
12322        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
12323    }
12324}
12325
12326/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12327///
12328/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi64&expand=1430)
12329#[inline]
12330#[target_feature(enable = "avx512f")]
12331#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12332#[cfg_attr(test, assert_instr(vpmovsxdq))]
12333pub fn _mm512_maskz_cvtepi32_epi64(k: __mmask8, a: __m256i) -> __m512i {
12334    unsafe {
12335        let convert = _mm512_cvtepi32_epi64(a).as_i64x8();
12336        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
12337    }
12338}
12339
12340/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12341///
12342/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi64&expand=1426)
12343#[inline]
12344#[target_feature(enable = "avx512f,avx512vl")]
12345#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12346#[cfg_attr(test, assert_instr(vpmovsxdq))]
12347pub fn _mm256_mask_cvtepi32_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12348    unsafe {
12349        let convert = _mm256_cvtepi32_epi64(a).as_i64x4();
12350        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
12351    }
12352}
12353
12354/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12355///
12356/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi64&expand=1427)
12357#[inline]
12358#[target_feature(enable = "avx512f,avx512vl")]
12359#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12360#[cfg_attr(test, assert_instr(vpmovsxdq))]
12361pub fn _mm256_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m256i {
12362    unsafe {
12363        let convert = _mm256_cvtepi32_epi64(a).as_i64x4();
12364        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
12365    }
12366}
12367
12368/// Sign extend packed 32-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12369///
12370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi64&expand=1423)
12371#[inline]
12372#[target_feature(enable = "avx512f,avx512vl")]
12373#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12374#[cfg_attr(test, assert_instr(vpmovsxdq))]
12375pub fn _mm_mask_cvtepi32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12376    unsafe {
12377        let convert = _mm_cvtepi32_epi64(a).as_i64x2();
12378        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
12379    }
12380}
12381
12382/// Sign extend packed 32-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12383///
12384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi64&expand=1424)
12385#[inline]
12386#[target_feature(enable = "avx512f,avx512vl")]
12387#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12388#[cfg_attr(test, assert_instr(vpmovsxdq))]
12389pub fn _mm_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m128i {
12390    unsafe {
12391        let convert = _mm_cvtepi32_epi64(a).as_i64x2();
12392        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
12393    }
12394}
12395
12396/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst.
12397///
12398/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_epi64&expand=1571)
12399#[inline]
12400#[target_feature(enable = "avx512f")]
12401#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12402#[cfg_attr(test, assert_instr(vpmovzxdq))]
12403pub fn _mm512_cvtepu32_epi64(a: __m256i) -> __m512i {
12404    unsafe {
12405        let a = a.as_u32x8();
12406        transmute::<i64x8, _>(simd_cast(a))
12407    }
12408}
12409
12410/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12411///
12412/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_epi64&expand=1572)
12413#[inline]
12414#[target_feature(enable = "avx512f")]
12415#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12416#[cfg_attr(test, assert_instr(vpmovzxdq))]
12417pub fn _mm512_mask_cvtepu32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
12418    unsafe {
12419        let convert = _mm512_cvtepu32_epi64(a).as_i64x8();
12420        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
12421    }
12422}
12423
12424/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12425///
12426/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_epi64&expand=1573)
12427#[inline]
12428#[target_feature(enable = "avx512f")]
12429#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12430#[cfg_attr(test, assert_instr(vpmovzxdq))]
12431pub fn _mm512_maskz_cvtepu32_epi64(k: __mmask8, a: __m256i) -> __m512i {
12432    unsafe {
12433        let convert = _mm512_cvtepu32_epi64(a).as_i64x8();
12434        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
12435    }
12436}
12437
12438/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12439///
12440/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu32_epi64&expand=1569)
12441#[inline]
12442#[target_feature(enable = "avx512f,avx512vl")]
12443#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12444#[cfg_attr(test, assert_instr(vpmovzxdq))]
12445pub fn _mm256_mask_cvtepu32_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12446    unsafe {
12447        let convert = _mm256_cvtepu32_epi64(a).as_i64x4();
12448        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
12449    }
12450}
12451
12452/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12453///
12454/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu32_epi64&expand=1570)
12455#[inline]
12456#[target_feature(enable = "avx512f,avx512vl")]
12457#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12458#[cfg_attr(test, assert_instr(vpmovzxdq))]
12459pub fn _mm256_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m256i {
12460    unsafe {
12461        let convert = _mm256_cvtepu32_epi64(a).as_i64x4();
12462        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
12463    }
12464}
12465
12466/// Zero extend packed unsigned 32-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12467///
12468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu32_epi64&expand=1566)
12469#[inline]
12470#[target_feature(enable = "avx512f,avx512vl")]
12471#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12472#[cfg_attr(test, assert_instr(vpmovzxdq))]
12473pub fn _mm_mask_cvtepu32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12474    unsafe {
12475        let convert = _mm_cvtepu32_epi64(a).as_i64x2();
12476        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
12477    }
12478}
12479
12480/// Zero extend packed unsigned 32-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12481///
12482/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu32_epi64&expand=1567)
12483#[inline]
12484#[target_feature(enable = "avx512f,avx512vl")]
12485#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12486#[cfg_attr(test, assert_instr(vpmovzxdq))]
12487pub fn _mm_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m128i {
12488    unsafe {
12489        let convert = _mm_cvtepu32_epi64(a).as_i64x2();
12490        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
12491    }
12492}
12493
12494/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
12495///
12496/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_ps&expand=1455)
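///
/// A minimal usage sketch (illustrative only, not compiled as a doctest),
/// assuming the running CPU supports AVX-512F:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Assumption: AVX-512F support was verified at runtime.
/// assert!(is_x86_feature_detected!("avx512f"));
/// // Each i32 lane becomes the corresponding f32 value.
/// let a = _mm512_set1_epi32(-3);
/// let r = unsafe { _mm512_cvtepi32_ps(a) };
/// assert_eq!(unsafe { _mm512_reduce_add_ps(r) }, -48.0);
/// ```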
12497#[inline]
12498#[target_feature(enable = "avx512f")]
12499#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12500#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12501pub fn _mm512_cvtepi32_ps(a: __m512i) -> __m512 {
12502    unsafe {
12503        let a = a.as_i32x16();
12504        transmute::<f32x16, _>(simd_cast(a))
12505    }
12506}
12507
12508/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12509///
12510/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_ps&expand=1456)
12511#[inline]
12512#[target_feature(enable = "avx512f")]
12513#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12514#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12515pub fn _mm512_mask_cvtepi32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 {
12516    unsafe {
12517        let convert = _mm512_cvtepi32_ps(a).as_f32x16();
12518        transmute(simd_select_bitmask(k, convert, src.as_f32x16()))
12519    }
12520}
12521
12522/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12523///
12524/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_ps&expand=1457)
12525#[inline]
12526#[target_feature(enable = "avx512f")]
12527#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12528#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12529pub fn _mm512_maskz_cvtepi32_ps(k: __mmask16, a: __m512i) -> __m512 {
12530    unsafe {
12531        let convert = _mm512_cvtepi32_ps(a).as_f32x16();
12532        transmute(simd_select_bitmask(k, convert, f32x16::ZERO))
12533    }
12534}
12535
12536/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12537///
12538/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_ps&expand=1453)
12539#[inline]
12540#[target_feature(enable = "avx512f,avx512vl")]
12541#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12542#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12543pub fn _mm256_mask_cvtepi32_ps(src: __m256, k: __mmask8, a: __m256i) -> __m256 {
12544    unsafe {
12545        let convert = _mm256_cvtepi32_ps(a).as_f32x8();
12546        transmute(simd_select_bitmask(k, convert, src.as_f32x8()))
12547    }
12548}
12549
12550/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12551///
12552/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_ps&expand=1454)
12553#[inline]
12554#[target_feature(enable = "avx512f,avx512vl")]
12555#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12556#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12557pub fn _mm256_maskz_cvtepi32_ps(k: __mmask8, a: __m256i) -> __m256 {
12558    unsafe {
12559        let convert = _mm256_cvtepi32_ps(a).as_f32x8();
12560        transmute(simd_select_bitmask(k, convert, f32x8::ZERO))
12561    }
12562}
12563
12564/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12565///
12566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_ps&expand=1450)
12567#[inline]
12568#[target_feature(enable = "avx512f,avx512vl")]
12569#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12570#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12571pub fn _mm_mask_cvtepi32_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
12572    unsafe {
12573        let convert = _mm_cvtepi32_ps(a).as_f32x4();
12574        transmute(simd_select_bitmask(k, convert, src.as_f32x4()))
12575    }
12576}
12577
12578/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12579///
12580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_ps&expand=1451)
12581#[inline]
12582#[target_feature(enable = "avx512f,avx512vl")]
12583#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12584#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12585pub fn _mm_maskz_cvtepi32_ps(k: __mmask8, a: __m128i) -> __m128 {
12586    unsafe {
12587        let convert = _mm_cvtepi32_ps(a).as_f32x4();
12588        transmute(simd_select_bitmask(k, convert, f32x4::ZERO))
12589    }
12590}
12591
12592/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
12593///
12594/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_pd&expand=1446)
12595#[inline]
12596#[target_feature(enable = "avx512f")]
12597#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12598#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12599pub fn _mm512_cvtepi32_pd(a: __m256i) -> __m512d {
12600    unsafe {
12601        let a = a.as_i32x8();
12602        transmute::<f64x8, _>(simd_cast(a))
12603    }
12604}
12605
12606/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12607///
12608/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_pd&expand=1447)
12609#[inline]
12610#[target_feature(enable = "avx512f")]
12611#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12612#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12613pub fn _mm512_mask_cvtepi32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d {
12614    unsafe {
12615        let convert = _mm512_cvtepi32_pd(a).as_f64x8();
12616        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
12617    }
12618}
12619
12620/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12621///
12622/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_pd&expand=1448)
12623#[inline]
12624#[target_feature(enable = "avx512f")]
12625#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12626#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12627pub fn _mm512_maskz_cvtepi32_pd(k: __mmask8, a: __m256i) -> __m512d {
12628    unsafe {
12629        let convert = _mm512_cvtepi32_pd(a).as_f64x8();
12630        transmute(simd_select_bitmask(k, convert, f64x8::ZERO))
12631    }
12632}
12633
12634/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12635///
12636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_pd&expand=1444)
12637#[inline]
12638#[target_feature(enable = "avx512f,avx512vl")]
12639#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12640#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12641pub fn _mm256_mask_cvtepi32_pd(src: __m256d, k: __mmask8, a: __m128i) -> __m256d {
12642    unsafe {
12643        let convert = _mm256_cvtepi32_pd(a).as_f64x4();
12644        transmute(simd_select_bitmask(k, convert, src.as_f64x4()))
12645    }
12646}
12647
12648/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12649///
12650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_pd&expand=1445)
12651#[inline]
12652#[target_feature(enable = "avx512f,avx512vl")]
12653#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12654#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12655pub fn _mm256_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m256d {
12656    unsafe {
12657        let convert = _mm256_cvtepi32_pd(a).as_f64x4();
12658        transmute(simd_select_bitmask(k, convert, f64x4::ZERO))
12659    }
12660}
12661
12662/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12663///
12664/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_pd&expand=1441)
12665#[inline]
12666#[target_feature(enable = "avx512f,avx512vl")]
12667#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12668#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12669pub fn _mm_mask_cvtepi32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
12670    unsafe {
12671        let convert = _mm_cvtepi32_pd(a).as_f64x2();
12672        transmute(simd_select_bitmask(k, convert, src.as_f64x2()))
12673    }
12674}
12675
12676/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12677///
12678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_pd&expand=1442)
12679#[inline]
12680#[target_feature(enable = "avx512f,avx512vl")]
12681#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12682#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12683pub fn _mm_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m128d {
12684    unsafe {
12685        let convert = _mm_cvtepi32_pd(a).as_f64x2();
12686        transmute(simd_select_bitmask(k, convert, f64x2::ZERO))
12687    }
12688}
12689
12690/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
12691///
12692/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_ps&expand=1583)
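///
/// A minimal sketch showing why the unsigned variant matters (illustrative
/// only, not compiled as a doctest), assuming the running CPU supports
/// AVX-512F:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Assumption: AVX-512F support was verified at runtime.
/// assert!(is_x86_feature_detected!("avx512f"));
/// // The all-ones bit pattern is u32::MAX, not -1, under this conversion.
/// let a = _mm512_set1_epi32(-1);
/// let r = unsafe { _mm512_cvtepu32_ps(a) };
/// assert_eq!(unsafe { _mm512_reduce_add_ps(r) }, u32::MAX as f32 * 16.0);
/// ```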
12693#[inline]
12694#[target_feature(enable = "avx512f")]
12695#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12696#[cfg_attr(test, assert_instr(vcvtudq2ps))]
12697pub fn _mm512_cvtepu32_ps(a: __m512i) -> __m512 {
12698    unsafe {
12699        let a = a.as_u32x16();
12700        transmute::<f32x16, _>(simd_cast(a))
12701    }
12702}
12703
12704/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12705///
12706/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_ps&expand=1584)
12707#[inline]
12708#[target_feature(enable = "avx512f")]
12709#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12710#[cfg_attr(test, assert_instr(vcvtudq2ps))]
12711pub fn _mm512_mask_cvtepu32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 {
12712    unsafe {
12713        let convert = _mm512_cvtepu32_ps(a).as_f32x16();
12714        transmute(simd_select_bitmask(k, convert, src.as_f32x16()))
12715    }
12716}
12717
12718/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12719///
12720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_ps&expand=1585)
12721#[inline]
12722#[target_feature(enable = "avx512f")]
12723#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12724#[cfg_attr(test, assert_instr(vcvtudq2ps))]
12725pub fn _mm512_maskz_cvtepu32_ps(k: __mmask16, a: __m512i) -> __m512 {
12726    unsafe {
12727        let convert = _mm512_cvtepu32_ps(a).as_f32x16();
12728        transmute(simd_select_bitmask(k, convert, f32x16::ZERO))
12729    }
12730}
12731
12732/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
12733///
12734/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_pd&expand=1580)
12735#[inline]
12736#[target_feature(enable = "avx512f")]
12737#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12738#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12739pub fn _mm512_cvtepu32_pd(a: __m256i) -> __m512d {
12740    unsafe {
12741        let a = a.as_u32x8();
12742        transmute::<f64x8, _>(simd_cast(a))
12743    }
12744}
12745
12746/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12747///
12748/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_pd&expand=1581)
12749#[inline]
12750#[target_feature(enable = "avx512f")]
12751#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12752#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12753pub fn _mm512_mask_cvtepu32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d {
12754    unsafe {
12755        let convert = _mm512_cvtepu32_pd(a).as_f64x8();
12756        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
12757    }
12758}
12759
12760/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12761///
12762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_pd&expand=1582)
12763#[inline]
12764#[target_feature(enable = "avx512f")]
12765#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12766#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12767pub fn _mm512_maskz_cvtepu32_pd(k: __mmask8, a: __m256i) -> __m512d {
12768    unsafe {
12769        let convert = _mm512_cvtepu32_pd(a).as_f64x8();
12770        transmute(simd_select_bitmask(k, convert, f64x8::ZERO))
12771    }
12772}
12773
12774/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
12775///
12776/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu32_pd&expand=1577)
12777#[inline]
12778#[target_feature(enable = "avx512f,avx512vl")]
12779#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12780#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12781pub fn _mm256_cvtepu32_pd(a: __m128i) -> __m256d {
12782    unsafe {
12783        let a = a.as_u32x4();
12784        transmute::<f64x4, _>(simd_cast(a))
12785    }
12786}
12787
12788/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12789///
12790/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu32_pd&expand=1578)
12791#[inline]
12792#[target_feature(enable = "avx512f,avx512vl")]
12793#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12794#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12795pub fn _mm256_mask_cvtepu32_pd(src: __m256d, k: __mmask8, a: __m128i) -> __m256d {
12796    unsafe {
12797        let convert = _mm256_cvtepu32_pd(a).as_f64x4();
12798        transmute(simd_select_bitmask(k, convert, src.as_f64x4()))
12799    }
12800}
12801
12802/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12803///
12804/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu32_pd&expand=1579)
12805#[inline]
12806#[target_feature(enable = "avx512f,avx512vl")]
12807#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12808#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12809pub fn _mm256_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m256d {
12810    unsafe {
12811        let convert = _mm256_cvtepu32_pd(a).as_f64x4();
12812        transmute(simd_select_bitmask(k, convert, f64x4::ZERO))
12813    }
12814}
12815
12816/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
12817///
12818/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu32_pd&expand=1574)
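///
/// A minimal usage sketch (illustrative only, not compiled as a doctest),
/// assuming the running CPU supports AVX-512F and AVX-512VL; only the two
/// low 32-bit elements of `a` participate:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Assumption: AVX-512F and AVX-512VL support was verified at runtime.
/// assert!(is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl"));
/// let a = _mm_set_epi32(7, 7, 3, u32::MAX as i32);
/// let r = unsafe { _mm_cvtepu32_pd(a) };
/// // Lane 0 holds 4294967295.0 and lane 1 holds 3.0; the upper two inputs
/// // are ignored.
/// assert_eq!(unsafe { _mm_cvtsd_f64(r) }, u32::MAX as f64);
/// ```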
12819#[inline]
12820#[target_feature(enable = "avx512f,avx512vl")]
12821#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12822#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12823pub fn _mm_cvtepu32_pd(a: __m128i) -> __m128d {
12824    unsafe {
12825        let a = a.as_u32x4();
12826        let lo: u32x2 = simd_shuffle!(a, a, [0, 1]);
12827        transmute::<f64x2, _>(simd_cast(lo))
12828    }
12829}
12830
12831/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12832///
12833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu32_pd&expand=1575)
12834#[inline]
12835#[target_feature(enable = "avx512f,avx512vl")]
12836#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12837#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12838pub fn _mm_mask_cvtepu32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
12839    unsafe {
12840        let convert = _mm_cvtepu32_pd(a).as_f64x2();
12841        transmute(simd_select_bitmask(k, convert, src.as_f64x2()))
12842    }
12843}
12844
12845/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12846///
12847/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu32_pd&expand=1576)
12848#[inline]
12849#[target_feature(enable = "avx512f,avx512vl")]
12850#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12851#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12852pub fn _mm_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m128d {
12853    unsafe {
12854        let convert = _mm_cvtepu32_pd(a).as_f64x2();
12855        transmute(simd_select_bitmask(k, convert, f64x2::ZERO))
12856    }
12857}
12858
12859/// Performs element-by-element conversion of the lower half of packed 32-bit integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
12860///
12861/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32lo_pd&expand=1464)
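///
/// A minimal usage sketch (illustrative only, not compiled as a doctest),
/// assuming the running CPU supports AVX-512F; only the lower eight 32-bit
/// elements of `v2` participate:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Assumption: AVX-512F support was verified at runtime.
/// assert!(is_x86_feature_detected!("avx512f"));
/// let v2 = _mm512_set1_epi32(-2);
/// let r = unsafe { _mm512_cvtepi32lo_pd(v2) };
/// // Eight f64 lanes of -2.0; the upper half of `v2` is ignored.
/// assert_eq!(unsafe { _mm512_reduce_add_pd(r) }, -16.0);
/// ```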
12862#[inline]
12863#[target_feature(enable = "avx512f")]
12864#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12865#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12866pub fn _mm512_cvtepi32lo_pd(v2: __m512i) -> __m512d {
12867    unsafe {
12868        let v2 = v2.as_i32x16();
12869        let v256: i32x8 = simd_shuffle!(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]);
12870        transmute::<f64x8, _>(simd_cast(v256))
12871    }
12872}
12873
12874/// Performs element-by-element conversion of the lower half of packed 32-bit integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12875///
12876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32lo_pd&expand=1465)
12877#[inline]
12878#[target_feature(enable = "avx512f")]
12879#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12880#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12881pub fn _mm512_mask_cvtepi32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d {
12882    unsafe {
12883        let convert = _mm512_cvtepi32lo_pd(v2).as_f64x8();
12884        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
12885    }
12886}
12887
12888/// Performs element-by-element conversion of the lower half of packed 32-bit unsigned integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
12889///
12890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32lo_pd&expand=1586)
12891#[inline]
12892#[target_feature(enable = "avx512f")]
12893#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12894#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12895pub fn _mm512_cvtepu32lo_pd(v2: __m512i) -> __m512d {
12896    unsafe {
12897        let v2 = v2.as_u32x16();
12898        let v256: u32x8 = simd_shuffle!(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]);
12899        transmute::<f64x8, _>(simd_cast(v256))
12900    }
12901}
12902
12903/// Performs element-by-element conversion of the lower half of packed 32-bit unsigned integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12904///
12905/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32lo_pd&expand=1587)
12906#[inline]
12907#[target_feature(enable = "avx512f")]
12908#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12909#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12910pub fn _mm512_mask_cvtepu32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d {
12911    unsafe {
12912        let convert = _mm512_cvtepu32lo_pd(v2).as_f64x8();
12913        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
12914    }
12915}
12916
12917/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
12918///
12919/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi16&expand=1419)
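///
/// A minimal sketch of the truncating behaviour (illustrative only, not
/// compiled as a doctest), assuming the running CPU supports AVX-512F:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Assumption: AVX-512F support was verified at runtime.
/// assert!(is_x86_feature_detected!("avx512f"));
/// // 0x0001_FFFF does not fit in 16 bits; only the low 16 bits survive.
/// let a = _mm512_set1_epi32(0x0001_FFFF);
/// let r = unsafe { _mm512_cvtepi32_epi16(a) };
/// let mut out = [0i16; 16];
/// unsafe { _mm256_storeu_si256(out.as_mut_ptr().cast(), r) };
/// assert!(out.iter().all(|&x| x == -1));
/// ```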
12920#[inline]
12921#[target_feature(enable = "avx512f")]
12922#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12923#[cfg_attr(test, assert_instr(vpmovdw))]
12924pub fn _mm512_cvtepi32_epi16(a: __m512i) -> __m256i {
12925    unsafe {
12926        let a = a.as_i32x16();
12927        transmute::<i16x16, _>(simd_cast(a))
12928    }
12929}
12930
12931/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12932///
12933/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi16&expand=1420)
12934#[inline]
12935#[target_feature(enable = "avx512f")]
12936#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12937#[cfg_attr(test, assert_instr(vpmovdw))]
12938pub fn _mm512_mask_cvtepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
12939    unsafe {
12940        let convert = _mm512_cvtepi32_epi16(a).as_i16x16();
12941        transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
12942    }
12943}
12944
12945/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12946///
12947/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi16&expand=1421)
12948#[inline]
12949#[target_feature(enable = "avx512f")]
12950#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12951#[cfg_attr(test, assert_instr(vpmovdw))]
12952pub fn _mm512_maskz_cvtepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
12953    unsafe {
12954        let convert = _mm512_cvtepi32_epi16(a).as_i16x16();
12955        transmute(simd_select_bitmask(k, convert, i16x16::ZERO))
12956    }
12957}
12958
12959/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
12960///
12961/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_epi16&expand=1416)
12962#[inline]
12963#[target_feature(enable = "avx512f,avx512vl")]
12964#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12965#[cfg_attr(test, assert_instr(vpmovdw))]
12966pub fn _mm256_cvtepi32_epi16(a: __m256i) -> __m128i {
12967    unsafe {
12968        let a = a.as_i32x8();
12969        transmute::<i16x8, _>(simd_cast(a))
12970    }
12971}
12972
12973/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12974///
12975/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi16&expand=1417)
12976#[inline]
12977#[target_feature(enable = "avx512f,avx512vl")]
12978#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12979#[cfg_attr(test, assert_instr(vpmovdw))]
12980pub fn _mm256_mask_cvtepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
12981    unsafe {
12982        let convert = _mm256_cvtepi32_epi16(a).as_i16x8();
12983        transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
12984    }
12985}
12986
12987/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12988///
12989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi16&expand=1418)
12990#[inline]
12991#[target_feature(enable = "avx512f,avx512vl")]
12992#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12993#[cfg_attr(test, assert_instr(vpmovdw))]
12994pub fn _mm256_maskz_cvtepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
12995    unsafe {
12996        let convert = _mm256_cvtepi32_epi16(a).as_i16x8();
12997        transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
12998    }
12999}
13000
13001/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13002///
13003/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_epi16&expand=1413)
13004#[inline]
13005#[target_feature(enable = "avx512f,avx512vl")]
13006#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13007#[cfg_attr(test, assert_instr(vpmovdw))]
13008pub fn _mm_cvtepi32_epi16(a: __m128i) -> __m128i {
13009    unsafe { transmute(vpmovdw128(a.as_i32x4(), i16x8::ZERO, 0b11111111)) }
13010}
13011
13012/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13013///
13014/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi16&expand=1414)
13015#[inline]
13016#[target_feature(enable = "avx512f,avx512vl")]
13017#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13018#[cfg_attr(test, assert_instr(vpmovdw))]
13019pub fn _mm_mask_cvtepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13020    unsafe { transmute(vpmovdw128(a.as_i32x4(), src.as_i16x8(), k)) }
13021}
13022
13023/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13024///
13025/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi16&expand=1415)
13026#[inline]
13027#[target_feature(enable = "avx512f,avx512vl")]
13028#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13029#[cfg_attr(test, assert_instr(vpmovdw))]
13030pub fn _mm_maskz_cvtepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
13031    unsafe { transmute(vpmovdw128(a.as_i32x4(), i16x8::ZERO, k)) }
13032}
13033
13034/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13035///
13036/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi8&expand=1437)
13037#[inline]
13038#[target_feature(enable = "avx512f")]
13039#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13040#[cfg_attr(test, assert_instr(vpmovdb))]
13041pub fn _mm512_cvtepi32_epi8(a: __m512i) -> __m128i {
13042    unsafe {
13043        let a = a.as_i32x16();
13044        transmute::<i8x16, _>(simd_cast(a))
13045    }
13046}
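// A minimal sketch of the truncating behaviour (illustrative value): only the low
// 8 bits of each 32-bit lane survive, so 300 (0x12C) becomes 44 (0x2C) rather than
// saturating.
//
//     let a = _mm512_set1_epi32(300);
//     let r = _mm512_cvtepi32_epi8(a); // every byte lane is 44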
13047
13048/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13049///
13050/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi8&expand=1438)
13051#[inline]
13052#[target_feature(enable = "avx512f")]
13053#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13054#[cfg_attr(test, assert_instr(vpmovdb))]
13055pub fn _mm512_mask_cvtepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
13056    unsafe {
13057        let convert = _mm512_cvtepi32_epi8(a).as_i8x16();
13058        transmute(simd_select_bitmask(k, convert, src.as_i8x16()))
13059    }
13060}
13061
13062/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13063///
13064/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi8&expand=1439)
13065#[inline]
13066#[target_feature(enable = "avx512f")]
13067#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13068#[cfg_attr(test, assert_instr(vpmovdb))]
13069pub fn _mm512_maskz_cvtepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
13070    unsafe {
13071        let convert = _mm512_cvtepi32_epi8(a).as_i8x16();
13072        transmute(simd_select_bitmask(k, convert, i8x16::ZERO))
13073    }
13074}
13075
13076/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13077///
13078/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_epi8&expand=1434)
13079#[inline]
13080#[target_feature(enable = "avx512f,avx512vl")]
13081#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13082#[cfg_attr(test, assert_instr(vpmovdb))]
13083pub fn _mm256_cvtepi32_epi8(a: __m256i) -> __m128i {
13084    unsafe { transmute(vpmovdb256(a.as_i32x8(), i8x16::ZERO, 0b11111111)) }
13085}
13086
13087/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13088///
13089/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi8&expand=1435)
13090#[inline]
13091#[target_feature(enable = "avx512f,avx512vl")]
13092#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13093#[cfg_attr(test, assert_instr(vpmovdb))]
13094pub fn _mm256_mask_cvtepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13095    unsafe { transmute(vpmovdb256(a.as_i32x8(), src.as_i8x16(), k)) }
13096}
13097
13098/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13099///
13100/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi8&expand=1436)
13101#[inline]
13102#[target_feature(enable = "avx512f,avx512vl")]
13103#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13104#[cfg_attr(test, assert_instr(vpmovdb))]
13105pub fn _mm256_maskz_cvtepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
13106    unsafe { transmute(vpmovdb256(a.as_i32x8(), i8x16::ZERO, k)) }
13107}
13108
13109/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13110///
13111/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_epi8&expand=1431)
13112#[inline]
13113#[target_feature(enable = "avx512f,avx512vl")]
13114#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13115#[cfg_attr(test, assert_instr(vpmovdb))]
13116pub fn _mm_cvtepi32_epi8(a: __m128i) -> __m128i {
13117    unsafe { transmute(vpmovdb128(a.as_i32x4(), i8x16::ZERO, 0b11111111)) }
13118}
13119
13120/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13121///
13122/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi8&expand=1432)
13123#[inline]
13124#[target_feature(enable = "avx512f,avx512vl")]
13125#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13126#[cfg_attr(test, assert_instr(vpmovdb))]
13127pub fn _mm_mask_cvtepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13128    unsafe { transmute(vpmovdb128(a.as_i32x4(), src.as_i8x16(), k)) }
13129}
13130
13131/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13132///
13133/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi8&expand=1433)
13134#[inline]
13135#[target_feature(enable = "avx512f,avx512vl")]
13136#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13137#[cfg_attr(test, assert_instr(vpmovdb))]
13138pub fn _mm_maskz_cvtepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
13139    unsafe { transmute(vpmovdb128(a.as_i32x4(), i8x16::ZERO, k)) }
13140}
13141
13142/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
13143///
13144/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi32&expand=1481)
13145#[inline]
13146#[target_feature(enable = "avx512f")]
13147#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13148#[cfg_attr(test, assert_instr(vpmovqd))]
13149pub fn _mm512_cvtepi64_epi32(a: __m512i) -> __m256i {
13150    unsafe {
13151        let a = a.as_i64x8();
13152        transmute::<i32x8, _>(simd_cast(a))
13153    }
13154}
13155
13156/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13157///
13158/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi32&expand=1482)
13159#[inline]
13160#[target_feature(enable = "avx512f")]
13161#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13162#[cfg_attr(test, assert_instr(vpmovqd))]
13163pub fn _mm512_mask_cvtepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
13164    unsafe {
13165        let convert = _mm512_cvtepi64_epi32(a).as_i32x8();
13166        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
13167    }
13168}
13169
13170/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13171///
13172/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi32&expand=1483)
13173#[inline]
13174#[target_feature(enable = "avx512f")]
13175#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13176#[cfg_attr(test, assert_instr(vpmovqd))]
13177pub fn _mm512_maskz_cvtepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
13178    unsafe {
13179        let convert = _mm512_cvtepi64_epi32(a).as_i32x8();
13180        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
13181    }
13182}
13183
13184/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
13185///
13186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi32&expand=1478)
13187#[inline]
13188#[target_feature(enable = "avx512f,avx512vl")]
13189#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13190#[cfg_attr(test, assert_instr(vpmovqd))]
13191pub fn _mm256_cvtepi64_epi32(a: __m256i) -> __m128i {
13192    unsafe {
13193        let a = a.as_i64x4();
13194        transmute::<i32x4, _>(simd_cast(a))
13195    }
13196}
13197
13198/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13199///
13200/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi32&expand=1479)
13201#[inline]
13202#[target_feature(enable = "avx512f,avx512vl")]
13203#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13204#[cfg_attr(test, assert_instr(vpmovqd))]
13205pub fn _mm256_mask_cvtepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13206    unsafe {
13207        let convert = _mm256_cvtepi64_epi32(a).as_i32x4();
13208        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
13209    }
13210}
13211
13212/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13213///
13214/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi32&expand=1480)
13215#[inline]
13216#[target_feature(enable = "avx512f,avx512vl")]
13217#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13218#[cfg_attr(test, assert_instr(vpmovqd))]
13219pub fn _mm256_maskz_cvtepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
13220    unsafe {
13221        let convert = _mm256_cvtepi64_epi32(a).as_i32x4();
13222        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
13223    }
13224}
13225
13226/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
13227///
13228/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi32&expand=1475)
13229#[inline]
13230#[target_feature(enable = "avx512f,avx512vl")]
13231#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13232#[cfg_attr(test, assert_instr(vpmovqd))]
13233pub fn _mm_cvtepi64_epi32(a: __m128i) -> __m128i {
13234    unsafe { transmute(vpmovqd128(a.as_i64x2(), i32x4::ZERO, 0b11111111)) }
13235}
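// With only two 64-bit source elements, the converted 32-bit values occupy the two
// low lanes of the result and, per Intel's pseudocode, the upper 64 bits of `dst`
// are zeroed. A small sketch with illustrative values:
//
//     let a = _mm_set_epi64x(2, -1);
//     let r = _mm_cvtepi64_epi32(a); // packed 32-bit lanes: [-1, 2, 0, 0]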
13236
13237/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13238///
13239/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi32&expand=1476)
13240#[inline]
13241#[target_feature(enable = "avx512f,avx512vl")]
13242#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13243#[cfg_attr(test, assert_instr(vpmovqd))]
13244pub fn _mm_mask_cvtepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13245    unsafe { transmute(vpmovqd128(a.as_i64x2(), src.as_i32x4(), k)) }
13246}
13247
13248/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13249///
13250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi32&expand=1477)
13251#[inline]
13252#[target_feature(enable = "avx512f,avx512vl")]
13253#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13254#[cfg_attr(test, assert_instr(vpmovqd))]
13255pub fn _mm_maskz_cvtepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
13256    unsafe { transmute(vpmovqd128(a.as_i64x2(), i32x4::ZERO, k)) }
13257}
13258
13259/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13260///
13261/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi16&expand=1472)
13262#[inline]
13263#[target_feature(enable = "avx512f")]
13264#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13265#[cfg_attr(test, assert_instr(vpmovqw))]
13266pub fn _mm512_cvtepi64_epi16(a: __m512i) -> __m128i {
13267    unsafe {
13268        let a = a.as_i64x8();
13269        transmute::<i16x8, _>(simd_cast(a))
13270    }
13271}
13272
13273/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13274///
13275/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi16&expand=1473)
13276#[inline]
13277#[target_feature(enable = "avx512f")]
13278#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13279#[cfg_attr(test, assert_instr(vpmovqw))]
13280pub fn _mm512_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
13281    unsafe {
13282        let convert = _mm512_cvtepi64_epi16(a).as_i16x8();
13283        transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
13284    }
13285}
13286
13287/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13288///
13289/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi16&expand=1474)
13290#[inline]
13291#[target_feature(enable = "avx512f")]
13292#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13293#[cfg_attr(test, assert_instr(vpmovqw))]
13294pub fn _mm512_maskz_cvtepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
13295    unsafe {
13296        let convert = _mm512_cvtepi64_epi16(a).as_i16x8();
13297        transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
13298    }
13299}
13300
13301/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13302///
13303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi16&expand=1469)
13304#[inline]
13305#[target_feature(enable = "avx512f,avx512vl")]
13306#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13307#[cfg_attr(test, assert_instr(vpmovqw))]
13308pub fn _mm256_cvtepi64_epi16(a: __m256i) -> __m128i {
13309    unsafe { transmute(vpmovqw256(a.as_i64x4(), i16x8::ZERO, 0b11111111)) }
13310}
13311
13312/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13313///
13314/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi16&expand=1470)
13315#[inline]
13316#[target_feature(enable = "avx512f,avx512vl")]
13317#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13318#[cfg_attr(test, assert_instr(vpmovqw))]
13319pub fn _mm256_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13320    unsafe { transmute(vpmovqw256(a.as_i64x4(), src.as_i16x8(), k)) }
13321}
13322
13323/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13324///
13325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi16&expand=1471)
13326#[inline]
13327#[target_feature(enable = "avx512f,avx512vl")]
13328#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13329#[cfg_attr(test, assert_instr(vpmovqw))]
13330pub fn _mm256_maskz_cvtepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
13331    unsafe { transmute(vpmovqw256(a.as_i64x4(), i16x8::ZERO, k)) }
13332}
13333
13334/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13335///
13336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi16&expand=1466)
13337#[inline]
13338#[target_feature(enable = "avx512f,avx512vl")]
13339#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13340#[cfg_attr(test, assert_instr(vpmovqw))]
13341pub fn _mm_cvtepi64_epi16(a: __m128i) -> __m128i {
13342    unsafe { transmute(vpmovqw128(a.as_i64x2(), i16x8::ZERO, 0b11111111)) }
13343}
13344
13345/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13346///
13347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi16&expand=1467)
13348#[inline]
13349#[target_feature(enable = "avx512f,avx512vl")]
13350#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13351#[cfg_attr(test, assert_instr(vpmovqw))]
13352pub fn _mm_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13353    unsafe { transmute(vpmovqw128(a.as_i64x2(), src.as_i16x8(), k)) }
13354}
13355
13356/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13357///
13358/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi16&expand=1468)
13359#[inline]
13360#[target_feature(enable = "avx512f,avx512vl")]
13361#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13362#[cfg_attr(test, assert_instr(vpmovqw))]
13363pub fn _mm_maskz_cvtepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
13364    unsafe { transmute(vpmovqw128(a.as_i64x2(), i16x8::ZERO, k)) }
13365}
13366
13367/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13368///
13369/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi8&expand=1490)
13370#[inline]
13371#[target_feature(enable = "avx512f")]
13372#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13373#[cfg_attr(test, assert_instr(vpmovqb))]
13374pub fn _mm512_cvtepi64_epi8(a: __m512i) -> __m128i {
13375    unsafe { transmute(vpmovqb(a.as_i64x8(), i8x16::ZERO, 0b11111111)) }
13376}
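// The eight 64-bit elements narrow to eight bytes in the low half of the 128-bit
// result and, per Intel's pseudocode, the upper 8 bytes are zeroed. Truncation keeps
// only the low 8 bits of each element; a sketch with an illustrative value:
//
//     let a = _mm512_set1_epi64(0x1FF);
//     let r = _mm512_cvtepi64_epi8(a); // low 8 bytes are 0xFF, upper 8 bytes are 0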
13377
13378/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13379///
13380/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi8&expand=1491)
13381#[inline]
13382#[target_feature(enable = "avx512f")]
13383#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13384#[cfg_attr(test, assert_instr(vpmovqb))]
13385pub fn _mm512_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
13386    unsafe { transmute(vpmovqb(a.as_i64x8(), src.as_i8x16(), k)) }
13387}
13388
13389/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13390///
13391/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi8&expand=1492)
13392#[inline]
13393#[target_feature(enable = "avx512f")]
13394#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13395#[cfg_attr(test, assert_instr(vpmovqb))]
13396pub fn _mm512_maskz_cvtepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
13397    unsafe { transmute(vpmovqb(a.as_i64x8(), i8x16::ZERO, k)) }
13398}
13399
13400/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13401///
13402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi8&expand=1487)
13403#[inline]
13404#[target_feature(enable = "avx512f,avx512vl")]
13405#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13406#[cfg_attr(test, assert_instr(vpmovqb))]
13407pub fn _mm256_cvtepi64_epi8(a: __m256i) -> __m128i {
13408    unsafe { transmute(vpmovqb256(a.as_i64x4(), i8x16::ZERO, 0b11111111)) }
13409}
13410
13411/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13412///
13413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi8&expand=1488)
13414#[inline]
13415#[target_feature(enable = "avx512f,avx512vl")]
13416#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13417#[cfg_attr(test, assert_instr(vpmovqb))]
13418pub fn _mm256_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13419    unsafe { transmute(vpmovqb256(a.as_i64x4(), src.as_i8x16(), k)) }
13420}
13421
13422/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13423///
13424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi8&expand=1489)
13425#[inline]
13426#[target_feature(enable = "avx512f,avx512vl")]
13427#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13428#[cfg_attr(test, assert_instr(vpmovqb))]
13429pub fn _mm256_maskz_cvtepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
13430    unsafe { transmute(vpmovqb256(a.as_i64x4(), i8x16::ZERO, k)) }
13431}
13432
13433/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13434///
13435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi8&expand=1484)
13436#[inline]
13437#[target_feature(enable = "avx512f,avx512vl")]
13438#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13439#[cfg_attr(test, assert_instr(vpmovqb))]
13440pub fn _mm_cvtepi64_epi8(a: __m128i) -> __m128i {
13441    unsafe { transmute(vpmovqb128(a.as_i64x2(), i8x16::ZERO, 0b11111111)) }
13442}
13443
13444/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13445///
13446/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi8&expand=1485)
13447#[inline]
13448#[target_feature(enable = "avx512f,avx512vl")]
13449#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13450#[cfg_attr(test, assert_instr(vpmovqb))]
13451pub fn _mm_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13452    unsafe { transmute(vpmovqb128(a.as_i64x2(), src.as_i8x16(), k)) }
13453}
13454
13455/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13456///
13457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi8&expand=1486)
13458#[inline]
13459#[target_feature(enable = "avx512f,avx512vl")]
13460#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13461#[cfg_attr(test, assert_instr(vpmovqb))]
13462pub fn _mm_maskz_cvtepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
13463    unsafe { transmute(vpmovqb128(a.as_i64x2(), i8x16::ZERO, k)) }
13464}
13465
13466/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13467///
13468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi32_epi16&expand=1819)
13469#[inline]
13470#[target_feature(enable = "avx512f")]
13471#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13472#[cfg_attr(test, assert_instr(vpmovsdw))]
13473pub fn _mm512_cvtsepi32_epi16(a: __m512i) -> __m256i {
13474    unsafe { transmute(vpmovsdw(a.as_i32x16(), i16x16::ZERO, 0b11111111_11111111)) }
13475}
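// Unlike the truncating `_mm512_cvtepi32_epi16`, out-of-range values are clamped to
// the `i16` range. A minimal sketch with an illustrative value:
//
//     let a = _mm512_set1_epi32(100_000);
//     let r = _mm512_cvtsepi32_epi16(a); // every 16-bit lane saturates to 32767 (i16::MAX)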
13476
13477/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13478///
13479/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_epi16&expand=1820)
13480#[inline]
13481#[target_feature(enable = "avx512f")]
13482#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13483#[cfg_attr(test, assert_instr(vpmovsdw))]
13484pub fn _mm512_mask_cvtsepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
13485    unsafe { transmute(vpmovsdw(a.as_i32x16(), src.as_i16x16(), k)) }
13486}
13487
13488/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13489///
13490/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi32_epi16&expand=1821)
13491#[inline]
13492#[target_feature(enable = "avx512f")]
13493#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13494#[cfg_attr(test, assert_instr(vpmovsdw))]
13495pub fn _mm512_maskz_cvtsepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
13496    unsafe { transmute(vpmovsdw(a.as_i32x16(), i16x16::ZERO, k)) }
13497}
13498
13499/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13500///
13501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi32_epi16&expand=1816)
13502#[inline]
13503#[target_feature(enable = "avx512f,avx512vl")]
13504#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13505#[cfg_attr(test, assert_instr(vpmovsdw))]
13506pub fn _mm256_cvtsepi32_epi16(a: __m256i) -> __m128i {
13507    unsafe { transmute(vpmovsdw256(a.as_i32x8(), i16x8::ZERO, 0b11111111)) }
13508}
13509
13510/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13511///
13512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_epi16&expand=1817)
13513#[inline]
13514#[target_feature(enable = "avx512f,avx512vl")]
13515#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13516#[cfg_attr(test, assert_instr(vpmovsdw))]
13517pub fn _mm256_mask_cvtsepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13518    unsafe { transmute(vpmovsdw256(a.as_i32x8(), src.as_i16x8(), k)) }
13519}
13520
13521/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13522///
13523/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi32_epi16&expand=1818)
13524#[inline]
13525#[target_feature(enable = "avx512f,avx512vl")]
13526#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13527#[cfg_attr(test, assert_instr(vpmovsdw))]
13528pub fn _mm256_maskz_cvtsepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
13529    unsafe { transmute(vpmovsdw256(a.as_i32x8(), i16x8::ZERO, k)) }
13530}
13531
13532/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13533///
13534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi32_epi16&expand=1813)
13535#[inline]
13536#[target_feature(enable = "avx512f,avx512vl")]
13537#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13538#[cfg_attr(test, assert_instr(vpmovsdw))]
13539pub fn _mm_cvtsepi32_epi16(a: __m128i) -> __m128i {
13540    unsafe { transmute(vpmovsdw128(a.as_i32x4(), i16x8::ZERO, 0b11111111)) }
13541}
13542
13543/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13544///
13545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_epi16&expand=1814)
13546#[inline]
13547#[target_feature(enable = "avx512f,avx512vl")]
13548#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13549#[cfg_attr(test, assert_instr(vpmovsdw))]
13550pub fn _mm_mask_cvtsepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13551    unsafe { transmute(vpmovsdw128(a.as_i32x4(), src.as_i16x8(), k)) }
13552}
13553
13554/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13555///
13556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi32_epi16&expand=1815)
13557#[inline]
13558#[target_feature(enable = "avx512f,avx512vl")]
13559#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13560#[cfg_attr(test, assert_instr(vpmovsdw))]
13561pub fn _mm_maskz_cvtsepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
13562    unsafe { transmute(vpmovsdw128(a.as_i32x4(), i16x8::ZERO, k)) }
13563}
13564
13565/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13566///
13567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi32_epi8&expand=1828)
13568#[inline]
13569#[target_feature(enable = "avx512f")]
13570#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13571#[cfg_attr(test, assert_instr(vpmovsdb))]
13572pub fn _mm512_cvtsepi32_epi8(a: __m512i) -> __m128i {
13573    unsafe { transmute(vpmovsdb(a.as_i32x16(), i8x16::ZERO, 0b11111111_11111111)) }
13574}
13575
13576/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13577///
13578/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_epi8&expand=1829)
13579#[inline]
13580#[target_feature(enable = "avx512f")]
13581#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13582#[cfg_attr(test, assert_instr(vpmovsdb))]
13583pub fn _mm512_mask_cvtsepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
13584    unsafe { transmute(vpmovsdb(a.as_i32x16(), src.as_i8x16(), k)) }
13585}
13586
13587/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13588///
13589/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi32_epi8&expand=1830)
13590#[inline]
13591#[target_feature(enable = "avx512f")]
13592#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13593#[cfg_attr(test, assert_instr(vpmovsdb))]
13594pub fn _mm512_maskz_cvtsepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
13595    unsafe { transmute(vpmovsdb(a.as_i32x16(), i8x16::ZERO, k)) }
13596}
13597
13598/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13599///
13600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi32_epi8&expand=1825)
13601#[inline]
13602#[target_feature(enable = "avx512f,avx512vl")]
13603#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13604#[cfg_attr(test, assert_instr(vpmovsdb))]
13605pub fn _mm256_cvtsepi32_epi8(a: __m256i) -> __m128i {
13606    unsafe { transmute(vpmovsdb256(a.as_i32x8(), i8x16::ZERO, 0b11111111)) }
13607}
13608
13609/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13610///
13611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_epi8&expand=1826)
13612#[inline]
13613#[target_feature(enable = "avx512f,avx512vl")]
13614#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13615#[cfg_attr(test, assert_instr(vpmovsdb))]
13616pub fn _mm256_mask_cvtsepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13617    unsafe { transmute(vpmovsdb256(a.as_i32x8(), src.as_i8x16(), k)) }
13618}
13619
13620/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13621///
13622/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi32_epi8&expand=1827)
13623#[inline]
13624#[target_feature(enable = "avx512f,avx512vl")]
13625#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13626#[cfg_attr(test, assert_instr(vpmovsdb))]
13627pub fn _mm256_maskz_cvtsepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
13628    unsafe { transmute(vpmovsdb256(a.as_i32x8(), i8x16::ZERO, k)) }
13629}
13630
13631/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13632///
13633/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi32_epi8&expand=1822)
13634#[inline]
13635#[target_feature(enable = "avx512f,avx512vl")]
13636#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13637#[cfg_attr(test, assert_instr(vpmovsdb))]
13638pub fn _mm_cvtsepi32_epi8(a: __m128i) -> __m128i {
13639    unsafe { transmute(vpmovsdb128(a.as_i32x4(), i8x16::ZERO, 0b11111111)) }
13640}
13641
13642/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13643///
13644/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_epi8&expand=1823)
13645#[inline]
13646#[target_feature(enable = "avx512f,avx512vl")]
13647#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13648#[cfg_attr(test, assert_instr(vpmovsdb))]
13649pub fn _mm_mask_cvtsepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13650    unsafe { transmute(vpmovsdb128(a.as_i32x4(), src.as_i8x16(), k)) }
13651}
13652
13653/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13654///
13655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi32_epi8&expand=1824)
13656#[inline]
13657#[target_feature(enable = "avx512f,avx512vl")]
13658#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13659#[cfg_attr(test, assert_instr(vpmovsdb))]
13660pub fn _mm_maskz_cvtsepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
13661    unsafe { transmute(vpmovsdb128(a.as_i32x4(), i8x16::ZERO, k)) }
13662}
13663
13664/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
13665///
13666/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi32&expand=1852)
13667#[inline]
13668#[target_feature(enable = "avx512f")]
13669#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13670#[cfg_attr(test, assert_instr(vpmovsqd))]
13671pub fn _mm512_cvtsepi64_epi32(a: __m512i) -> __m256i {
13672    unsafe { transmute(vpmovsqd(a.as_i64x8(), i32x8::ZERO, 0b11111111)) }
13673}
13674
13675/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13676///
13677/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi32&expand=1853)
13678#[inline]
13679#[target_feature(enable = "avx512f")]
13680#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13681#[cfg_attr(test, assert_instr(vpmovsqd))]
13682pub fn _mm512_mask_cvtsepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
13683    unsafe { transmute(vpmovsqd(a.as_i64x8(), src.as_i32x8(), k)) }
13684}
13685
13686/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13687///
13688/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi32&expand=1854)
13689#[inline]
13690#[target_feature(enable = "avx512f")]
13691#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13692#[cfg_attr(test, assert_instr(vpmovsqd))]
13693pub fn _mm512_maskz_cvtsepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
13694    unsafe { transmute(vpmovsqd(a.as_i64x8(), i32x8::ZERO, k)) }
13695}
13696
13697/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
13698///
13699/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi32&expand=1849)
13700#[inline]
13701#[target_feature(enable = "avx512f,avx512vl")]
13702#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13703#[cfg_attr(test, assert_instr(vpmovsqd))]
13704pub fn _mm256_cvtsepi64_epi32(a: __m256i) -> __m128i {
13705    unsafe { transmute(vpmovsqd256(a.as_i64x4(), i32x4::ZERO, 0b11111111)) }
13706}
13707
13708/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13709///
13710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi32&expand=1850)
13711#[inline]
13712#[target_feature(enable = "avx512f,avx512vl")]
13713#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13714#[cfg_attr(test, assert_instr(vpmovsqd))]
13715pub fn _mm256_mask_cvtsepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13716    unsafe { transmute(vpmovsqd256(a.as_i64x4(), src.as_i32x4(), k)) }
13717}
13718
13719/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13720///
13721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi32&expand=1851)
13722#[inline]
13723#[target_feature(enable = "avx512f,avx512vl")]
13724#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13725#[cfg_attr(test, assert_instr(vpmovsqd))]
13726pub fn _mm256_maskz_cvtsepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
13727    unsafe { transmute(vpmovsqd256(a.as_i64x4(), i32x4::ZERO, k)) }
13728}
13729
13730/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
13731///
13732/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi32&expand=1846)
13733#[inline]
13734#[target_feature(enable = "avx512f,avx512vl")]
13735#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13736#[cfg_attr(test, assert_instr(vpmovsqd))]
13737pub fn _mm_cvtsepi64_epi32(a: __m128i) -> __m128i {
13738    unsafe { transmute(vpmovsqd128(a.as_i64x2(), i32x4::ZERO, 0b11111111)) }
13739}
13740
13741/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13742///
13743/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi32&expand=1847)
13744#[inline]
13745#[target_feature(enable = "avx512f,avx512vl")]
13746#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13747#[cfg_attr(test, assert_instr(vpmovsqd))]
13748pub fn _mm_mask_cvtsepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13749    unsafe { transmute(vpmovsqd128(a.as_i64x2(), src.as_i32x4(), k)) }
13750}
13751
13752/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13753///
13754/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi32&expand=1848)
13755#[inline]
13756#[target_feature(enable = "avx512f,avx512vl")]
13757#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13758#[cfg_attr(test, assert_instr(vpmovsqd))]
13759pub fn _mm_maskz_cvtsepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
13760    unsafe { transmute(vpmovsqd128(a.as_i64x2(), i32x4::ZERO, k)) }
13761}
13762
13763/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13764///
13765/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi16&expand=1843)
13766#[inline]
13767#[target_feature(enable = "avx512f")]
13768#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13769#[cfg_attr(test, assert_instr(vpmovsqw))]
13770pub fn _mm512_cvtsepi64_epi16(a: __m512i) -> __m128i {
13771    unsafe { transmute(vpmovsqw(a.as_i64x8(), i16x8::ZERO, 0b11111111)) }
13772}
13773
13774/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13775///
13776/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi16&expand=1844)
13777#[inline]
13778#[target_feature(enable = "avx512f")]
13779#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13780#[cfg_attr(test, assert_instr(vpmovsqw))]
13781pub fn _mm512_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
13782    unsafe { transmute(vpmovsqw(a.as_i64x8(), src.as_i16x8(), k)) }
13783}
13784
13785/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13786///
13787/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi16&expand=1845)
13788#[inline]
13789#[target_feature(enable = "avx512f")]
13790#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13791#[cfg_attr(test, assert_instr(vpmovsqw))]
13792pub fn _mm512_maskz_cvtsepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
13793    unsafe { transmute(vpmovsqw(a.as_i64x8(), i16x8::ZERO, k)) }
13794}
13795
13796/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13797///
13798/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi16&expand=1840)
13799#[inline]
13800#[target_feature(enable = "avx512f,avx512vl")]
13801#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13802#[cfg_attr(test, assert_instr(vpmovsqw))]
13803pub fn _mm256_cvtsepi64_epi16(a: __m256i) -> __m128i {
13804    unsafe { transmute(vpmovsqw256(a.as_i64x4(), i16x8::ZERO, 0b11111111)) }
13805}
13806
13807/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13808///
13809/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi16&expand=1841)
13810#[inline]
13811#[target_feature(enable = "avx512f,avx512vl")]
13812#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13813#[cfg_attr(test, assert_instr(vpmovsqw))]
13814pub fn _mm256_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13815    unsafe { transmute(vpmovsqw256(a.as_i64x4(), src.as_i16x8(), k)) }
13816}
13817
13818/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13819///
13820/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi16&expand=1842)
13821#[inline]
13822#[target_feature(enable = "avx512f,avx512vl")]
13823#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13824#[cfg_attr(test, assert_instr(vpmovsqw))]
13825pub fn _mm256_maskz_cvtsepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
13826    unsafe { transmute(vpmovsqw256(a.as_i64x4(), i16x8::ZERO, k)) }
13827}
13828
13829/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13830///
13831/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi16&expand=1837)
13832#[inline]
13833#[target_feature(enable = "avx512f,avx512vl")]
13834#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13835#[cfg_attr(test, assert_instr(vpmovsqw))]
13836pub fn _mm_cvtsepi64_epi16(a: __m128i) -> __m128i {
13837    unsafe { transmute(vpmovsqw128(a.as_i64x2(), i16x8::ZERO, 0b11111111)) }
13838}
13839
13840/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13841///
13842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi16&expand=1838)
13843#[inline]
13844#[target_feature(enable = "avx512f,avx512vl")]
13845#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13846#[cfg_attr(test, assert_instr(vpmovsqw))]
13847pub fn _mm_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13848    unsafe { transmute(vpmovsqw128(a.as_i64x2(), src.as_i16x8(), k)) }
13849}
13850
13851/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13852///
13853/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi16&expand=1839)
13854#[inline]
13855#[target_feature(enable = "avx512f,avx512vl")]
13856#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13857#[cfg_attr(test, assert_instr(vpmovsqw))]
13858pub fn _mm_maskz_cvtsepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
13859    unsafe { transmute(vpmovsqw128(a.as_i64x2(), i16x8::ZERO, k)) }
13860}
13861
13862/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13863///
13864/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi8&expand=1861)
13865#[inline]
13866#[target_feature(enable = "avx512f")]
13867#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13868#[cfg_attr(test, assert_instr(vpmovsqb))]
13869pub fn _mm512_cvtsepi64_epi8(a: __m512i) -> __m128i {
13870    unsafe { transmute(vpmovsqb(a.as_i64x8(), i8x16::ZERO, 0b11111111)) }
13871}
13872
13873/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13874///
13875/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi8&expand=1862)
13876#[inline]
13877#[target_feature(enable = "avx512f")]
13878#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13879#[cfg_attr(test, assert_instr(vpmovsqb))]
13880pub fn _mm512_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
13881    unsafe { transmute(vpmovsqb(a.as_i64x8(), src.as_i8x16(), k)) }
13882}
13883
13884/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13885///
13886/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi8&expand=1863)
13887#[inline]
13888#[target_feature(enable = "avx512f")]
13889#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13890#[cfg_attr(test, assert_instr(vpmovsqb))]
13891pub fn _mm512_maskz_cvtsepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
13892    unsafe { transmute(vpmovsqb(a.as_i64x8(), i8x16::ZERO, k)) }
13893}
13894
13895/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13896///
13897/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi8&expand=1858)
13898#[inline]
13899#[target_feature(enable = "avx512f,avx512vl")]
13900#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13901#[cfg_attr(test, assert_instr(vpmovsqb))]
13902pub fn _mm256_cvtsepi64_epi8(a: __m256i) -> __m128i {
13903    unsafe { transmute(vpmovsqb256(a.as_i64x4(), i8x16::ZERO, 0b11111111)) }
13904}
13905
13906/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13907///
13908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi8&expand=1859)
13909#[inline]
13910#[target_feature(enable = "avx512f,avx512vl")]
13911#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13912#[cfg_attr(test, assert_instr(vpmovsqb))]
13913pub fn _mm256_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13914    unsafe { transmute(vpmovsqb256(a.as_i64x4(), src.as_i8x16(), k)) }
13915}
13916
13917/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13918///
13919/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi8&expand=1860)
13920#[inline]
13921#[target_feature(enable = "avx512f,avx512vl")]
13922#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13923#[cfg_attr(test, assert_instr(vpmovsqb))]
13924pub fn _mm256_maskz_cvtsepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
13925    unsafe { transmute(vpmovsqb256(a.as_i64x4(), i8x16::ZERO, k)) }
13926}
13927
13928/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13929///
13930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi8&expand=1855)
13931#[inline]
13932#[target_feature(enable = "avx512f,avx512vl")]
13933#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13934#[cfg_attr(test, assert_instr(vpmovsqb))]
13935pub fn _mm_cvtsepi64_epi8(a: __m128i) -> __m128i {
13936    unsafe { transmute(vpmovsqb128(a.as_i64x2(), i8x16::ZERO, 0b11111111)) }
13937}
13938
13939/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13940///
13941/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi8&expand=1856)
13942#[inline]
13943#[target_feature(enable = "avx512f,avx512vl")]
13944#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13945#[cfg_attr(test, assert_instr(vpmovsqb))]
13946pub fn _mm_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13947    unsafe { transmute(vpmovsqb128(a.as_i64x2(), src.as_i8x16(), k)) }
13948}
13949
13950/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13951///
13952/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi8&expand=1857)
13953#[inline]
13954#[target_feature(enable = "avx512f,avx512vl")]
13955#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13956#[cfg_attr(test, assert_instr(vpmovsqb))]
13957pub fn _mm_maskz_cvtsepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
13958    unsafe { transmute(vpmovsqb128(a.as_i64x2(), i8x16::ZERO, k)) }
13959}
13960
13961/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
13962///
13963/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi32_epi16&expand=2054)
13964#[inline]
13965#[target_feature(enable = "avx512f")]
13966#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13967#[cfg_attr(test, assert_instr(vpmovusdw))]
13968pub fn _mm512_cvtusepi32_epi16(a: __m512i) -> __m256i {
13969    unsafe { transmute(vpmovusdw(a.as_u32x16(), u16x16::ZERO, 0b11111111_11111111)) }
13970}
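
// --- Illustrative usage sketch (not part of the upstream stdarch sources) ---
// Demonstrates unsigned saturation: any source lane above u16::MAX is clamped to
// 0xFFFF. Assumes `avx512f` is available; the helper name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn cvtusepi32_epi16_usage_sketch() {
    // 70_000 is interpreted as an unsigned 32-bit value and exceeds u16::MAX,
    // so every one of the sixteen 16-bit result lanes becomes 0xFFFF (65535).
    let a = _mm512_set1_epi32(70_000);
    let r = _mm512_cvtusepi32_epi16(a);
    let _ = r;
}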
13971
13972/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13973///
13974/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_epi16&expand=2055)
13975#[inline]
13976#[target_feature(enable = "avx512f")]
13977#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13978#[cfg_attr(test, assert_instr(vpmovusdw))]
13979pub fn _mm512_mask_cvtusepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
13980    unsafe { transmute(vpmovusdw(a.as_u32x16(), src.as_u16x16(), k)) }
13981}
13982
13983/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13984///
13985/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi32_epi16&expand=2056)
13986#[inline]
13987#[target_feature(enable = "avx512f")]
13988#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13989#[cfg_attr(test, assert_instr(vpmovusdw))]
13990pub fn _mm512_maskz_cvtusepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
13991    unsafe { transmute(vpmovusdw(a.as_u32x16(), u16x16::ZERO, k)) }
13992}
13993
13994/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
13995///
13996/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi32_epi16&expand=2051)
13997#[inline]
13998#[target_feature(enable = "avx512f,avx512vl")]
13999#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14000#[cfg_attr(test, assert_instr(vpmovusdw))]
14001pub fn _mm256_cvtusepi32_epi16(a: __m256i) -> __m128i {
14002    unsafe { transmute(vpmovusdw256(a.as_u32x8(), u16x8::ZERO, 0b11111111)) }
14003}
14004
14005/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14006///
14007/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_epi16&expand=2052)
14008#[inline]
14009#[target_feature(enable = "avx512f,avx512vl")]
14010#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14011#[cfg_attr(test, assert_instr(vpmovusdw))]
14012pub fn _mm256_mask_cvtusepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14013    unsafe { transmute(vpmovusdw256(a.as_u32x8(), src.as_u16x8(), k)) }
14014}
14015
14016/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14017///
14018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi32_epi16&expand=2053)
14019#[inline]
14020#[target_feature(enable = "avx512f,avx512vl")]
14021#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14022#[cfg_attr(test, assert_instr(vpmovusdw))]
14023pub fn _mm256_maskz_cvtusepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
14024    unsafe { transmute(vpmovusdw256(a.as_u32x8(), u16x8::ZERO, k)) }
14025}
14026
14027/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14028///
14029/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi32_epi16&expand=2048)
14030#[inline]
14031#[target_feature(enable = "avx512f,avx512vl")]
14032#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14033#[cfg_attr(test, assert_instr(vpmovusdw))]
14034pub fn _mm_cvtusepi32_epi16(a: __m128i) -> __m128i {
14035    unsafe { transmute(vpmovusdw128(a.as_u32x4(), u16x8::ZERO, 0b11111111)) }
14036}
14037
14038/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14039///
14040/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_epi16&expand=2049)
14041#[inline]
14042#[target_feature(enable = "avx512f,avx512vl")]
14043#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14044#[cfg_attr(test, assert_instr(vpmovusdw))]
14045pub fn _mm_mask_cvtusepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14046    unsafe { transmute(vpmovusdw128(a.as_u32x4(), src.as_u16x8(), k)) }
14047}
14048
14049/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14050///
14051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi32_epi16&expand=2050)
14052#[inline]
14053#[target_feature(enable = "avx512f,avx512vl")]
14054#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14055#[cfg_attr(test, assert_instr(vpmovusdw))]
14056pub fn _mm_maskz_cvtusepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
14057    unsafe { transmute(vpmovusdw128(a.as_u32x4(), u16x8::ZERO, k)) }
14058}
14059
14060/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14061///
14062/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi32_epi8&expand=2063)
14063#[inline]
14064#[target_feature(enable = "avx512f")]
14065#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14066#[cfg_attr(test, assert_instr(vpmovusdb))]
14067pub fn _mm512_cvtusepi32_epi8(a: __m512i) -> __m128i {
14068    unsafe { transmute(vpmovusdb(a.as_u32x16(), u8x16::ZERO, 0b11111111_11111111)) }
14069}
14070
14071/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14072///
14073/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_epi8&expand=2064)
14074#[inline]
14075#[target_feature(enable = "avx512f")]
14076#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14077#[cfg_attr(test, assert_instr(vpmovusdb))]
14078pub fn _mm512_mask_cvtusepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
14079    unsafe { transmute(vpmovusdb(a.as_u32x16(), src.as_u8x16(), k)) }
14080}
14081
14082/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14083///
14084/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi32_epi8&expand=2065)
14085#[inline]
14086#[target_feature(enable = "avx512f")]
14087#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14088#[cfg_attr(test, assert_instr(vpmovusdb))]
14089pub fn _mm512_maskz_cvtusepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
14090    unsafe { transmute(vpmovusdb(a.as_u32x16(), u8x16::ZERO, k)) }
14091}
14092
14093/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14094///
14095/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi32_epi8&expand=2060)
14096#[inline]
14097#[target_feature(enable = "avx512f,avx512vl")]
14098#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14099#[cfg_attr(test, assert_instr(vpmovusdb))]
14100pub fn _mm256_cvtusepi32_epi8(a: __m256i) -> __m128i {
14101    unsafe { transmute(vpmovusdb256(a.as_u32x8(), u8x16::ZERO, 0b11111111)) }
14102}
14103
14104/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14105///
14106/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_epi8&expand=2061)
14107#[inline]
14108#[target_feature(enable = "avx512f,avx512vl")]
14109#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14110#[cfg_attr(test, assert_instr(vpmovusdb))]
14111pub fn _mm256_mask_cvtusepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14112    unsafe { transmute(vpmovusdb256(a.as_u32x8(), src.as_u8x16(), k)) }
14113}
14114
14115/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14116///
14117/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi32_epi8&expand=2062)
14118#[inline]
14119#[target_feature(enable = "avx512f,avx512vl")]
14120#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14121#[cfg_attr(test, assert_instr(vpmovusdb))]
14122pub fn _mm256_maskz_cvtusepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
14123    unsafe { transmute(vpmovusdb256(a.as_u32x8(), u8x16::ZERO, k)) }
14124}
14125
14126/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14127///
14128/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi32_epi8&expand=2057)
14129#[inline]
14130#[target_feature(enable = "avx512f,avx512vl")]
14131#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14132#[cfg_attr(test, assert_instr(vpmovusdb))]
14133pub fn _mm_cvtusepi32_epi8(a: __m128i) -> __m128i {
14134    unsafe { transmute(vpmovusdb128(a.as_u32x4(), u8x16::ZERO, 0b11111111)) }
14135}
14136
14137/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14138///
14139/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_epi8&expand=2058)
14140#[inline]
14141#[target_feature(enable = "avx512f,avx512vl")]
14142#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14143#[cfg_attr(test, assert_instr(vpmovusdb))]
14144pub fn _mm_mask_cvtusepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14145    unsafe { transmute(vpmovusdb128(a.as_u32x4(), src.as_u8x16(), k)) }
14146}
14147
14148/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14149///
14150/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi32_epi8&expand=2059)
14151#[inline]
14152#[target_feature(enable = "avx512f,avx512vl")]
14153#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14154#[cfg_attr(test, assert_instr(vpmovusdb))]
14155pub fn _mm_maskz_cvtusepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
14156    unsafe { transmute(vpmovusdb128(a.as_u32x4(), u8x16::ZERO, k)) }
14157}
14158
14159/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
14160///
14161/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi32&expand=2087)
14162#[inline]
14163#[target_feature(enable = "avx512f")]
14164#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14165#[cfg_attr(test, assert_instr(vpmovusqd))]
14166pub fn _mm512_cvtusepi64_epi32(a: __m512i) -> __m256i {
14167    unsafe { transmute(vpmovusqd(a.as_u64x8(), u32x8::ZERO, 0b11111111)) }
14168}
14169
14170/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14171///
14172/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi32&expand=2088)
14173#[inline]
14174#[target_feature(enable = "avx512f")]
14175#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14176#[cfg_attr(test, assert_instr(vpmovusqd))]
14177pub fn _mm512_mask_cvtusepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
14178    unsafe { transmute(vpmovusqd(a.as_u64x8(), src.as_u32x8(), k)) }
14179}
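
// --- Illustrative usage sketch (not part of the upstream stdarch sources) ---
// Contrasts the writemask behaviour with the zeromask variants: lanes whose mask
// bit is clear keep the corresponding lane of `src` instead of being zeroed.
// Assumes `avx512f` and `avx` are available; the helper name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx")]
fn mask_cvtusepi64_epi32_usage_sketch() {
    let src = _mm256_set1_epi32(-1); // 0xFFFF_FFFF in every 32-bit lane
    let a = _mm512_set1_epi64(7);
    // Mask 0b0101_0101: even lanes take the converted value 7, odd lanes are
    // copied unchanged from `src`.
    let r = _mm512_mask_cvtusepi64_epi32(src, 0b0101_0101, a);
    let _ = r;
}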
14180
14181/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14182///
14183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi32&expand=2089)
14184#[inline]
14185#[target_feature(enable = "avx512f")]
14186#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14187#[cfg_attr(test, assert_instr(vpmovusqd))]
14188pub fn _mm512_maskz_cvtusepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
14189    unsafe { transmute(vpmovusqd(a.as_u64x8(), u32x8::ZERO, k)) }
14190}
14191
14192/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
14193///
14194/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi32&expand=2084)
14195#[inline]
14196#[target_feature(enable = "avx512f,avx512vl")]
14197#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14198#[cfg_attr(test, assert_instr(vpmovusqd))]
14199pub fn _mm256_cvtusepi64_epi32(a: __m256i) -> __m128i {
14200    unsafe { transmute(vpmovusqd256(a.as_u64x4(), u32x4::ZERO, 0b11111111)) }
14201}
14202
14203/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14204///
14205/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi32&expand=2085)
14206#[inline]
14207#[target_feature(enable = "avx512f,avx512vl")]
14208#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14209#[cfg_attr(test, assert_instr(vpmovusqd))]
14210pub fn _mm256_mask_cvtusepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14211    unsafe { transmute(vpmovusqd256(a.as_u64x4(), src.as_u32x4(), k)) }
14212}
14213
14214/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14215///
14216/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi32&expand=2086)
14217#[inline]
14218#[target_feature(enable = "avx512f,avx512vl")]
14219#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14220#[cfg_attr(test, assert_instr(vpmovusqd))]
14221pub fn _mm256_maskz_cvtusepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
14222    unsafe { transmute(vpmovusqd256(a.as_u64x4(), u32x4::ZERO, k)) }
14223}
14224
14225/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
14226///
14227/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi32&expand=2081)
14228#[inline]
14229#[target_feature(enable = "avx512f,avx512vl")]
14230#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14231#[cfg_attr(test, assert_instr(vpmovusqd))]
14232pub fn _mm_cvtusepi64_epi32(a: __m128i) -> __m128i {
14233    unsafe { transmute(vpmovusqd128(a.as_u64x2(), u32x4::ZERO, 0b11111111)) }
14234}
14235
14236/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14237///
14238/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi32&expand=2082)
14239#[inline]
14240#[target_feature(enable = "avx512f,avx512vl")]
14241#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14242#[cfg_attr(test, assert_instr(vpmovusqd))]
14243pub fn _mm_mask_cvtusepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14244    unsafe { transmute(vpmovusqd128(a.as_u64x2(), src.as_u32x4(), k)) }
14245}
14246
14247/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14248///
14249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi32&expand=2083)
14250#[inline]
14251#[target_feature(enable = "avx512f,avx512vl")]
14252#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14253#[cfg_attr(test, assert_instr(vpmovusqd))]
14254pub fn _mm_maskz_cvtusepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
14255    unsafe { transmute(vpmovusqd128(a.as_u64x2(), u32x4::ZERO, k)) }
14256}
14257
14258/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14259///
14260/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi16&expand=2078)
14261#[inline]
14262#[target_feature(enable = "avx512f")]
14263#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14264#[cfg_attr(test, assert_instr(vpmovusqw))]
14265pub fn _mm512_cvtusepi64_epi16(a: __m512i) -> __m128i {
14266    unsafe { transmute(vpmovusqw(a.as_u64x8(), u16x8::ZERO, 0b11111111)) }
14267}
14268
14269/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14270///
14271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi16&expand=2079)
14272#[inline]
14273#[target_feature(enable = "avx512f")]
14274#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14275#[cfg_attr(test, assert_instr(vpmovusqw))]
14276pub fn _mm512_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
14277    unsafe { transmute(vpmovusqw(a.as_u64x8(), src.as_u16x8(), k)) }
14278}
14279
14280/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14281///
14282/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi16&expand=2080)
14283#[inline]
14284#[target_feature(enable = "avx512f")]
14285#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14286#[cfg_attr(test, assert_instr(vpmovusqw))]
14287pub fn _mm512_maskz_cvtusepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
14288    unsafe { transmute(vpmovusqw(a.as_u64x8(), u16x8::ZERO, k)) }
14289}
14290
14291/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14292///
14293/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi16&expand=2075)
14294#[inline]
14295#[target_feature(enable = "avx512f,avx512vl")]
14296#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14297#[cfg_attr(test, assert_instr(vpmovusqw))]
14298pub fn _mm256_cvtusepi64_epi16(a: __m256i) -> __m128i {
14299    unsafe { transmute(vpmovusqw256(a.as_u64x4(), u16x8::ZERO, 0b11111111)) }
14300}
14301
14302/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14303///
14304/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi16&expand=2076)
14305#[inline]
14306#[target_feature(enable = "avx512f,avx512vl")]
14307#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14308#[cfg_attr(test, assert_instr(vpmovusqw))]
14309pub fn _mm256_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14310    unsafe { transmute(vpmovusqw256(a.as_u64x4(), src.as_u16x8(), k)) }
14311}
14312
14313/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14314///
14315/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi16&expand=2077)
14316#[inline]
14317#[target_feature(enable = "avx512f,avx512vl")]
14318#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14319#[cfg_attr(test, assert_instr(vpmovusqw))]
14320pub fn _mm256_maskz_cvtusepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
14321    unsafe { transmute(vpmovusqw256(a.as_u64x4(), u16x8::ZERO, k)) }
14322}
14323
14324/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14325///
14326/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi16&expand=2072)
14327#[inline]
14328#[target_feature(enable = "avx512f,avx512vl")]
14329#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14330#[cfg_attr(test, assert_instr(vpmovusqw))]
14331pub fn _mm_cvtusepi64_epi16(a: __m128i) -> __m128i {
14332    unsafe { transmute(vpmovusqw128(a.as_u64x2(), u16x8::ZERO, 0b11111111)) }
14333}
14334
14335/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14336///
14337/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi16&expand=2073)
14338#[inline]
14339#[target_feature(enable = "avx512f,avx512vl")]
14340#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14341#[cfg_attr(test, assert_instr(vpmovusqw))]
14342pub fn _mm_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14343    unsafe { transmute(vpmovusqw128(a.as_u64x2(), src.as_u16x8(), k)) }
14344}
14345
14346/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14347///
14348/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi16&expand=2074)
14349#[inline]
14350#[target_feature(enable = "avx512f,avx512vl")]
14351#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14352#[cfg_attr(test, assert_instr(vpmovusqw))]
14353pub fn _mm_maskz_cvtusepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
14354    unsafe { transmute(vpmovusqw128(a.as_u64x2(), u16x8::ZERO, k)) }
14355}
14356
14357/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14358///
14359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi8&expand=2096)
14360#[inline]
14361#[target_feature(enable = "avx512f")]
14362#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14363#[cfg_attr(test, assert_instr(vpmovusqb))]
14364pub fn _mm512_cvtusepi64_epi8(a: __m512i) -> __m128i {
14365    unsafe { transmute(vpmovusqb(a.as_u64x8(), u8x16::ZERO, 0b11111111)) }
14366}
14367
14368/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14369///
14370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi8&expand=2097)
14371#[inline]
14372#[target_feature(enable = "avx512f")]
14373#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14374#[cfg_attr(test, assert_instr(vpmovusqb))]
14375pub fn _mm512_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
14376    unsafe { transmute(vpmovusqb(a.as_u64x8(), src.as_u8x16(), k)) }
14377}
14378
14379/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14380///
14381/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi8&expand=2098)
14382#[inline]
14383#[target_feature(enable = "avx512f")]
14384#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14385#[cfg_attr(test, assert_instr(vpmovusqb))]
14386pub fn _mm512_maskz_cvtusepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
14387    unsafe { transmute(vpmovusqb(a.as_u64x8(), u8x16::ZERO, k)) }
14388}
14389
14390/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14391///
14392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi8&expand=2093)
14393#[inline]
14394#[target_feature(enable = "avx512f,avx512vl")]
14395#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14396#[cfg_attr(test, assert_instr(vpmovusqb))]
14397pub fn _mm256_cvtusepi64_epi8(a: __m256i) -> __m128i {
14398    unsafe { transmute(vpmovusqb256(a.as_u64x4(), u8x16::ZERO, 0b11111111)) }
14399}
14400
14401/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14402///
14403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi8&expand=2094)
14404#[inline]
14405#[target_feature(enable = "avx512f,avx512vl")]
14406#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14407#[cfg_attr(test, assert_instr(vpmovusqb))]
14408pub fn _mm256_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14409    unsafe { transmute(vpmovusqb256(a.as_u64x4(), src.as_u8x16(), k)) }
14410}
14411
14412/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14413///
14414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi8&expand=2095)
14415#[inline]
14416#[target_feature(enable = "avx512f,avx512vl")]
14417#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14418#[cfg_attr(test, assert_instr(vpmovusqb))]
14419pub fn _mm256_maskz_cvtusepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
14420    unsafe { transmute(vpmovusqb256(a.as_u64x4(), u8x16::ZERO, k)) }
14421}
14422
14423/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14424///
14425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi8&expand=2090)
14426#[inline]
14427#[target_feature(enable = "avx512f,avx512vl")]
14428#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14429#[cfg_attr(test, assert_instr(vpmovusqb))]
14430pub fn _mm_cvtusepi64_epi8(a: __m128i) -> __m128i {
14431    unsafe { transmute(vpmovusqb128(a.as_u64x2(), u8x16::ZERO, 0b11111111)) }
14432}
14433
14434/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14435///
14436/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi8&expand=2091)
14437#[inline]
14438#[target_feature(enable = "avx512f,avx512vl")]
14439#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14440#[cfg_attr(test, assert_instr(vpmovusqb))]
14441pub fn _mm_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14442    unsafe { transmute(vpmovusqb128(a.as_u64x2(), src.as_u8x16(), k)) }
14443}
14444
14445/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14446///
14447/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi8&expand=2092)
14448#[inline]
14449#[target_feature(enable = "avx512f,avx512vl")]
14450#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14451#[cfg_attr(test, assert_instr(vpmovusqb))]
14452pub fn _mm_maskz_cvtusepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
14453    unsafe { transmute(vpmovusqb128(a.as_u64x2(), u8x16::ZERO, k)) }
14454}
14455
14456/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.\
14457///
14458/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14459/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14460/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14461/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14462/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14463/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14464///
14465/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epi32&expand=1335)
14466#[inline]
14467#[target_feature(enable = "avx512f")]
14468#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14469#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
14470#[rustc_legacy_const_generics(1)]
14471pub fn _mm512_cvt_roundps_epi32<const ROUNDING: i32>(a: __m512) -> __m512i {
14472    unsafe {
14473        static_assert_rounding!(ROUNDING);
14474        let a = a.as_f32x16();
14475        let r = vcvtps2dq(a, i32x16::ZERO, 0b11111111_11111111, ROUNDING);
14476        transmute(r)
14477    }
14478}
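
// --- Illustrative usage sketch (not part of the upstream stdarch sources) ---
// Shows how the ROUNDING const generic selects the rounding mode independently of
// MXCSR. Assumes `avx512f` is available; the helper name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn cvt_roundps_epi32_usage_sketch() {
    let a = _mm512_set1_ps(1.5);
    // Round toward negative infinity: every lane becomes 1.
    let down = _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
    // Round toward positive infinity: every lane becomes 2.
    let up = _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
    // Round to nearest even: the tie 1.5 also becomes 2.
    let nearest = _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
    let _ = (down, up, nearest);
}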
14479
14480/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14481///
14482/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14483/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14484/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14485/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14486/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14487/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14488///
14489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epi32&expand=1336)
14490#[inline]
14491#[target_feature(enable = "avx512f")]
14492#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14493#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
14494#[rustc_legacy_const_generics(3)]
14495pub fn _mm512_mask_cvt_roundps_epi32<const ROUNDING: i32>(
14496    src: __m512i,
14497    k: __mmask16,
14498    a: __m512,
14499) -> __m512i {
14500    unsafe {
14501        static_assert_rounding!(ROUNDING);
14502        let a = a.as_f32x16();
14503        let src = src.as_i32x16();
14504        let r = vcvtps2dq(a, src, k, ROUNDING);
14505        transmute(r)
14506    }
14507}
14508
14509/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14510///
14511/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14512/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14513/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14514/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14515/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14516/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14517///
14518/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epi32&expand=1337)
14519#[inline]
14520#[target_feature(enable = "avx512f")]
14521#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14522#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
14523#[rustc_legacy_const_generics(2)]
14524pub fn _mm512_maskz_cvt_roundps_epi32<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m512i {
14525    unsafe {
14526        static_assert_rounding!(ROUNDING);
14527        let a = a.as_f32x16();
14528        let r = vcvtps2dq(a, i32x16::ZERO, k, ROUNDING);
14529        transmute(r)
14530    }
14531}
14532
14533/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.\
14534///
14535/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14536/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14537/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14538/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14539/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14540/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14541///
14542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epu32&expand=1341)
14543#[inline]
14544#[target_feature(enable = "avx512f")]
14545#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14546#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
14547#[rustc_legacy_const_generics(1)]
14548pub fn _mm512_cvt_roundps_epu32<const ROUNDING: i32>(a: __m512) -> __m512i {
14549    unsafe {
14550        static_assert_rounding!(ROUNDING);
14551        let a = a.as_f32x16();
14552        let r = vcvtps2udq(a, u32x16::ZERO, 0b11111111_11111111, ROUNDING);
14553        transmute(r)
14554    }
14555}
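
// --- Illustrative usage sketch (not part of the upstream stdarch sources) ---
// The unsigned conversion can represent values above i32::MAX that the signed
// `_mm512_cvt_roundps_epi32` cannot. Assumes `avx512f` is available; the helper
// name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn cvt_roundps_epu32_usage_sketch() {
    // 3_000_000_000.0 is exactly representable as an f32 and exceeds i32::MAX,
    // so only the unsigned conversion recovers it exactly (as u32 3_000_000_000).
    let a = _mm512_set1_ps(3_000_000_000.0);
    let r = _mm512_cvt_roundps_epu32::<{ _MM_FROUND_CUR_DIRECTION }>(a);
    let _ = r;
}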
14556
14557/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14558///
14559/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14560/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14561/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14562/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14563/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14564/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14565///
14566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epu32&expand=1342)
14567#[inline]
14568#[target_feature(enable = "avx512f")]
14569#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14570#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
14571#[rustc_legacy_const_generics(3)]
14572pub fn _mm512_mask_cvt_roundps_epu32<const ROUNDING: i32>(
14573    src: __m512i,
14574    k: __mmask16,
14575    a: __m512,
14576) -> __m512i {
14577    unsafe {
14578        static_assert_rounding!(ROUNDING);
14579        let a = a.as_f32x16();
14580        let src = src.as_u32x16();
14581        let r = vcvtps2udq(a, src, k, ROUNDING);
14582        transmute(r)
14583    }
14584}
14585
14586/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14587///
14588/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14589/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14590/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14591/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14592/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14593/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14594///
14595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epu32&expand=1343)
14596#[inline]
14597#[target_feature(enable = "avx512f")]
14598#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14599#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
14600#[rustc_legacy_const_generics(2)]
14601pub fn _mm512_maskz_cvt_roundps_epu32<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m512i {
14602    unsafe {
14603        static_assert_rounding!(ROUNDING);
14604        let a = a.as_f32x16();
14605        let r = vcvtps2udq(a, u32x16::ZERO, k, ROUNDING);
14606        transmute(r)
14607    }
14608}
14609
14610/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.\
14611/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14612///
14613/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_pd&expand=1347)
14614#[inline]
14615#[target_feature(enable = "avx512f")]
14616#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14617#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
14618#[rustc_legacy_const_generics(1)]
14619pub fn _mm512_cvt_roundps_pd<const SAE: i32>(a: __m256) -> __m512d {
14620    unsafe {
14621        static_assert_sae!(SAE);
14622        let a = a.as_f32x8();
14623        let r = vcvtps2pd(a, f64x8::ZERO, 0b11111111, SAE);
14624        transmute(r)
14625    }
14626}
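
// --- Illustrative usage sketch (not part of the upstream stdarch sources) ---
// Widening f32 -> f64 is always exact, so the SAE parameter only controls whether
// floating-point exception flags (e.g. for signaling NaN inputs) are raised.
// Assumes `avx512f` and `avx` are available; the helper name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx")]
fn cvt_roundps_pd_usage_sketch() {
    let a = _mm256_set1_ps(0.1);
    // Each f64 lane holds the exact value of the f32 nearest to 0.1; exception
    // flags are suppressed by _MM_FROUND_NO_EXC.
    let r = _mm512_cvt_roundps_pd::<{ _MM_FROUND_NO_EXC }>(a);
    let _ = r;
}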
14627
14628/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14629/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14630///
14631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_pd&expand=1336)
14632#[inline]
14633#[target_feature(enable = "avx512f")]
14634#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14635#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
14636#[rustc_legacy_const_generics(3)]
14637pub fn _mm512_mask_cvt_roundps_pd<const SAE: i32>(src: __m512d, k: __mmask8, a: __m256) -> __m512d {
14638    unsafe {
14639        static_assert_sae!(SAE);
14640        let a = a.as_f32x8();
14641        let src = src.as_f64x8();
14642        let r = vcvtps2pd(a, src, k, SAE);
14643        transmute(r)
14644    }
14645}
14646
14647/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14648/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14649///
14650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_pd&expand=1337)
14651#[inline]
14652#[target_feature(enable = "avx512f")]
14653#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14654#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
14655#[rustc_legacy_const_generics(2)]
14656pub fn _mm512_maskz_cvt_roundps_pd<const SAE: i32>(k: __mmask8, a: __m256) -> __m512d {
14657    unsafe {
14658        static_assert_sae!(SAE);
14659        let a = a.as_f32x8();
14660        let r = vcvtps2pd(a, f64x8::ZERO, k, SAE);
14661        transmute(r)
14662    }
14663}
14664
14665/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.\
14666///
14667/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14668/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14669/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14670/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14671/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14672/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14673///
14674/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epi32&expand=1315)
14675#[inline]
14676#[target_feature(enable = "avx512f")]
14677#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14678#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
14679#[rustc_legacy_const_generics(1)]
14680pub fn _mm512_cvt_roundpd_epi32<const ROUNDING: i32>(a: __m512d) -> __m256i {
14681    unsafe {
14682        static_assert_rounding!(ROUNDING);
14683        let a = a.as_f64x8();
14684        let r = vcvtpd2dq(a, i32x8::ZERO, 0b11111111, ROUNDING);
14685        transmute(r)
14686    }
14687}
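
// --- Illustrative usage sketch (not part of the upstream stdarch sources) ---
// The eight f64 lanes narrow into the eight i32 lanes of a 256-bit result; the
// rounding mode matters for non-integral inputs. Assumes `avx512f` is available;
// the helper name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn cvt_roundpd_epi32_usage_sketch() {
    let a = _mm512_set1_pd(-1.5);
    // Toward negative infinity: -2. Toward positive infinity: -1.
    let floor = _mm512_cvt_roundpd_epi32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
    let ceil = _mm512_cvt_roundpd_epi32::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
    let _ = (floor, ceil);
}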
14688
14689/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14690///
14691/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14692/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14693/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14694/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14695/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14696/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14697///
14698/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epi32&expand=1316)
14699#[inline]
14700#[target_feature(enable = "avx512f")]
14701#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14702#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
14703#[rustc_legacy_const_generics(3)]
14704pub fn _mm512_mask_cvt_roundpd_epi32<const ROUNDING: i32>(
14705    src: __m256i,
14706    k: __mmask8,
14707    a: __m512d,
14708) -> __m256i {
14709    unsafe {
14710        static_assert_rounding!(ROUNDING);
14711        let a = a.as_f64x8();
14712        let src = src.as_i32x8();
14713        let r = vcvtpd2dq(a, src, k, ROUNDING);
14714        transmute(r)
14715    }
14716}
14717
14718/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14719///
14720/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14721/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14722/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14723/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14724/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14725/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14726///
14727/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epi32&expand=1317)
14728#[inline]
14729#[target_feature(enable = "avx512f")]
14730#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14731#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
14732#[rustc_legacy_const_generics(2)]
14733pub fn _mm512_maskz_cvt_roundpd_epi32<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m256i {
14734    unsafe {
14735        static_assert_rounding!(ROUNDING);
14736        let a = a.as_f64x8();
14737        let r = vcvtpd2dq(a, i32x8::ZERO, k, ROUNDING);
14738        transmute(r)
14739    }
14740}
14741
14742/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.\
14743///
14744/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14745/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14746/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14747/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14748/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14749/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14750///
14751/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epu32&expand=1321)
14752#[inline]
14753#[target_feature(enable = "avx512f")]
14754#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14755#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
14756#[rustc_legacy_const_generics(1)]
14757pub fn _mm512_cvt_roundpd_epu32<const ROUNDING: i32>(a: __m512d) -> __m256i {
14758    unsafe {
14759        static_assert_rounding!(ROUNDING);
14760        let a = a.as_f64x8();
14761        let r = vcvtpd2udq(a, u32x8::ZERO, 0b11111111, ROUNDING);
14762        transmute(r)
14763    }
14764}
14765
14766/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14767///
14768/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14769/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14770/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14771/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14772/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14773/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14774///
14775/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epu32&expand=1322)
14776#[inline]
14777#[target_feature(enable = "avx512f")]
14778#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14779#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
14780#[rustc_legacy_const_generics(3)]
14781pub fn _mm512_mask_cvt_roundpd_epu32<const ROUNDING: i32>(
14782    src: __m256i,
14783    k: __mmask8,
14784    a: __m512d,
14785) -> __m256i {
14786    unsafe {
14787        static_assert_rounding!(ROUNDING);
14788        let a = a.as_f64x8();
14789        let src = src.as_u32x8();
14790        let r = vcvtpd2udq(a, src, k, ROUNDING);
14791        transmute(r)
14792    }
14793}
14794
14795/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14796///
14797/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14798/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14799/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14800/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14801/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14802/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14803///
14804/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epu32&expand=1323)
14805#[inline]
14806#[target_feature(enable = "avx512f")]
14807#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14808#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
14809#[rustc_legacy_const_generics(2)]
14810pub fn _mm512_maskz_cvt_roundpd_epu32<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m256i {
14811    unsafe {
14812        static_assert_rounding!(ROUNDING);
14813        let a = a.as_f64x8();
14814        let r = vcvtpd2udq(a, u32x8::ZERO, k, ROUNDING);
14815        transmute(r)
14816    }
14817}
14818
14819/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
14820///
14821/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14822/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14823/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14824/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14825/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14826/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14827///
14828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_ps&expand=1327)
14829#[inline]
14830#[target_feature(enable = "avx512f")]
14831#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14832#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
14833#[rustc_legacy_const_generics(1)]
14834pub fn _mm512_cvt_roundpd_ps<const ROUNDING: i32>(a: __m512d) -> __m256 {
14835    unsafe {
14836        static_assert_rounding!(ROUNDING);
14837        let a = a.as_f64x8();
14838        let r = vcvtpd2ps(a, f32x8::ZERO, 0b11111111, ROUNDING);
14839        transmute(r)
14840    }
14841}
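
// --- Illustrative usage sketch (not part of the upstream stdarch sources) ---
// Narrowing f64 -> f32 loses precision, and the ROUNDING parameter decides in
// which direction. Assumes `avx512f` is available; the helper name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn cvt_roundpd_ps_usage_sketch() {
    // 1.0 + f64::EPSILON lies strictly between the f32 values 1.0 and 1.0 + f32::EPSILON.
    let a = _mm512_set1_pd(1.0 + f64::EPSILON);
    // Toward zero (or negative infinity): 1.0. Toward positive infinity: 1.0 + f32::EPSILON.
    let down = _mm512_cvt_roundpd_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
    let up = _mm512_cvt_roundpd_ps::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
    let _ = (down, up);
}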
14842
14843/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14844///
14845/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14846/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14847/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14848/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14849/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14850/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14851///
14852/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_ps&expand=1328)
14853#[inline]
14854#[target_feature(enable = "avx512f")]
14855#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14856#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
14857#[rustc_legacy_const_generics(3)]
14858pub fn _mm512_mask_cvt_roundpd_ps<const ROUNDING: i32>(
14859    src: __m256,
14860    k: __mmask8,
14861    a: __m512d,
14862) -> __m256 {
14863    unsafe {
14864        static_assert_rounding!(ROUNDING);
14865        let a = a.as_f64x8();
14866        let src = src.as_f32x8();
14867        let r = vcvtpd2ps(a, src, k, ROUNDING);
14868        transmute(r)
14869    }
14870}
14871
14872/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14873///
14874/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14875/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14876/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14877/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14878/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14879/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14880///
14881/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_ps&expand=1329)
14882#[inline]
14883#[target_feature(enable = "avx512f")]
14884#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14885#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
14886#[rustc_legacy_const_generics(2)]
14887pub fn _mm512_maskz_cvt_roundpd_ps<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m256 {
14888    unsafe {
14889        static_assert_rounding!(ROUNDING);
14890        let a = a.as_f64x8();
14891        let r = vcvtpd2ps(a, f32x8::ZERO, k, ROUNDING);
14892        transmute(r)
14893    }
14894}
14895
14896/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
14897///
14898/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14899/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14900/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14901/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14902/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14903/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14904///
14905/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepi32_ps&expand=1294)
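///
/// A minimal usage sketch (hypothetical helper; assumes AVX512F support and the unstable
/// `stdarch_x86_avx512` feature):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Convert sixteen i32 lanes to f32. The rounding mode only matters for
/// // magnitudes above 2^24, where the value is not exactly representable in f32.
/// #[target_feature(enable = "avx512f")]
/// unsafe fn i32_to_f32_nearest(a: __m512i) -> __m512 {
///     _mm512_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a)
/// }
/// ```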
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvt_roundepi32_ps<const ROUNDING: i32>(a: __m512i) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_i32x16();
        let r = vcvtdq2ps(a, ROUNDING);
        transmute(r)
    }
}

/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepi32_ps&expand=1295)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvt_roundepi32_ps<const ROUNDING: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512i,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_i32x16();
        let r = vcvtdq2ps(a, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
    }
}

/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepi32_ps&expand=1296)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvt_roundepi32_ps<const ROUNDING: i32>(k: __mmask16, a: __m512i) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_i32x16();
        let r = vcvtdq2ps(a, ROUNDING);
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
    }
}

/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepu32_ps&expand=1303)
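///
/// A minimal usage sketch (hypothetical helper; same feature assumptions as the previous example):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Convert sixteen u32 lanes to f32, truncating any value that cannot be
/// // represented exactly instead of rounding it up.
/// #[target_feature(enable = "avx512f")]
/// unsafe fn u32_to_f32_trunc(a: __m512i) -> __m512 {
///     _mm512_cvt_roundepu32_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a)
/// }
/// ```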
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvt_roundepu32_ps<const ROUNDING: i32>(a: __m512i) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_u32x16();
        let r = vcvtudq2ps(a, ROUNDING);
        transmute(r)
    }
}

/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepu32_ps&expand=1304)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvt_roundepu32_ps<const ROUNDING: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512i,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_u32x16();
        let r = vcvtudq2ps(a, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
    }
}

/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepu32_ps&expand=1305)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvt_roundepu32_ps<const ROUNDING: i32>(k: __mmask16, a: __m512i) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_u32x16();
        let r = vcvtudq2ps(a, ROUNDING);
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_ph&expand=1354)
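///
/// A minimal usage sketch (hypothetical helper; assumes AVX512F and the unstable
/// `stdarch_x86_avx512` feature):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Pack sixteen f32 lanes into sixteen f16 bit patterns, suppressing exceptions.
/// #[target_feature(enable = "avx512f")]
/// unsafe fn f32_to_f16_bits(a: __m512) -> __m256i {
///     _mm512_cvt_roundps_ph::<{ _MM_FROUND_NO_EXC }>(a)
/// }
/// ```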
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvt_roundps_ph<const SAE: i32>(a: __m512) -> __m256i {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        let r = vcvtps2ph(a, SAE, i16x16::ZERO, 0b11111111_11111111);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_ph&expand=1355)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvt_roundps_ph<const SAE: i32>(
    src: __m256i,
    k: __mmask16,
    a: __m512,
) -> __m256i {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        let src = src.as_i16x16();
        let r = vcvtps2ph(a, SAE, src, k);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_ph&expand=1356)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvt_roundps_ph<const SAE: i32>(k: __mmask16, a: __m512) -> __m256i {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        let r = vcvtps2ph(a, SAE, i16x16::ZERO, k);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvt_roundps_ph&expand=1352)
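///
/// A minimal usage sketch of the write-masked form (hypothetical helper; assumes
/// AVX512F+AVX512VL and the unstable `stdarch_x86_avx512` feature):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Truncate eight f32 lanes to f16 bit patterns; lanes whose bit in `k` is
/// // clear keep the corresponding element of `src`.
/// #[target_feature(enable = "avx512f,avx512vl")]
/// unsafe fn f32x8_to_f16_masked(src: __m128i, k: __mmask8, a: __m256) -> __m128i {
///     _mm256_mask_cvt_roundps_ph::<{ _MM_FROUND_TO_ZERO }>(src, k, a)
/// }
/// ```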
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_mask_cvt_roundps_ph<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m256,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x8();
        let src = src.as_i16x8();
        let r = vcvtps2ph256(a, IMM8, src, k);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvt_roundps_ph&expand=1353)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_maskz_cvt_roundps_ph<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x8();
        let r = vcvtps2ph256(a, IMM8, i16x8::ZERO, k);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvt_roundps_ph&expand=1350)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_mask_cvt_roundps_ph<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x4();
        let src = src.as_i16x8();
        let r = vcvtps2ph128(a, IMM8, src, k);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvt_roundps_ph&expand=1351)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_maskz_cvt_roundps_ph<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x4();
        let r = vcvtps2ph128(a, IMM8, i16x8::ZERO, k);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_ph&expand=1778)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvtps_ph<const SAE: i32>(a: __m512) -> __m256i {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        let r = vcvtps2ph(a, SAE, i16x16::ZERO, 0b11111111_11111111);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_ph&expand=1779)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvtps_ph<const SAE: i32>(src: __m256i, k: __mmask16, a: __m512) -> __m256i {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        let src = src.as_i16x16();
        let r = vcvtps2ph(a, SAE, src, k);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_ph&expand=1780)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvtps_ph<const SAE: i32>(k: __mmask16, a: __m512) -> __m256i {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        let r = vcvtps2ph(a, SAE, i16x16::ZERO, k);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_ph&expand=1776)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_mask_cvtps_ph<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m256) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x8();
        let src = src.as_i16x8();
        let r = vcvtps2ph256(a, IMM8, src, k);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_ph&expand=1777)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_maskz_cvtps_ph<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x8();
        let r = vcvtps2ph256(a, IMM8, i16x8::ZERO, k);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_ph&expand=1773)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_mask_cvtps_ph<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x4();
        let src = src.as_i16x8();
        let r = vcvtps2ph128(a, IMM8, src, k);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_ph&expand=1774)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_maskz_cvtps_ph<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x4();
        let r = vcvtps2ph128(a, IMM8, i16x8::ZERO, k);
        transmute(r)
    }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundph_ps&expand=1332)
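///
/// A minimal usage sketch (hypothetical helper; assumes AVX512F and the unstable
/// `stdarch_x86_avx512` feature):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Widen sixteen f16 bit patterns to f32; the conversion is exact, so only
/// // exception suppression is configurable here.
/// #[target_feature(enable = "avx512f")]
/// unsafe fn f16_bits_to_f32(a: __m256i) -> __m512 {
///     _mm512_cvt_roundph_ps::<{ _MM_FROUND_NO_EXC }>(a)
/// }
/// ```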
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvt_roundph_ps<const SAE: i32>(a: __m256i) -> __m512 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_i16x16();
        let r = vcvtph2ps(a, f32x16::ZERO, 0b11111111_11111111, SAE);
        transmute(r)
    }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundph_ps&expand=1333)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvt_roundph_ps<const SAE: i32>(src: __m512, k: __mmask16, a: __m256i) -> __m512 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_i16x16();
        let src = src.as_f32x16();
        let r = vcvtph2ps(a, src, k, SAE);
        transmute(r)
    }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundph_ps&expand=1334)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvt_roundph_ps<const SAE: i32>(k: __mmask16, a: __m256i) -> __m512 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_i16x16();
        let r = vcvtph2ps(a, f32x16::ZERO, k, SAE);
        transmute(r)
    }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtph_ps&expand=1723)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtph2ps))]
pub fn _mm512_cvtph_ps(a: __m256i) -> __m512 {
    unsafe {
        transmute(vcvtph2ps(
            a.as_i16x16(),
            f32x16::ZERO,
            0b11111111_11111111,
            _MM_FROUND_NO_EXC,
        ))
    }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtph_ps&expand=1724)
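///
/// A minimal sketch of the writemask behaviour (hypothetical helper; assumes AVX512F and the
/// unstable `stdarch_x86_avx512` feature):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Convert only the lanes selected by the low eight mask bits; the remaining
/// // lanes of the result are copied from `src`.
/// #[target_feature(enable = "avx512f")]
/// unsafe fn widen_low_lanes(src: __m512, a: __m256i) -> __m512 {
///     _mm512_mask_cvtph_ps(src, 0b00000000_11111111, a)
/// }
/// ```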
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtph2ps))]
pub fn _mm512_mask_cvtph_ps(src: __m512, k: __mmask16, a: __m256i) -> __m512 {
    unsafe {
        transmute(vcvtph2ps(
            a.as_i16x16(),
            src.as_f32x16(),
            k,
            _MM_FROUND_NO_EXC,
        ))
    }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtph_ps&expand=1725)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtph2ps))]
pub fn _mm512_maskz_cvtph_ps(k: __mmask16, a: __m256i) -> __m512 {
    unsafe { transmute(vcvtph2ps(a.as_i16x16(), f32x16::ZERO, k, _MM_FROUND_NO_EXC)) }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtph_ps&expand=1721)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtph2ps))]
pub fn _mm256_mask_cvtph_ps(src: __m256, k: __mmask8, a: __m128i) -> __m256 {
    unsafe {
        let convert = _mm256_cvtph_ps(a);
        transmute(simd_select_bitmask(k, convert.as_f32x8(), src.as_f32x8()))
    }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtph_ps&expand=1722)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtph2ps))]
pub fn _mm256_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m256 {
    unsafe {
        let convert = _mm256_cvtph_ps(a);
        transmute(simd_select_bitmask(k, convert.as_f32x8(), f32x8::ZERO))
    }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtph_ps&expand=1718)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtph2ps))]
pub fn _mm_mask_cvtph_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
    unsafe {
        let convert = _mm_cvtph_ps(a);
        transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4()))
    }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtph_ps&expand=1719)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtph2ps))]
pub fn _mm_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m128 {
    unsafe {
        let convert = _mm_cvtph_ps(a);
        transmute(simd_select_bitmask(k, convert.as_f32x4(), f32x4::ZERO))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epi32&expand=1916)
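///
/// A minimal usage sketch (hypothetical helper; assumes AVX512F and the unstable
/// `stdarch_x86_avx512` feature):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Truncate sixteen f32 lanes toward zero and store them as i32, with
/// // floating-point exceptions suppressed.
/// #[target_feature(enable = "avx512f")]
/// unsafe fn f32_to_i32_trunc(a: __m512) -> __m512i {
///     _mm512_cvtt_roundps_epi32::<{ _MM_FROUND_NO_EXC }>(a)
/// }
/// ```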
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvtt_roundps_epi32<const SAE: i32>(a: __m512) -> __m512i {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        let r = vcvttps2dq(a, i32x16::ZERO, 0b11111111_11111111, SAE);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epi32&expand=1917)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvtt_roundps_epi32<const SAE: i32>(
    src: __m512i,
    k: __mmask16,
    a: __m512,
) -> __m512i {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        let src = src.as_i32x16();
        let r = vcvttps2dq(a, src, k, SAE);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epi32&expand=1918)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvtt_roundps_epi32<const SAE: i32>(k: __mmask16, a: __m512) -> __m512i {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        let r = vcvttps2dq(a, i32x16::ZERO, k, SAE);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epu32&expand=1922)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvtt_roundps_epu32<const SAE: i32>(a: __m512) -> __m512i {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        let r = vcvttps2udq(a, u32x16::ZERO, 0b11111111_11111111, SAE);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epu32&expand=1923)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvtt_roundps_epu32<const SAE: i32>(
    src: __m512i,
    k: __mmask16,
    a: __m512,
) -> __m512i {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        let src = src.as_u32x16();
        let r = vcvttps2udq(a, src, k, SAE);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epu32&expand=1924)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvtt_roundps_epu32<const SAE: i32>(k: __mmask16, a: __m512) -> __m512i {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        let r = vcvttps2udq(a, u32x16::ZERO, k, SAE);
        transmute(r)
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epi32&expand=1904)
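///
/// A minimal usage sketch (hypothetical helper; same feature assumptions as the other
/// `cvtt_round` examples above):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Truncate eight f64 lanes toward zero and narrow them to i32.
/// #[target_feature(enable = "avx512f")]
/// unsafe fn f64_to_i32_trunc(a: __m512d) -> __m256i {
///     _mm512_cvtt_roundpd_epi32::<{ _MM_FROUND_NO_EXC }>(a)
/// }
/// ```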
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvtt_roundpd_epi32<const SAE: i32>(a: __m512d) -> __m256i {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x8();
        let r = vcvttpd2dq(a, i32x8::ZERO, 0b11111111, SAE);
        transmute(r)
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epi32&expand=1905)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvtt_roundpd_epi32<const SAE: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m512d,
) -> __m256i {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x8();
        let src = src.as_i32x8();
        let r = vcvttpd2dq(a, src, k, SAE);
        transmute(r)
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epi32&expand=1918)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvtt_roundpd_epi32<const SAE: i32>(k: __mmask8, a: __m512d) -> __m256i {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x8();
        let r = vcvttpd2dq(a, i32x8::ZERO, k, SAE);
        transmute(r)
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epu32&expand=1910)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvtt_roundpd_epu32<const SAE: i32>(a: __m512d) -> __m256i {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x8();
        let r = vcvttpd2udq(a, i32x8::ZERO, 0b11111111, SAE);
        transmute(r)
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epu32&expand=1911)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvtt_roundpd_epu32<const SAE: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m512d,
) -> __m256i {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x8();
        let src = src.as_i32x8();
        let r = vcvttpd2udq(a, src, k, SAE);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epi32&expand=1984)
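///
/// A minimal usage sketch (hypothetical helper; assumes AVX512F and the unstable
/// `stdarch_x86_avx512` feature):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Plain truncating conversion; no explicit rounding or exception control is exposed.
/// #[target_feature(enable = "avx512f")]
/// unsafe fn f32_to_i32(a: __m512) -> __m512i {
///     _mm512_cvttps_epi32(a)
/// }
/// ```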
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvttps2dq))]
pub fn _mm512_cvttps_epi32(a: __m512) -> __m512i {
    unsafe {
        transmute(vcvttps2dq(
            a.as_f32x16(),
            i32x16::ZERO,
            0b11111111_11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epi32&expand=1985)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvttps2dq))]
pub fn _mm512_mask_cvttps_epi32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
    unsafe {
        transmute(vcvttps2dq(
            a.as_f32x16(),
            src.as_i32x16(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epi32&expand=1986)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvttps2dq))]
pub fn _mm512_maskz_cvttps_epi32(k: __mmask16, a: __m512) -> __m512i {
    unsafe {
        transmute(vcvttps2dq(
            a.as_f32x16(),
            i32x16::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epi32&expand=1982)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvttps2dq))]
pub fn _mm256_mask_cvttps_epi32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
    unsafe { transmute(vcvttps2dq256(a.as_f32x8(), src.as_i32x8(), k)) }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epi32&expand=1983)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvttps2dq))]
pub fn _mm256_maskz_cvttps_epi32(k: __mmask8, a: __m256) -> __m256i {
    unsafe { transmute(vcvttps2dq256(a.as_f32x8(), i32x8::ZERO, k)) }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epi32&expand=1979)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvttps2dq))]
pub fn _mm_mask_cvttps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
    unsafe { transmute(vcvttps2dq128(a.as_f32x4(), src.as_i32x4(), k)) }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epi32&expand=1980)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvttps2dq))]
pub fn _mm_maskz_cvttps_epi32(k: __mmask8, a: __m128) -> __m128i {
    unsafe { transmute(vcvttps2dq128(a.as_f32x4(), i32x4::ZERO, k)) }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epu32&expand=2002)
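///
/// A minimal usage sketch (hypothetical helper; assumes AVX512F and the unstable
/// `stdarch_x86_avx512` feature):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Truncate sixteen f32 lanes to unsigned 32-bit integers.
/// #[target_feature(enable = "avx512f")]
/// unsafe fn f32_to_u32_trunc(a: __m512) -> __m512i {
///     _mm512_cvttps_epu32(a)
/// }
/// ```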
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvttps2udq))]
pub fn _mm512_cvttps_epu32(a: __m512) -> __m512i {
    unsafe {
        transmute(vcvttps2udq(
            a.as_f32x16(),
            u32x16::ZERO,
            0b11111111_11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epu32&expand=2003)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvttps2udq))]
pub fn _mm512_mask_cvttps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
    unsafe {
        transmute(vcvttps2udq(
            a.as_f32x16(),
            src.as_u32x16(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epu32&expand=2004)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvttps2udq))]
pub fn _mm512_maskz_cvttps_epu32(k: __mmask16, a: __m512) -> __m512i {
    unsafe {
        transmute(vcvttps2udq(
            a.as_f32x16(),
            u32x16::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttps_epu32&expand=1999)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvttps2udq))]
pub fn _mm256_cvttps_epu32(a: __m256) -> __m256i {
    unsafe { transmute(vcvttps2udq256(a.as_f32x8(), u32x8::ZERO, 0b11111111)) }
}

15893/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15894///
15895/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epu32&expand=2000)
15896#[inline]
15897#[target_feature(enable = "avx512f,avx512vl")]
15898#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15899#[cfg_attr(test, assert_instr(vcvttps2udq))]
15900pub fn _mm256_mask_cvttps_epu32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
15901    unsafe { transmute(vcvttps2udq256(a.as_f32x8(), src.as_u32x8(), k)) }
15902}
15903
15904/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15905///
15906/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epu32&expand=2001)
15907#[inline]
15908#[target_feature(enable = "avx512f,avx512vl")]
15909#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15910#[cfg_attr(test, assert_instr(vcvttps2udq))]
15911pub fn _mm256_maskz_cvttps_epu32(k: __mmask8, a: __m256) -> __m256i {
15912    unsafe { transmute(vcvttps2udq256(a.as_f32x8(), u32x8::ZERO, k)) }
15913}
15914
15915/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
15916///
15917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epu32&expand=1996)
15918#[inline]
15919#[target_feature(enable = "avx512f,avx512vl")]
15920#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15921#[cfg_attr(test, assert_instr(vcvttps2udq))]
15922pub fn _mm_cvttps_epu32(a: __m128) -> __m128i {
15923    unsafe { transmute(vcvttps2udq128(a.as_f32x4(), u32x4::ZERO, 0b11111111)) }
15924}
15925
15926/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15927///
15928/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epu32&expand=1997)
15929#[inline]
15930#[target_feature(enable = "avx512f,avx512vl")]
15931#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15932#[cfg_attr(test, assert_instr(vcvttps2udq))]
15933pub fn _mm_mask_cvttps_epu32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
15934    unsafe { transmute(vcvttps2udq128(a.as_f32x4(), src.as_u32x4(), k)) }
15935}
15936
15937/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15938///
15939/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epu32&expand=1998)
15940#[inline]
15941#[target_feature(enable = "avx512f,avx512vl")]
15942#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15943#[cfg_attr(test, assert_instr(vcvttps2udq))]
15944pub fn _mm_maskz_cvttps_epu32(k: __mmask8, a: __m128) -> __m128i {
15945    unsafe { transmute(vcvttps2udq128(a.as_f32x4(), u32x4::ZERO, k)) }
15946}
15947
15948/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15949/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15950///
15951/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epu32&expand=1912)
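///
/// # Examples
///
/// A minimal sketch combining the zeroing mask with `_MM_FROUND_NO_EXC`, assuming a nightly
/// toolchain with the `stdarch_x86_avx512` feature and a CPU supporting AVX-512F, plus
/// AVX-512VL only for the 256-bit comparison used in the check:
///
/// ```
/// #![feature(stdarch_x86_avx512)]
/// #[cfg(target_arch = "x86_64")]
/// {
///     use core::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
///         unsafe {
///             let a = _mm512_set1_pd(3.9);
///             // Convert the four lowest lanes with exceptions suppressed; zero the rest.
///             let r = _mm512_maskz_cvtt_roundpd_epu32::<{ _MM_FROUND_NO_EXC }>(0b0000_1111, a);
///             let expected = _mm256_setr_epi32(3, 3, 3, 3, 0, 0, 0, 0);
///             assert_eq!(_mm256_cmpeq_epi32_mask(r, expected), 0xff);
///         }
///     }
/// }
/// ```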
15952#[inline]
15953#[target_feature(enable = "avx512f")]
15954#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15955#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
15956#[rustc_legacy_const_generics(2)]
15957pub fn _mm512_maskz_cvtt_roundpd_epu32<const SAE: i32>(k: __mmask8, a: __m512d) -> __m256i {
15958    unsafe {
15959        static_assert_sae!(SAE);
15960        let a = a.as_f64x8();
15961        let r = vcvttpd2udq(a, i32x8::ZERO, k, SAE);
15962        transmute(r)
15963    }
15964}
15965
15966/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.
15967///
15968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epi32&expand=1947)
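///
/// # Examples
///
/// A minimal sketch of the narrowing truncation (eight `f64` lanes to eight `i32` lanes),
/// assuming a nightly toolchain with the `stdarch_x86_avx512` feature, a CPU supporting
/// AVX-512F, and AVX-512VL only for the 256-bit comparison used in the check:
///
/// ```
/// #![feature(stdarch_x86_avx512)]
/// #[cfg(target_arch = "x86_64")]
/// {
///     use core::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
///         unsafe {
///             let a = _mm512_setr_pd(1.9, -1.9, 2.5, -2.5, 0.0, 7.1, -7.1, 42.0);
///             // Truncation rounds toward zero for both positive and negative inputs.
///             let r = _mm512_cvttpd_epi32(a);
///             let expected = _mm256_setr_epi32(1, -1, 2, -2, 0, 7, -7, 42);
///             assert_eq!(_mm256_cmpeq_epi32_mask(r, expected), 0xff);
///         }
///     }
/// }
/// ```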
15969#[inline]
15970#[target_feature(enable = "avx512f")]
15971#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15972#[cfg_attr(test, assert_instr(vcvttpd2dq))]
15973pub fn _mm512_cvttpd_epi32(a: __m512d) -> __m256i {
15974    unsafe {
15975        transmute(vcvttpd2dq(
15976            a.as_f64x8(),
15977            i32x8::ZERO,
15978            0b11111111,
15979            _MM_FROUND_CUR_DIRECTION,
15980        ))
15981    }
15982}
15983
15984/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15985///
15986/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epi32&expand=1948)
15987#[inline]
15988#[target_feature(enable = "avx512f")]
15989#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15990#[cfg_attr(test, assert_instr(vcvttpd2dq))]
15991pub fn _mm512_mask_cvttpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
15992    unsafe {
15993        transmute(vcvttpd2dq(
15994            a.as_f64x8(),
15995            src.as_i32x8(),
15996            k,
15997            _MM_FROUND_CUR_DIRECTION,
15998        ))
15999    }
16000}
16001
16002/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16003///
16004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epi32&expand=1949)
16005#[inline]
16006#[target_feature(enable = "avx512f")]
16007#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16008#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16009pub fn _mm512_maskz_cvttpd_epi32(k: __mmask8, a: __m512d) -> __m256i {
16010    unsafe {
16011        transmute(vcvttpd2dq(
16012            a.as_f64x8(),
16013            i32x8::ZERO,
16014            k,
16015            _MM_FROUND_CUR_DIRECTION,
16016        ))
16017    }
16018}
16019
16020/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16021///
16022/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epi32&expand=1945)
16023#[inline]
16024#[target_feature(enable = "avx512f,avx512vl")]
16025#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16026#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16027pub fn _mm256_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
16028    unsafe { transmute(vcvttpd2dq256(a.as_f64x4(), src.as_i32x4(), k)) }
16029}
16030
16031/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16032///
16033/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epi32&expand=1946)
16034#[inline]
16035#[target_feature(enable = "avx512f,avx512vl")]
16036#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16037#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16038pub fn _mm256_maskz_cvttpd_epi32(k: __mmask8, a: __m256d) -> __m128i {
16039    unsafe { transmute(vcvttpd2dq256(a.as_f64x4(), i32x4::ZERO, k)) }
16040}
16041
16042/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16043///
16044/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epi32&expand=1942)
16045#[inline]
16046#[target_feature(enable = "avx512f,avx512vl")]
16047#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16048#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16049pub fn _mm_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
16050    unsafe { transmute(vcvttpd2dq128(a.as_f64x2(), src.as_i32x4(), k)) }
16051}
16052
16053/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16054///
16055/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epi32&expand=1943)
16056#[inline]
16057#[target_feature(enable = "avx512f,avx512vl")]
16058#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16059#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16060pub fn _mm_maskz_cvttpd_epi32(k: __mmask8, a: __m128d) -> __m128i {
16061    unsafe { transmute(vcvttpd2dq128(a.as_f64x2(), i32x4::ZERO, k)) }
16062}
16063
16064/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
16065///
16066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epu32&expand=1965)
16067#[inline]
16068#[target_feature(enable = "avx512f")]
16069#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16070#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16071pub fn _mm512_cvttpd_epu32(a: __m512d) -> __m256i {
16072    unsafe {
16073        transmute(vcvttpd2udq(
16074            a.as_f64x8(),
16075            i32x8::ZERO,
16076            0b11111111,
16077            _MM_FROUND_CUR_DIRECTION,
16078        ))
16079    }
16080}
16081
16082/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16083///
16084/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epu32&expand=1966)
16085#[inline]
16086#[target_feature(enable = "avx512f")]
16087#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16088#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16089pub fn _mm512_mask_cvttpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
16090    unsafe {
16091        transmute(vcvttpd2udq(
16092            a.as_f64x8(),
16093            src.as_i32x8(),
16094            k,
16095            _MM_FROUND_CUR_DIRECTION,
16096        ))
16097    }
16098}
16099
16100/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16101///
16102/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epu32&expand=1967)
16103#[inline]
16104#[target_feature(enable = "avx512f")]
16105#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16106#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16107pub fn _mm512_maskz_cvttpd_epu32(k: __mmask8, a: __m512d) -> __m256i {
16108    unsafe {
16109        transmute(vcvttpd2udq(
16110            a.as_f64x8(),
16111            i32x8::ZERO,
16112            k,
16113            _MM_FROUND_CUR_DIRECTION,
16114        ))
16115    }
16116}
16117
16118/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
16119///
16120/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttpd_epu32&expand=1962)
16121#[inline]
16122#[target_feature(enable = "avx512f,avx512vl")]
16123#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16124#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16125pub fn _mm256_cvttpd_epu32(a: __m256d) -> __m128i {
16126    unsafe { transmute(vcvttpd2udq256(a.as_f64x4(), i32x4::ZERO, 0b11111111)) }
16127}
16128
16129/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16130///
16131/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epu32&expand=1963)
16132#[inline]
16133#[target_feature(enable = "avx512f,avx512vl")]
16134#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16135#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16136pub fn _mm256_mask_cvttpd_epu32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
16137    unsafe { transmute(vcvttpd2udq256(a.as_f64x4(), src.as_i32x4(), k)) }
16138}
16139
16140/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16141///
16142/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epu32&expand=1964)
16143#[inline]
16144#[target_feature(enable = "avx512f,avx512vl")]
16145#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16146#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16147pub fn _mm256_maskz_cvttpd_epu32(k: __mmask8, a: __m256d) -> __m128i {
16148    unsafe { transmute(vcvttpd2udq256(a.as_f64x4(), i32x4::ZERO, k)) }
16149}
16150
16151/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
16152///
16153/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epu32&expand=1959)
16154#[inline]
16155#[target_feature(enable = "avx512f,avx512vl")]
16156#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16157#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16158pub fn _mm_cvttpd_epu32(a: __m128d) -> __m128i {
16159    unsafe { transmute(vcvttpd2udq128(a.as_f64x2(), i32x4::ZERO, 0b11111111)) }
16160}
16161
16162/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16163///
16164/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epu32&expand=1960)
16165#[inline]
16166#[target_feature(enable = "avx512f,avx512vl")]
16167#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16168#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16169pub fn _mm_mask_cvttpd_epu32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
16170    unsafe { transmute(vcvttpd2udq128(a.as_f64x2(), src.as_i32x4(), k)) }
16171}
16172
16173/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16174///
16175/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epu32&expand=1961)
16176#[inline]
16177#[target_feature(enable = "avx512f,avx512vl")]
16178#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16179#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16180pub fn _mm_maskz_cvttpd_epu32(k: __mmask8, a: __m128d) -> __m128i {
16181    unsafe { transmute(vcvttpd2udq128(a.as_f64x2(), i32x4::ZERO, k)) }
16182}
16183
16184/// Returns vector of type `__m512d` with all elements set to zero.
16185///
16186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_pd&expand=5018)
16187#[inline]
16188#[target_feature(enable = "avx512f")]
16189#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16190#[cfg_attr(test, assert_instr(vxorps))]
16191pub fn _mm512_setzero_pd() -> __m512d {
16192    // All-0 is a properly initialized __m512d
16193    unsafe { const { mem::zeroed() } }
16194}
16195
16196/// Returns vector of type `__m512` with all elements set to zero.
16197///
16198/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_ps&expand=5021)
16199#[inline]
16200#[target_feature(enable = "avx512f")]
16201#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16202#[cfg_attr(test, assert_instr(vxorps))]
16203pub fn _mm512_setzero_ps() -> __m512 {
16204    // All-0 is a properly initialized __m512
16205    unsafe { const { mem::zeroed() } }
16206}
16207
16208/// Returns vector of type `__m512` with all elements set to zero.
16209///
16210/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero&expand=5014)
16211#[inline]
16212#[target_feature(enable = "avx512f")]
16213#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16214#[cfg_attr(test, assert_instr(vxorps))]
16215pub fn _mm512_setzero() -> __m512 {
16216    // All-0 is a properly initialized __m512
16217    unsafe { const { mem::zeroed() } }
16218}
16219
16220/// Returns vector of type `__m512i` with all elements set to zero.
16221///
16222/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_si512&expand=5024)
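///
/// # Examples
///
/// A minimal sketch, assuming a nightly toolchain with the `stdarch_x86_avx512` feature and a
/// CPU supporting AVX-512F:
///
/// ```
/// #![feature(stdarch_x86_avx512)]
/// #[cfg(target_arch = "x86_64")]
/// {
///     use core::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             // Every 32-bit lane of the zeroed vector compares equal to 0.
///             let z = _mm512_setzero_si512();
///             assert_eq!(_mm512_cmpeq_epi32_mask(z, _mm512_set1_epi32(0)), 0xffff);
///         }
///     }
/// }
/// ```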
16223#[inline]
16224#[target_feature(enable = "avx512f")]
16225#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16226#[cfg_attr(test, assert_instr(vxorps))]
16227pub fn _mm512_setzero_si512() -> __m512i {
16228    // All-0 is a properly initialized __m512i
16229    unsafe { const { mem::zeroed() } }
16230}
16231
16232/// Returns vector of type `__m512i` with all elements set to zero.
16233///
16234/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_epi32&expand=5015)
16235#[inline]
16236#[target_feature(enable = "avx512f")]
16237#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16238#[cfg_attr(test, assert_instr(vxorps))]
16239pub fn _mm512_setzero_epi32() -> __m512i {
16240    // All-0 is a properly initialized __m512i
16241    unsafe { const { mem::zeroed() } }
16242}
16243
16244/// Sets packed 32-bit integers in `dst` with the supplied values in reverse
16245/// order.
16246///
16247/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_epi32&expand=4991)
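///
/// # Examples
///
/// A minimal sketch contrasting `_mm512_setr_epi32` with `_mm512_set_epi32`, assuming a
/// nightly toolchain with the `stdarch_x86_avx512` feature and a CPU supporting AVX-512F:
///
/// ```
/// #![feature(stdarch_x86_avx512)]
/// #[cfg(target_arch = "x86_64")]
/// {
///     use core::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             // `setr` takes its arguments in memory order (lowest lane first),
///             // while `set` takes them with the highest lane first.
///             let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///             let b = _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
///             assert_eq!(_mm512_cmpeq_epi32_mask(a, b), 0xffff);
///         }
///     }
/// }
/// ```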
16248#[inline]
16249#[target_feature(enable = "avx512f")]
16250#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16251pub fn _mm512_setr_epi32(
16252    e15: i32,
16253    e14: i32,
16254    e13: i32,
16255    e12: i32,
16256    e11: i32,
16257    e10: i32,
16258    e9: i32,
16259    e8: i32,
16260    e7: i32,
16261    e6: i32,
16262    e5: i32,
16263    e4: i32,
16264    e3: i32,
16265    e2: i32,
16266    e1: i32,
16267    e0: i32,
16268) -> __m512i {
16269    unsafe {
16270        let r = i32x16::new(
16271            e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
16272        );
16273        transmute(r)
16274    }
16275}
16276
16277/// Set packed 8-bit integers in dst with the supplied values.
16278///
16279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi8&expand=4915)
16280#[inline]
16281#[target_feature(enable = "avx512f")]
16282#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16283pub fn _mm512_set_epi8(
16284    e63: i8,
16285    e62: i8,
16286    e61: i8,
16287    e60: i8,
16288    e59: i8,
16289    e58: i8,
16290    e57: i8,
16291    e56: i8,
16292    e55: i8,
16293    e54: i8,
16294    e53: i8,
16295    e52: i8,
16296    e51: i8,
16297    e50: i8,
16298    e49: i8,
16299    e48: i8,
16300    e47: i8,
16301    e46: i8,
16302    e45: i8,
16303    e44: i8,
16304    e43: i8,
16305    e42: i8,
16306    e41: i8,
16307    e40: i8,
16308    e39: i8,
16309    e38: i8,
16310    e37: i8,
16311    e36: i8,
16312    e35: i8,
16313    e34: i8,
16314    e33: i8,
16315    e32: i8,
16316    e31: i8,
16317    e30: i8,
16318    e29: i8,
16319    e28: i8,
16320    e27: i8,
16321    e26: i8,
16322    e25: i8,
16323    e24: i8,
16324    e23: i8,
16325    e22: i8,
16326    e21: i8,
16327    e20: i8,
16328    e19: i8,
16329    e18: i8,
16330    e17: i8,
16331    e16: i8,
16332    e15: i8,
16333    e14: i8,
16334    e13: i8,
16335    e12: i8,
16336    e11: i8,
16337    e10: i8,
16338    e9: i8,
16339    e8: i8,
16340    e7: i8,
16341    e6: i8,
16342    e5: i8,
16343    e4: i8,
16344    e3: i8,
16345    e2: i8,
16346    e1: i8,
16347    e0: i8,
16348) -> __m512i {
16349    unsafe {
16350        let r = i8x64::new(
16351            e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18,
16352            e19, e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31, e32, e33, e34, e35,
16353            e36, e37, e38, e39, e40, e41, e42, e43, e44, e45, e46, e47, e48, e49, e50, e51, e52,
16354            e53, e54, e55, e56, e57, e58, e59, e60, e61, e62, e63,
16355        );
16356        transmute(r)
16357    }
16358}
16359
16360/// Set packed 16-bit integers in dst with the supplied values.
16361///
16362/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi16&expand=4905)
16363#[inline]
16364#[target_feature(enable = "avx512f")]
16365#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16366pub fn _mm512_set_epi16(
16367    e31: i16,
16368    e30: i16,
16369    e29: i16,
16370    e28: i16,
16371    e27: i16,
16372    e26: i16,
16373    e25: i16,
16374    e24: i16,
16375    e23: i16,
16376    e22: i16,
16377    e21: i16,
16378    e20: i16,
16379    e19: i16,
16380    e18: i16,
16381    e17: i16,
16382    e16: i16,
16383    e15: i16,
16384    e14: i16,
16385    e13: i16,
16386    e12: i16,
16387    e11: i16,
16388    e10: i16,
16389    e9: i16,
16390    e8: i16,
16391    e7: i16,
16392    e6: i16,
16393    e5: i16,
16394    e4: i16,
16395    e3: i16,
16396    e2: i16,
16397    e1: i16,
16398    e0: i16,
16399) -> __m512i {
16400    unsafe {
16401        let r = i16x32::new(
16402            e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18,
16403            e19, e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31,
16404        );
16405        transmute(r)
16406    }
16407}
16408
16409/// Set packed 32-bit integers in dst with the repeated 4 element sequence.
16410///
16411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_epi32&expand=4982)
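///
/// # Examples
///
/// A minimal sketch of the repeated four-element pattern, assuming a nightly toolchain with
/// the `stdarch_x86_avx512` feature and a CPU supporting AVX-512F:
///
/// ```
/// #![feature(stdarch_x86_avx512)]
/// #[cfg(target_arch = "x86_64")]
/// {
///     use core::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             // As with `set`, the last argument lands in the lowest lane, and the
///             // four values then repeat across all 16 lanes.
///             let a = _mm512_set4_epi32(4, 3, 2, 1);
///             let b = _mm512_setr_epi32(1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4);
///             assert_eq!(_mm512_cmpeq_epi32_mask(a, b), 0xffff);
///         }
///     }
/// }
/// ```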
16412#[inline]
16413#[target_feature(enable = "avx512f")]
16414#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16415pub fn _mm512_set4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i {
16416    _mm512_set_epi32(d, c, b, a, d, c, b, a, d, c, b, a, d, c, b, a)
16417}
16418
16419/// Set packed single-precision (32-bit) floating-point elements in dst with the repeated 4 element sequence.
16420///
16421/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_ps&expand=4985)
16422#[inline]
16423#[target_feature(enable = "avx512f")]
16424#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16425pub fn _mm512_set4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 {
16426    _mm512_set_ps(d, c, b, a, d, c, b, a, d, c, b, a, d, c, b, a)
16427}
16428
16429/// Set packed double-precision (64-bit) floating-point elements in dst with the repeated 4 element sequence.
16430///
16431/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_pd&expand=4984)
16432#[inline]
16433#[target_feature(enable = "avx512f")]
16434#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16435pub fn _mm512_set4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d {
16436    _mm512_set_pd(d, c, b, a, d, c, b, a)
16437}
16438
16439/// Set packed 32-bit integers in dst with the repeated 4 element sequence in reverse order.
16440///
16441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_epi32&expand=5009)
16442#[inline]
16443#[target_feature(enable = "avx512f")]
16444#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16445pub fn _mm512_setr4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i {
16446    _mm512_set_epi32(a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d)
16447}
16448
16449/// Set packed single-precision (32-bit) floating-point elements in dst with the repeated 4 element sequence in reverse order.
16450///
16451/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_ps&expand=5012)
16452#[inline]
16453#[target_feature(enable = "avx512f")]
16454#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16455pub fn _mm512_setr4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 {
16456    _mm512_set_ps(a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d)
16457}
16458
16459/// Set packed double-precision (64-bit) floating-point elements in dst with the repeated 4 element sequence in reverse order.
16460///
16461/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_pd&expand=5011)
16462#[inline]
16463#[target_feature(enable = "avx512f")]
16464#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16465pub fn _mm512_setr4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d {
16466    _mm512_set_pd(a, b, c, d, a, b, c, d)
16467}
16468
16469/// Set packed 64-bit integers in dst with the supplied values.
16470///
16471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi64&expand=4910)
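///
/// # Examples
///
/// A minimal sketch contrasting `_mm512_set_epi64` with `_mm512_setr_epi64`, assuming a
/// nightly toolchain with the `stdarch_x86_avx512` feature and a CPU supporting AVX-512F:
///
/// ```
/// #![feature(stdarch_x86_avx512)]
/// #[cfg(target_arch = "x86_64")]
/// {
///     use core::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             // `set` takes the highest lane first, `setr` the lowest lane first.
///             let a = _mm512_set_epi64(7, 6, 5, 4, 3, 2, 1, 0);
///             let b = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
///             assert_eq!(_mm512_cmpeq_epi64_mask(a, b), 0xff);
///         }
///     }
/// }
/// ```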
16472#[inline]
16473#[target_feature(enable = "avx512f")]
16474#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16475pub fn _mm512_set_epi64(
16476    e0: i64,
16477    e1: i64,
16478    e2: i64,
16479    e3: i64,
16480    e4: i64,
16481    e5: i64,
16482    e6: i64,
16483    e7: i64,
16484) -> __m512i {
16485    _mm512_setr_epi64(e7, e6, e5, e4, e3, e2, e1, e0)
16486}
16487
16488/// Set packed 64-bit integers in dst with the supplied values in reverse order.
16489///
16490/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_epi64&expand=4993)
16491#[inline]
16492#[target_feature(enable = "avx512f")]
16493#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16494pub fn _mm512_setr_epi64(
16495    e0: i64,
16496    e1: i64,
16497    e2: i64,
16498    e3: i64,
16499    e4: i64,
16500    e5: i64,
16501    e6: i64,
16502    e7: i64,
16503) -> __m512i {
16504    unsafe {
16505        let r = i64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
16506        transmute(r)
16507    }
16508}
16509
16510/// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16511///
16512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_pd&expand=3002)
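///
/// # Examples
///
/// A minimal sketch of a strided gather, assuming a nightly toolchain with the
/// `stdarch_x86_avx512` feature and a CPU supporting AVX-512F; `SCALE` is set to 8, the size
/// of an `f64` in bytes, so each index selects a whole element:
///
/// ```
/// #![feature(stdarch_x86_avx512)]
/// #[cfg(target_arch = "x86_64")]
/// {
///     use core::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let data: [f64; 16] = core::array::from_fn(|i| i as f64);
///             // Gather every second element: data[0], data[2], ..., data[14].
///             let idx = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14);
///             let r = _mm512_i32gather_pd::<8>(idx, data.as_ptr());
///             let expected = _mm512_setr_pd(0.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0);
///             assert_eq!(_mm512_cmpeq_pd_mask(r, expected), 0xff);
///         }
///     }
/// }
/// ```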
16513#[inline]
16514#[target_feature(enable = "avx512f")]
16515#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16516#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
16517#[rustc_legacy_const_generics(2)]
16518pub unsafe fn _mm512_i32gather_pd<const SCALE: i32>(
16519    offsets: __m256i,
16520    slice: *const f64,
16521) -> __m512d {
16522    static_assert_imm8_scale!(SCALE);
16523    let zero = f64x8::ZERO;
16524    let neg_one = -1;
16525    let slice = slice as *const i8;
16526    let offsets = offsets.as_i32x8();
16527    let r = vgatherdpd(zero, slice, offsets, neg_one, SCALE);
16528    transmute(r)
16529}
16530
16531/// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16532///
16533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_pd&expand=3003)
16534#[inline]
16535#[target_feature(enable = "avx512f")]
16536#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16537#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
16538#[rustc_legacy_const_generics(4)]
16539pub unsafe fn _mm512_mask_i32gather_pd<const SCALE: i32>(
16540    src: __m512d,
16541    mask: __mmask8,
16542    offsets: __m256i,
16543    slice: *const f64,
16544) -> __m512d {
16545    static_assert_imm8_scale!(SCALE);
16546    let src = src.as_f64x8();
16547    let slice = slice as *const i8;
16548    let offsets = offsets.as_i32x8();
16549    let r = vgatherdpd(src, slice, offsets, mask as i8, SCALE);
16550    transmute(r)
16551}
16552
16553/// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16554///
16555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_pd&expand=3092)
16556#[inline]
16557#[target_feature(enable = "avx512f")]
16558#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16559#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
16560#[rustc_legacy_const_generics(2)]
16561pub unsafe fn _mm512_i64gather_pd<const SCALE: i32>(
16562    offsets: __m512i,
16563    slice: *const f64,
16564) -> __m512d {
16565    static_assert_imm8_scale!(SCALE);
16566    let zero = f64x8::ZERO;
16567    let neg_one = -1;
16568    let slice = slice as *const i8;
16569    let offsets = offsets.as_i64x8();
16570    let r = vgatherqpd(zero, slice, offsets, neg_one, SCALE);
16571    transmute(r)
16572}
16573
16574/// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16575///
16576/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_pd&expand=3093)
16577#[inline]
16578#[target_feature(enable = "avx512f")]
16579#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16580#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
16581#[rustc_legacy_const_generics(4)]
16582pub unsafe fn _mm512_mask_i64gather_pd<const SCALE: i32>(
16583    src: __m512d,
16584    mask: __mmask8,
16585    offsets: __m512i,
16586    slice: *const f64,
16587) -> __m512d {
16588    static_assert_imm8_scale!(SCALE);
16589    let src = src.as_f64x8();
16590    let slice = slice as *const i8;
16591    let offsets = offsets.as_i64x8();
16592    let r = vgatherqpd(src, slice, offsets, mask as i8, SCALE);
16593    transmute(r)
16594}
16595
16596/// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16597///
16598/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_ps&expand=3100)
16599#[inline]
16600#[target_feature(enable = "avx512f")]
16601#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16602#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
16603#[rustc_legacy_const_generics(2)]
16604pub unsafe fn _mm512_i64gather_ps<const SCALE: i32>(offsets: __m512i, slice: *const f32) -> __m256 {
16605    static_assert_imm8_scale!(SCALE);
16606    let zero = f32x8::ZERO;
16607    let neg_one = -1;
16608    let slice = slice as *const i8;
16609    let offsets = offsets.as_i64x8();
16610    let r = vgatherqps(zero, slice, offsets, neg_one, SCALE);
16611    transmute(r)
16612}
16613
16614/// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16615///
16616/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_ps&expand=3101)
16617#[inline]
16618#[target_feature(enable = "avx512f")]
16619#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16620#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
16621#[rustc_legacy_const_generics(4)]
16622pub unsafe fn _mm512_mask_i64gather_ps<const SCALE: i32>(
16623    src: __m256,
16624    mask: __mmask8,
16625    offsets: __m512i,
16626    slice: *const f32,
16627) -> __m256 {
16628    static_assert_imm8_scale!(SCALE);
16629    let src = src.as_f32x8();
16630    let slice = slice as *const i8;
16631    let offsets = offsets.as_i64x8();
16632    let r = vgatherqps(src, slice, offsets, mask as i8, SCALE);
16633    transmute(r)
16634}
16635
16636/// Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16637///
16638/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_ps&expand=3010)
16639#[inline]
16640#[target_feature(enable = "avx512f")]
16641#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16642#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
16643#[rustc_legacy_const_generics(2)]
16644pub unsafe fn _mm512_i32gather_ps<const SCALE: i32>(offsets: __m512i, slice: *const f32) -> __m512 {
16645    static_assert_imm8_scale!(SCALE);
16646    let zero = f32x16::ZERO;
16647    let neg_one = -1;
16648    let slice = slice as *const i8;
16649    let offsets = offsets.as_i32x16();
16650    let r = vgatherdps(zero, slice, offsets, neg_one, SCALE);
16651    transmute(r)
16652}
16653
16654/// Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16655///
16656/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_ps&expand=3011)
16657#[inline]
16658#[target_feature(enable = "avx512f")]
16659#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16660#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
16661#[rustc_legacy_const_generics(4)]
16662pub unsafe fn _mm512_mask_i32gather_ps<const SCALE: i32>(
16663    src: __m512,
16664    mask: __mmask16,
16665    offsets: __m512i,
16666    slice: *const f32,
16667) -> __m512 {
16668    static_assert_imm8_scale!(SCALE);
16669    let src = src.as_f32x16();
16670    let slice = slice as *const i8;
16671    let offsets = offsets.as_i32x16();
16672    let r = vgatherdps(src, slice, offsets, mask as i16, SCALE);
16673    transmute(r)
16674}
16675
16676/// Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16677///
16678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_epi32&expand=2986)
16679#[inline]
16680#[target_feature(enable = "avx512f")]
16681#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16682#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
16683#[rustc_legacy_const_generics(2)]
16684pub unsafe fn _mm512_i32gather_epi32<const SCALE: i32>(
16685    offsets: __m512i,
16686    slice: *const i32,
16687) -> __m512i {
16688    static_assert_imm8_scale!(SCALE);
16689    let zero = i32x16::ZERO;
16690    let neg_one = -1;
16691    let slice = slice as *const i8;
16692    let offsets = offsets.as_i32x16();
16693    let r = vpgatherdd(zero, slice, offsets, neg_one, SCALE);
16694    transmute(r)
16695}
16696
16697/// Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16698///
16699/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_epi32&expand=2987)
16700#[inline]
16701#[target_feature(enable = "avx512f")]
16702#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16703#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
16704#[rustc_legacy_const_generics(4)]
16705pub unsafe fn _mm512_mask_i32gather_epi32<const SCALE: i32>(
16706    src: __m512i,
16707    mask: __mmask16,
16708    offsets: __m512i,
16709    slice: *const i32,
16710) -> __m512i {
16711    static_assert_imm8_scale!(SCALE);
16712    let src = src.as_i32x16();
16713    let mask = mask as i16;
16714    let slice = slice as *const i8;
16715    let offsets = offsets.as_i32x16();
16716    let r = vpgatherdd(src, slice, offsets, mask, SCALE);
16717    transmute(r)
16718}
16719
16720/// Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16721///
16722/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_epi64&expand=2994)
16723#[inline]
16724#[target_feature(enable = "avx512f")]
16725#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16726#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
16727#[rustc_legacy_const_generics(2)]
16728pub unsafe fn _mm512_i32gather_epi64<const SCALE: i32>(
16729    offsets: __m256i,
16730    slice: *const i64,
16731) -> __m512i {
16732    static_assert_imm8_scale!(SCALE);
16733    let zero = i64x8::ZERO;
16734    let neg_one = -1;
16735    let slice = slice as *const i8;
16736    let offsets = offsets.as_i32x8();
16737    let r = vpgatherdq(zero, slice, offsets, neg_one, SCALE);
16738    transmute(r)
16739}
16740
16741/// Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16742///
16743/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_epi64&expand=2995)
16744#[inline]
16745#[target_feature(enable = "avx512f")]
16746#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16747#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
16748#[rustc_legacy_const_generics(4)]
16749pub unsafe fn _mm512_mask_i32gather_epi64<const SCALE: i32>(
16750    src: __m512i,
16751    mask: __mmask8,
16752    offsets: __m256i,
16753    slice: *const i64,
16754) -> __m512i {
16755    static_assert_imm8_scale!(SCALE);
16756    let src = src.as_i64x8();
16757    let mask = mask as i8;
16758    let slice = slice as *const i8;
16759    let offsets = offsets.as_i32x8();
16760    let r = vpgatherdq(src, slice, offsets, mask, SCALE);
16761    transmute(r)
16762}
16763
16764/// Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16765///
16766/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_epi64&expand=3084)
16767#[inline]
16768#[target_feature(enable = "avx512f")]
16769#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16770#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
16771#[rustc_legacy_const_generics(2)]
16772pub unsafe fn _mm512_i64gather_epi64<const SCALE: i32>(
16773    offsets: __m512i,
16774    slice: *const i64,
16775) -> __m512i {
16776    static_assert_imm8_scale!(SCALE);
16777    let zero = i64x8::ZERO;
16778    let neg_one = -1;
16779    let slice = slice as *const i8;
16780    let offsets = offsets.as_i64x8();
16781    let r = vpgatherqq(zero, slice, offsets, neg_one, SCALE);
16782    transmute(r)
16783}
16784
16785/// Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16786///
16787/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_epi64&expand=3085)
16788#[inline]
16789#[target_feature(enable = "avx512f")]
16790#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16791#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
16792#[rustc_legacy_const_generics(4)]
16793pub unsafe fn _mm512_mask_i64gather_epi64<const SCALE: i32>(
16794    src: __m512i,
16795    mask: __mmask8,
16796    offsets: __m512i,
16797    slice: *const i64,
16798) -> __m512i {
16799    static_assert_imm8_scale!(SCALE);
16800    let src = src.as_i64x8();
16801    let mask = mask as i8;
16802    let slice = slice as *const i8;
16803    let offsets = offsets.as_i64x8();
16804    let r = vpgatherqq(src, slice, offsets, mask, SCALE);
16805    transmute(r)
16806}
16807
16808/// Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16809///
16810/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_epi32&expand=3074)
16811#[inline]
16812#[target_feature(enable = "avx512f")]
16813#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16814#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
16815#[rustc_legacy_const_generics(2)]
16816pub unsafe fn _mm512_i64gather_epi32<const SCALE: i32>(
16817    offsets: __m512i,
16818    slice: *const i32,
16819) -> __m256i {
16820    static_assert_imm8_scale!(SCALE);
16821    let zeros = i32x8::ZERO;
16822    let neg_one = -1;
16823    let slice = slice as *const i8;
16824    let offsets = offsets.as_i64x8();
16825    let r = vpgatherqd(zeros, slice, offsets, neg_one, SCALE);
16826    transmute(r)
16827}
16828
16829/// Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16830///
16831/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_epi32&expand=3075)
16832#[inline]
16833#[target_feature(enable = "avx512f")]
16834#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16835#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
16836#[rustc_legacy_const_generics(4)]
16837pub unsafe fn _mm512_mask_i64gather_epi32<const SCALE: i32>(
16838    src: __m256i,
16839    mask: __mmask8,
16840    offsets: __m512i,
16841    slice: *const i32,
16842) -> __m256i {
16843    static_assert_imm8_scale!(SCALE);
16844    let src = src.as_i32x8();
16845    let mask = mask as i8;
16846    let slice = slice as *const i8;
16847    let offsets = offsets.as_i64x8();
16848    let r = vpgatherqd(src, slice, offsets, mask, SCALE);
16849    transmute(r)
16850}
16851
16852/// Scatter double-precision (64-bit) floating-point elements from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
16853///
16854/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_pd&expand=3044)
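///
/// # Examples
///
/// A minimal sketch of a strided scatter, assuming a nightly toolchain with the
/// `stdarch_x86_avx512` feature and a CPU supporting AVX-512F; `SCALE` is set to 8, the size
/// of an `f64` in bytes:
///
/// ```
/// #![feature(stdarch_x86_avx512)]
/// #[cfg(target_arch = "x86_64")]
/// {
///     use core::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let mut out = [0.0f64; 16];
///             let src = _mm512_setr_pd(10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0);
///             // Lane i is stored at out[2 * i]; the odd positions stay untouched.
///             let idx = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14);
///             _mm512_i32scatter_pd::<8>(out.as_mut_ptr(), idx, src);
///             assert_eq!(out[4], 12.0);
///             assert_eq!(out[5], 0.0);
///         }
///     }
/// }
/// ```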
16855#[inline]
16856#[target_feature(enable = "avx512f")]
16857#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16858#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
16859#[rustc_legacy_const_generics(3)]
16860pub unsafe fn _mm512_i32scatter_pd<const SCALE: i32>(
16861    slice: *mut f64,
16862    offsets: __m256i,
16863    src: __m512d,
16864) {
16865    static_assert_imm8_scale!(SCALE);
16866    let src = src.as_f64x8();
16867    let neg_one = -1;
16868    let slice = slice as *mut i8;
16869    let offsets = offsets.as_i32x8();
16870    vscatterdpd(slice, neg_one, offsets, src, SCALE);
16871}
16872
16873/// Scatter double-precision (64-bit) floating-point elements from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16874///
16875/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_pd&expand=3045)
16876#[inline]
16877#[target_feature(enable = "avx512f")]
16878#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16879#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
16880#[rustc_legacy_const_generics(4)]
16881pub unsafe fn _mm512_mask_i32scatter_pd<const SCALE: i32>(
16882    slice: *mut f64,
16883    mask: __mmask8,
16884    offsets: __m256i,
16885    src: __m512d,
16886) {
16887    static_assert_imm8_scale!(SCALE);
16888    let src = src.as_f64x8();
16889    let slice = slice as *mut i8;
16890    let offsets = offsets.as_i32x8();
16891    vscatterdpd(slice, mask as i8, offsets, src, SCALE);
16892}
16893
16894/// Scatter double-precision (64-bit) floating-point elements from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
16895///
16896/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_pd&expand=3122)
16897#[inline]
16898#[target_feature(enable = "avx512f")]
16899#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16900#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
16901#[rustc_legacy_const_generics(3)]
16902pub unsafe fn _mm512_i64scatter_pd<const SCALE: i32>(
16903    slice: *mut f64,
16904    offsets: __m512i,
16905    src: __m512d,
16906) {
16907    static_assert_imm8_scale!(SCALE);
16908    let src = src.as_f64x8();
16909    let neg_one = -1;
16910    let slice = slice as *mut i8;
16911    let offsets = offsets.as_i64x8();
16912    vscatterqpd(slice, neg_one, offsets, src, SCALE);
16913}
16914
16915/// Scatter double-precision (64-bit) floating-point elements from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16916///
16917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_pd&expand=3123)
16918#[inline]
16919#[target_feature(enable = "avx512f")]
16920#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16921#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
16922#[rustc_legacy_const_generics(4)]
16923pub unsafe fn _mm512_mask_i64scatter_pd<const SCALE: i32>(
16924    slice: *mut f64,
16925    mask: __mmask8,
16926    offsets: __m512i,
16927    src: __m512d,
16928) {
16929    static_assert_imm8_scale!(SCALE);
16930    let src = src.as_f64x8();
16931    let slice = slice as *mut i8;
16932    let offsets = offsets.as_i64x8();
16933    vscatterqpd(slice, mask as i8, offsets, src, SCALE);
16934}
16935
16936/// Scatter single-precision (32-bit) floating-point elements from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
16937///
16938/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_ps&expand=3050)
16939#[inline]
16940#[target_feature(enable = "avx512f")]
16941#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16942#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
16943#[rustc_legacy_const_generics(3)]
16944pub unsafe fn _mm512_i32scatter_ps<const SCALE: i32>(
16945    slice: *mut f32,
16946    offsets: __m512i,
16947    src: __m512,
16948) {
16949    static_assert_imm8_scale!(SCALE);
16950    let src = src.as_f32x16();
16951    let neg_one = -1;
16952    let slice = slice as *mut i8;
16953    let offsets = offsets.as_i32x16();
16954    vscatterdps(slice, neg_one, offsets, src, SCALE);
16955}
16956
16957/// Scatter single-precision (32-bit) floating-point elements from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16958///
16959/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_ps&expand=3051)
16960#[inline]
16961#[target_feature(enable = "avx512f")]
16962#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16963#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
16964#[rustc_legacy_const_generics(4)]
16965pub unsafe fn _mm512_mask_i32scatter_ps<const SCALE: i32>(
16966    slice: *mut f32,
16967    mask: __mmask16,
16968    offsets: __m512i,
16969    src: __m512,
16970) {
16971    static_assert_imm8_scale!(SCALE);
16972    let src = src.as_f32x16();
16973    let slice = slice as *mut i8;
16974    let offsets = offsets.as_i32x16();
16975    vscatterdps(slice, mask as i16, offsets, src, SCALE);
16976}
16977
16978/// Scatter single-precision (32-bit) floating-point elements from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
16979///
16980/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_ps&expand=3128)
16981#[inline]
16982#[target_feature(enable = "avx512f")]
16983#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16984#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
16985#[rustc_legacy_const_generics(3)]
16986pub unsafe fn _mm512_i64scatter_ps<const SCALE: i32>(
16987    slice: *mut f32,
16988    offsets: __m512i,
16989    src: __m256,
16990) {
16991    static_assert_imm8_scale!(SCALE);
16992    let src = src.as_f32x8();
16993    let neg_one = -1;
16994    let slice = slice as *mut i8;
16995    let offsets = offsets.as_i64x8();
16996    vscatterqps(slice, neg_one, offsets, src, SCALE);
16997}
16998
16999/// Scatter single-precision (32-bit) floating-point elements from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17000///
17001/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_ps&expand=3129)
17002#[inline]
17003#[target_feature(enable = "avx512f")]
17004#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17005#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
17006#[rustc_legacy_const_generics(4)]
17007pub unsafe fn _mm512_mask_i64scatter_ps<const SCALE: i32>(
17008    slice: *mut f32,
17009    mask: __mmask8,
17010    offsets: __m512i,
17011    src: __m256,
17012) {
17013    static_assert_imm8_scale!(SCALE);
17014    let src = src.as_f32x8();
17015    let slice = slice as *mut i8;
17016    let offsets = offsets.as_i64x8();
17017    vscatterqps(slice, mask as i8, offsets, src, SCALE);
17018}
17019
17020/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17021///
17022/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_epi64&expand=3038)
17023#[inline]
17024#[target_feature(enable = "avx512f")]
17025#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17026#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17027#[rustc_legacy_const_generics(3)]
17028pub unsafe fn _mm512_i32scatter_epi64<const SCALE: i32>(
17029    slice: *mut i64,
17030    offsets: __m256i,
17031    src: __m512i,
17032) {
17033    static_assert_imm8_scale!(SCALE);
17034    let src = src.as_i64x8();
17035    let neg_one = -1;
17036    let slice = slice as *mut i8;
17037    let offsets = offsets.as_i32x8();
17038    vpscatterdq(slice, neg_one, offsets, src, SCALE);
17039}
17040
17041/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17042///
17043/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_epi64&expand=3039)
17044#[inline]
17045#[target_feature(enable = "avx512f")]
17046#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17047#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17048#[rustc_legacy_const_generics(4)]
17049pub unsafe fn _mm512_mask_i32scatter_epi64<const SCALE: i32>(
17050    slice: *mut i64,
17051    mask: __mmask8,
17052    offsets: __m256i,
17053    src: __m512i,
17054) {
17055    static_assert_imm8_scale!(SCALE);
17056    let src = src.as_i64x8();
17057    let mask = mask as i8;
17058    let slice = slice as *mut i8;
17059    let offsets = offsets.as_i32x8();
17060    vpscatterdq(slice, mask, offsets, src, SCALE);
17061}
17062
17063/// Scatter 64-bit integers from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17064///
17065/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_epi64&expand=3116)
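///
/// A minimal usage sketch (illustrative, not part of Intel's documentation; assumes a
/// nightly toolchain with the `stdarch_x86_avx512` feature and runtime AVX-512F support):
///
/// ```ignore
/// let mut buf = [0i64; 8];
/// // Lane i of `vals` is stored at buf[idx[i]]; SCALE = 8 bytes per i64.
/// let idx = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0);
/// let vals = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
/// unsafe { _mm512_i64scatter_epi64::<8>(buf.as_mut_ptr(), idx, vals) };
/// // `buf` is now [7, 6, 5, 4, 3, 2, 1, 0].
/// ```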
17066#[inline]
17067#[target_feature(enable = "avx512f")]
17068#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17069#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
17070#[rustc_legacy_const_generics(3)]
17071pub unsafe fn _mm512_i64scatter_epi64<const SCALE: i32>(
17072    slice: *mut i64,
17073    offsets: __m512i,
17074    src: __m512i,
17075) {
17076    static_assert_imm8_scale!(SCALE);
17077    let src = src.as_i64x8();
17078    let neg_one = -1;
17079    let slice = slice as *mut i8;
17080    let offsets = offsets.as_i64x8();
17081    vpscatterqq(slice, neg_one, offsets, src, SCALE);
17082}
17083
17084/// Scatter 64-bit integers from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17085///
17086/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_epi64&expand=3117)
17087#[inline]
17088#[target_feature(enable = "avx512f")]
17089#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17090#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
17091#[rustc_legacy_const_generics(4)]
17092pub unsafe fn _mm512_mask_i64scatter_epi64<const SCALE: i32>(
17093    slice: *mut i64,
17094    mask: __mmask8,
17095    offsets: __m512i,
17096    src: __m512i,
17097) {
17098    static_assert_imm8_scale!(SCALE);
17099    let src = src.as_i64x8();
17100    let mask = mask as i8;
17101    let slice = slice as *mut i8;
17102    let offsets = offsets.as_i64x8();
17103    vpscatterqq(slice, mask, offsets, src, SCALE);
17104}
17105
17106/// Scatter 32-bit integers from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17107///
17108/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_epi32&expand=3032)
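///
/// A minimal usage sketch (illustrative, not part of Intel's documentation; assumes a
/// nightly toolchain with the `stdarch_x86_avx512` feature and runtime AVX-512F support):
///
/// ```ignore
/// let mut buf = [0i32; 32];
/// // Write the 16 lanes of `vals` to every other slot of `buf` (SCALE = 4 bytes per i32).
/// let idx = _mm512_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
/// let vals = _mm512_set1_epi32(7);
/// unsafe { _mm512_i32scatter_epi32::<4>(buf.as_mut_ptr(), idx, vals) };
/// ```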
17109#[inline]
17110#[target_feature(enable = "avx512f")]
17111#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17112#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17113#[rustc_legacy_const_generics(3)]
17114pub unsafe fn _mm512_i32scatter_epi32<const SCALE: i32>(
17115    slice: *mut i32,
17116    offsets: __m512i,
17117    src: __m512i,
17118) {
17119    static_assert_imm8_scale!(SCALE);
17120    let src = src.as_i32x16();
17121    let neg_one = -1;
17122    let slice = slice as *mut i8;
17123    let offsets = offsets.as_i32x16();
17124    vpscatterdd(slice, neg_one, offsets, src, SCALE);
17125}
17126
17127/// Scatter 32-bit integers from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17128///
17129/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_epi32&expand=3033)
17130#[inline]
17131#[target_feature(enable = "avx512f")]
17132#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17133#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17134#[rustc_legacy_const_generics(4)]
17135pub unsafe fn _mm512_mask_i32scatter_epi32<const SCALE: i32>(
17136    slice: *mut i32,
17137    mask: __mmask16,
17138    offsets: __m512i,
17139    src: __m512i,
17140) {
17141    static_assert_imm8_scale!(SCALE);
17142    let src = src.as_i32x16();
17143    let mask = mask as i16;
17144    let slice = slice as *mut i8;
17145    let offsets = offsets.as_i32x16();
17146    vpscatterdd(slice, mask, offsets, src, SCALE);
17147}
17148
17149/// Scatter 32-bit integers from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17150///
17151/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_epi32&expand=3108)
17152#[inline]
17153#[target_feature(enable = "avx512f")]
17154#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17155#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
17156#[rustc_legacy_const_generics(3)]
17157pub unsafe fn _mm512_i64scatter_epi32<const SCALE: i32>(
17158    slice: *mut i32,
17159    offsets: __m512i,
17160    src: __m256i,
17161) {
17162    static_assert_imm8_scale!(SCALE);
17163    let src = src.as_i32x8();
17164    let neg_one = -1;
17165    let slice = slice as *mut i8;
17166    let offsets = offsets.as_i64x8();
17167    vpscatterqd(slice, neg_one, offsets, src, SCALE);
17168}
17169
17170/// Scatter 32-bit integers from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17171///
17172/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_epi32&expand=3109)
17173#[inline]
17174#[target_feature(enable = "avx512f")]
17175#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17176#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
17177#[rustc_legacy_const_generics(4)]
17178pub unsafe fn _mm512_mask_i64scatter_epi32<const SCALE: i32>(
17179    slice: *mut i32,
17180    mask: __mmask8,
17181    offsets: __m512i,
17182    src: __m256i,
17183) {
17184    static_assert_imm8_scale!(SCALE);
17185    let src = src.as_i32x8();
17186    let mask = mask as i8;
17187    let slice = slice as *mut i8;
17188    let offsets = offsets.as_i64x8();
17189    vpscatterqd(slice, mask, offsets, src, SCALE);
17190}
17191
17192/// Loads 8 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
17193/// indices stored in the lower half of vindex scaled by scale and stores them in dst.
17194///
17195/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32logather_epi64)
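///
/// A minimal usage sketch (illustrative, not part of Intel's documentation; assumes a
/// nightly toolchain with the `stdarch_x86_avx512` feature and runtime AVX-512F support):
///
/// ```ignore
/// let table = [0i64, 10, 20, 30, 40, 50, 60, 70];
/// // Only the lower eight 32-bit indices of `vindex` are used; SCALE = 8 bytes per i64.
/// let vindex = _mm512_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0);
/// let r = unsafe { _mm512_i32logather_epi64::<8>(vindex, table.as_ptr()) };
/// // `r` holds [70, 60, 50, 40, 30, 20, 10, 0].
/// ```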
17196#[inline]
17197#[target_feature(enable = "avx512f")]
17198#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
17199#[rustc_legacy_const_generics(2)]
17200#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17201pub unsafe fn _mm512_i32logather_epi64<const SCALE: i32>(
17202    vindex: __m512i,
17203    base_addr: *const i64,
17204) -> __m512i {
17205    _mm512_i32gather_epi64::<SCALE>(_mm512_castsi512_si256(vindex), base_addr)
17206}
17207
17208/// Loads 8 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
17209/// indices stored in the lower half of vindex scaled by scale and stores them in dst using writemask k
17210/// (elements are copied from src when the corresponding mask bit is not set).
17211///
17212/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32logather_epi64)
17213#[inline]
17214#[target_feature(enable = "avx512f")]
17215#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
17216#[rustc_legacy_const_generics(4)]
17217#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17218pub unsafe fn _mm512_mask_i32logather_epi64<const SCALE: i32>(
17219    src: __m512i,
17220    k: __mmask8,
17221    vindex: __m512i,
17222    base_addr: *const i64,
17223) -> __m512i {
17224    _mm512_mask_i32gather_epi64::<SCALE>(src, k, _mm512_castsi512_si256(vindex), base_addr)
17225}
17226
17227/// Loads 8 double-precision (64-bit) floating-point elements from memory starting at location base_addr
17228/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale and stores them in dst.
17229///
17230/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32logather_pd)
17231#[inline]
17232#[target_feature(enable = "avx512f")]
17233#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
17234#[rustc_legacy_const_generics(2)]
17235#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17236pub unsafe fn _mm512_i32logather_pd<const SCALE: i32>(
17237    vindex: __m512i,
17238    base_addr: *const f64,
17239) -> __m512d {
17240    _mm512_i32gather_pd::<SCALE>(_mm512_castsi512_si256(vindex), base_addr)
17241}
17242
17243/// Loads 8 double-precision (64-bit) floating-point elements from memory starting at location base_addr
17244/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale and stores them in dst
17245/// using writemask k (elements are copied from src when the corresponding mask bit is not set).
17246///
17247/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32logather_pd)
17248#[inline]
17249#[target_feature(enable = "avx512f")]
17250#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
17251#[rustc_legacy_const_generics(4)]
17252#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17253pub unsafe fn _mm512_mask_i32logather_pd<const SCALE: i32>(
17254    src: __m512d,
17255    k: __mmask8,
17256    vindex: __m512i,
17257    base_addr: *const f64,
17258) -> __m512d {
17259    _mm512_mask_i32gather_pd::<SCALE>(src, k, _mm512_castsi512_si256(vindex), base_addr)
17260}
17261
17262/// Stores 8 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17263/// indices stored in the lower half of vindex scaled by scale.
17264///
17265/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32loscatter_epi64)
17266#[inline]
17267#[target_feature(enable = "avx512f")]
17268#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17269#[rustc_legacy_const_generics(3)]
17270#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17271pub unsafe fn _mm512_i32loscatter_epi64<const SCALE: i32>(
17272    base_addr: *mut i64,
17273    vindex: __m512i,
17274    a: __m512i,
17275) {
17276    _mm512_i32scatter_epi64::<SCALE>(base_addr, _mm512_castsi512_si256(vindex), a)
17277}
17278
17279/// Stores 8 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17280/// indices stored in the lower half of vindex scaled by scale using writemask k (elements whose corresponding
17281/// mask bit is not set are not written to memory).
17282///
17283/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32loscatter_epi64)
17284#[inline]
17285#[target_feature(enable = "avx512f")]
17286#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17287#[rustc_legacy_const_generics(4)]
17288#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17289pub unsafe fn _mm512_mask_i32loscatter_epi64<const SCALE: i32>(
17290    base_addr: *mut i64,
17291    k: __mmask8,
17292    vindex: __m512i,
17293    a: __m512i,
17294) {
17295    _mm512_mask_i32scatter_epi64::<SCALE>(base_addr, k, _mm512_castsi512_si256(vindex), a)
17296}
17297
17298/// Stores 8 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17299/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale.
17300///
17301/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32loscatter_pd)
17302#[inline]
17303#[target_feature(enable = "avx512f")]
17304#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17305#[rustc_legacy_const_generics(3)]
17306#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17307pub unsafe fn _mm512_i32loscatter_pd<const SCALE: i32>(
17308    base_addr: *mut f64,
17309    vindex: __m512i,
17310    a: __m512d,
17311) {
17312    _mm512_i32scatter_pd::<SCALE>(base_addr, _mm512_castsi512_si256(vindex), a)
17313}
17314
17315/// Stores 8 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17316/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale using writemask k
17317/// (elements whose corresponding mask bit is not set are not written to memory).
17318///
17319/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32loscatter_pd)
17320#[inline]
17321#[target_feature(enable = "avx512f")]
17322#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17323#[rustc_legacy_const_generics(4)]
17324#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17325pub unsafe fn _mm512_mask_i32loscatter_pd<const SCALE: i32>(
17326    base_addr: *mut f64,
17327    k: __mmask8,
17328    vindex: __m512i,
17329    a: __m512d,
17330) {
17331    _mm512_mask_i32scatter_pd::<SCALE>(base_addr, k, _mm512_castsi512_si256(vindex), a)
17332}
17333
17334/// Stores 8 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17335/// indices stored in vindex scaled by scale
17336///
17337/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_epi32)
17338#[inline]
17339#[target_feature(enable = "avx512f,avx512vl")]
17340#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17341#[rustc_legacy_const_generics(3)]
17342#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17343pub unsafe fn _mm256_i32scatter_epi32<const SCALE: i32>(
17344    base_addr: *mut i32,
17345    vindex: __m256i,
17346    a: __m256i,
17347) {
17348    static_assert_imm8_scale!(SCALE);
17349    vpscatterdd_256(base_addr as _, 0xff, vindex.as_i32x8(), a.as_i32x8(), SCALE)
17350}
17351
17352/// Stores 8 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17353/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17354/// are not written to memory).
17355///
17356/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_epi32)
17357#[inline]
17358#[target_feature(enable = "avx512f,avx512vl")]
17359#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17360#[rustc_legacy_const_generics(4)]
17361#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17362pub unsafe fn _mm256_mask_i32scatter_epi32<const SCALE: i32>(
17363    base_addr: *mut i32,
17364    k: __mmask8,
17365    vindex: __m256i,
17366    a: __m256i,
17367) {
17368    static_assert_imm8_scale!(SCALE);
17369    vpscatterdd_256(base_addr as _, k, vindex.as_i32x8(), a.as_i32x8(), SCALE)
17370}
17371
17372/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17373///
17374/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32scatter_epi64&expand=4099)
17375#[inline]
17376#[target_feature(enable = "avx512f,avx512vl")]
17377#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17378#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17379#[rustc_legacy_const_generics(3)]
17380pub unsafe fn _mm256_i32scatter_epi64<const SCALE: i32>(
17381    slice: *mut i64,
17382    offsets: __m128i,
17383    src: __m256i,
17384) {
17385    static_assert_imm8_scale!(SCALE);
17386    let src = src.as_i64x4();
17387    let slice = slice as *mut i8;
17388    let offsets = offsets.as_i32x4();
17389    vpscatterdq_256(slice, 0xff, offsets, src, SCALE);
17390}
17391
17392/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17393/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17394/// are not written to memory).
17395///
17396/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_epi64)
17397#[inline]
17398#[target_feature(enable = "avx512f,avx512vl")]
17399#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17400#[rustc_legacy_const_generics(4)]
17401#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17402pub unsafe fn _mm256_mask_i32scatter_epi64<const SCALE: i32>(
17403    base_addr: *mut i64,
17404    k: __mmask8,
17405    vindex: __m128i,
17406    a: __m256i,
17407) {
17408    static_assert_imm8_scale!(SCALE);
17409    vpscatterdq_256(base_addr as _, k, vindex.as_i32x4(), a.as_i64x4(), SCALE)
17410}
17411
17412/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17413/// at packed 32-bit integer indices stored in vindex scaled by scale
17414///
17415/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_pd)
17416#[inline]
17417#[target_feature(enable = "avx512f,avx512vl")]
17418#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17419#[rustc_legacy_const_generics(3)]
17420#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17421pub unsafe fn _mm256_i32scatter_pd<const SCALE: i32>(
17422    base_addr: *mut f64,
17423    vindex: __m128i,
17424    a: __m256d,
17425) {
17426    static_assert_imm8_scale!(SCALE);
17427    vscatterdpd_256(base_addr as _, 0xff, vindex.as_i32x4(), a.as_f64x4(), SCALE)
17428}
17429
17430/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17431/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
17432/// mask bit is not set are not written to memory).
17433///
17434/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_pd)
17435#[inline]
17436#[target_feature(enable = "avx512f,avx512vl")]
17437#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17438#[rustc_legacy_const_generics(4)]
17439#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17440pub unsafe fn _mm256_mask_i32scatter_pd<const SCALE: i32>(
17441    base_addr: *mut f64,
17442    k: __mmask8,
17443    vindex: __m128i,
17444    a: __m256d,
17445) {
17446    static_assert_imm8_scale!(SCALE);
17447    vscatterdpd_256(base_addr as _, k, vindex.as_i32x4(), a.as_f64x4(), SCALE)
17448}
17449
17450/// Stores 8 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
17451/// at packed 32-bit integer indices stored in vindex scaled by scale
17452///
17453/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_ps)
17454#[inline]
17455#[target_feature(enable = "avx512f,avx512vl")]
17456#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
17457#[rustc_legacy_const_generics(3)]
17458#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17459pub unsafe fn _mm256_i32scatter_ps<const SCALE: i32>(
17460    base_addr: *mut f32,
17461    vindex: __m256i,
17462    a: __m256,
17463) {
17464    static_assert_imm8_scale!(SCALE);
17465    vscatterdps_256(base_addr as _, 0xff, vindex.as_i32x8(), a.as_f32x8(), SCALE)
17466}
17467
17468/// Stores 8 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
17469/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
17470/// mask bit is not set are not written to memory).
17471///
17472/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_ps)
17473#[inline]
17474#[target_feature(enable = "avx512f,avx512vl")]
17475#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
17476#[rustc_legacy_const_generics(4)]
17477#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17478pub unsafe fn _mm256_mask_i32scatter_ps<const SCALE: i32>(
17479    base_addr: *mut f32,
17480    k: __mmask8,
17481    vindex: __m256i,
17482    a: __m256,
17483) {
17484    static_assert_imm8_scale!(SCALE);
17485    vscatterdps_256(base_addr as _, k, vindex.as_i32x8(), a.as_f32x8(), SCALE)
17486}
17487
17488/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
17489/// indices stored in vindex scaled by scale
17490///
17491/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_epi32)
17492#[inline]
17493#[target_feature(enable = "avx512f,avx512vl")]
17494#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
17495#[rustc_legacy_const_generics(3)]
17496#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17497pub unsafe fn _mm256_i64scatter_epi32<const SCALE: i32>(
17498    base_addr: *mut i32,
17499    vindex: __m256i,
17500    a: __m128i,
17501) {
17502    static_assert_imm8_scale!(SCALE);
17503    vpscatterqd_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_i32x4(), SCALE)
17504}
17505
17506/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
17507/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17508/// are not written to memory).
17509///
17510/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_epi32)
17511#[inline]
17512#[target_feature(enable = "avx512f,avx512vl")]
17513#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
17514#[rustc_legacy_const_generics(4)]
17515#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17516pub unsafe fn _mm256_mask_i64scatter_epi32<const SCALE: i32>(
17517    base_addr: *mut i32,
17518    k: __mmask8,
17519    vindex: __m256i,
17520    a: __m128i,
17521) {
17522    static_assert_imm8_scale!(SCALE);
17523    vpscatterqd_256(base_addr as _, k, vindex.as_i64x4(), a.as_i32x4(), SCALE)
17524}
17525
17526/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
17527/// indices stored in vindex scaled by scale
17528///
17529/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_epi64)
17530#[inline]
17531#[target_feature(enable = "avx512f,avx512vl")]
17532#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
17533#[rustc_legacy_const_generics(3)]
17534#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17535pub unsafe fn _mm256_i64scatter_epi64<const SCALE: i32>(
17536    base_addr: *mut i64,
17537    vindex: __m256i,
17538    a: __m256i,
17539) {
17540    static_assert_imm8_scale!(SCALE);
17541    vpscatterqq_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_i64x4(), SCALE)
17542}
17543
17544/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
17545/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17546/// are not written to memory).
17547///
17548/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_epi64)
17549#[inline]
17550#[target_feature(enable = "avx512f,avx512vl")]
17551#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
17552#[rustc_legacy_const_generics(4)]
17553#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17554pub unsafe fn _mm256_mask_i64scatter_epi64<const SCALE: i32>(
17555    base_addr: *mut i64,
17556    k: __mmask8,
17557    vindex: __m256i,
17558    a: __m256i,
17559) {
17560    static_assert_imm8_scale!(SCALE);
17561    vpscatterqq_256(base_addr as _, k, vindex.as_i64x4(), a.as_i64x4(), SCALE)
17562}
17563
17564/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17565/// at packed 64-bit integer indices stored in vindex scaled by scale
17566///
17567/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_pd)
17568#[inline]
17569#[target_feature(enable = "avx512f,avx512vl")]
17570#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
17571#[rustc_legacy_const_generics(3)]
17572#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17573pub unsafe fn _mm256_i64scatter_pd<const SCALE: i32>(
17574    base_addr: *mut f64,
17575    vindex: __m256i,
17576    a: __m256d,
17577) {
17578    static_assert_imm8_scale!(SCALE);
17579    vscatterqpd_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_f64x4(), SCALE)
17580}
17581
17582/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17583/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
17584/// mask bit is not set are not written to memory).
17585///
17586/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_pd)
17587#[inline]
17588#[target_feature(enable = "avx512f,avx512vl")]
17589#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
17590#[rustc_legacy_const_generics(4)]
17591#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17592pub unsafe fn _mm256_mask_i64scatter_pd<const SCALE: i32>(
17593    base_addr: *mut f64,
17594    k: __mmask8,
17595    vindex: __m256i,
17596    a: __m256d,
17597) {
17598    static_assert_imm8_scale!(SCALE);
17599    vscatterqpd_256(base_addr as _, k, vindex.as_i64x4(), a.as_f64x4(), SCALE)
17600}
17601
17602/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
17603/// at packed 64-bit integer indices stored in vindex scaled by scale
17604///
17605/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_ps)
17606#[inline]
17607#[target_feature(enable = "avx512f,avx512vl")]
17608#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
17609#[rustc_legacy_const_generics(3)]
17610#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17611pub unsafe fn _mm256_i64scatter_ps<const SCALE: i32>(
17612    base_addr: *mut f32,
17613    vindex: __m256i,
17614    a: __m128,
17615) {
17616    static_assert_imm8_scale!(SCALE);
17617    vscatterqps_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_f32x4(), SCALE)
17618}
17619
17620/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
17621/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
17622/// mask bit is not set are not written to memory).
17623///
17624/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_ps)
17625#[inline]
17626#[target_feature(enable = "avx512f,avx512vl")]
17627#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
17628#[rustc_legacy_const_generics(4)]
17629#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17630pub unsafe fn _mm256_mask_i64scatter_ps<const SCALE: i32>(
17631    base_addr: *mut f32,
17632    k: __mmask8,
17633    vindex: __m256i,
17634    a: __m128,
17635) {
17636    static_assert_imm8_scale!(SCALE);
17637    vscatterqps_256(base_addr as _, k, vindex.as_i64x4(), a.as_f32x4(), SCALE)
17638}
17639
17640/// Loads 8 32-bit integer elements from memory starting at location base_addr at packed 32-bit integer
17641/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
17642/// mask bit is not set).
17643///
17644/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_epi32)
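///
/// A minimal usage sketch (illustrative, not part of Intel's documentation; assumes a
/// nightly toolchain with the `stdarch_x86_avx512` feature and runtime AVX-512F/VL support):
///
/// ```ignore
/// let table = [10i32, 11, 12, 13, 14, 15, 16, 17];
/// let src = _mm256_set1_epi32(-1);
/// let vindex = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
/// // Lanes whose mask bit is clear keep the value from `src` (-1 here); SCALE = 4.
/// let r = unsafe { _mm256_mmask_i32gather_epi32::<4>(src, 0b0000_1111, vindex, table.as_ptr()) };
/// // `r` is [10, 11, 12, 13, -1, -1, -1, -1].
/// ```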
17645#[inline]
17646#[target_feature(enable = "avx512f,avx512vl")]
17647#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
17648#[rustc_legacy_const_generics(4)]
17649#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17650pub unsafe fn _mm256_mmask_i32gather_epi32<const SCALE: i32>(
17651    src: __m256i,
17652    k: __mmask8,
17653    vindex: __m256i,
17654    base_addr: *const i32,
17655) -> __m256i {
17656    static_assert_imm8_scale!(SCALE);
17657    transmute(vpgatherdd_256(
17658        src.as_i32x8(),
17659        base_addr as _,
17660        vindex.as_i32x8(),
17661        k,
17662        SCALE,
17663    ))
17664}
17665
17666/// Loads 4 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
17667/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
17668/// mask bit is not set).
17669///
17670/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_epi64)
17671#[inline]
17672#[target_feature(enable = "avx512f,avx512vl")]
17673#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
17674#[rustc_legacy_const_generics(4)]
17675#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17676pub unsafe fn _mm256_mmask_i32gather_epi64<const SCALE: i32>(
17677    src: __m256i,
17678    k: __mmask8,
17679    vindex: __m128i,
17680    base_addr: *const i64,
17681) -> __m256i {
17682    static_assert_imm8_scale!(SCALE);
17683    transmute(vpgatherdq_256(
17684        src.as_i64x4(),
17685        base_addr as _,
17686        vindex.as_i32x4(),
17687        k,
17688        SCALE,
17689    ))
17690}
17691
17692/// Loads 4 double-precision (64-bit) floating-point elements from memory starting at location base_addr
17693/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
17694/// from src when the corresponding mask bit is not set).
17695///
17696/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_pd)
17697#[inline]
17698#[target_feature(enable = "avx512f,avx512vl")]
17699#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
17700#[rustc_legacy_const_generics(4)]
17701#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17702pub unsafe fn _mm256_mmask_i32gather_pd<const SCALE: i32>(
17703    src: __m256d,
17704    k: __mmask8,
17705    vindex: __m128i,
17706    base_addr: *const f64,
17707) -> __m256d {
17708    static_assert_imm8_scale!(SCALE);
17709    transmute(vgatherdpd_256(
17710        src.as_f64x4(),
17711        base_addr as _,
17712        vindex.as_i32x4(),
17713        k,
17714        SCALE,
17715    ))
17716}
17717
17718/// Loads 8 single-precision (32-bit) floating-point elements from memory starting at location base_addr
17719/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
17720/// from src when the corresponding mask bit is not set).
17721///
17722/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_ps)
17723#[inline]
17724#[target_feature(enable = "avx512f,avx512vl")]
17725#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
17726#[rustc_legacy_const_generics(4)]
17727#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17728pub unsafe fn _mm256_mmask_i32gather_ps<const SCALE: i32>(
17729    src: __m256,
17730    k: __mmask8,
17731    vindex: __m256i,
17732    base_addr: *const f32,
17733) -> __m256 {
17734    static_assert_imm8_scale!(SCALE);
17735    transmute(vgatherdps_256(
17736        src.as_f32x8(),
17737        base_addr as _,
17738        vindex.as_i32x8(),
17739        k,
17740        SCALE,
17741    ))
17742}
17743
17744/// Loads 4 32-bit integer elements from memory starting at location base_addr at packed 64-bit integer
17745/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
17746/// mask bit is not set).
17747///
17748/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_epi32)
17749#[inline]
17750#[target_feature(enable = "avx512f,avx512vl")]
17751#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
17752#[rustc_legacy_const_generics(4)]
17753#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17754pub unsafe fn _mm256_mmask_i64gather_epi32<const SCALE: i32>(
17755    src: __m128i,
17756    k: __mmask8,
17757    vindex: __m256i,
17758    base_addr: *const i32,
17759) -> __m128i {
17760    static_assert_imm8_scale!(SCALE);
17761    transmute(vpgatherqd_256(
17762        src.as_i32x4(),
17763        base_addr as _,
17764        vindex.as_i64x4(),
17765        k,
17766        SCALE,
17767    ))
17768}
17769
17770/// Loads 4 64-bit integer elements from memory starting at location base_addr at packed 64-bit integer
17771/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
17772/// mask bit is not set).
17773///
17774/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_epi64)
17775#[inline]
17776#[target_feature(enable = "avx512f,avx512vl")]
17777#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
17778#[rustc_legacy_const_generics(4)]
17779#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17780pub unsafe fn _mm256_mmask_i64gather_epi64<const SCALE: i32>(
17781    src: __m256i,
17782    k: __mmask8,
17783    vindex: __m256i,
17784    base_addr: *const i64,
17785) -> __m256i {
17786    static_assert_imm8_scale!(SCALE);
17787    transmute(vpgatherqq_256(
17788        src.as_i64x4(),
17789        base_addr as _,
17790        vindex.as_i64x4(),
17791        k,
17792        SCALE,
17793    ))
17794}
17795
17796/// Loads 4 double-precision (64-bit) floating-point elements from memory starting at location base_addr
17797/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
17798/// from src when the corresponding mask bit is not set).
17799///
17800/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_pd)
17801#[inline]
17802#[target_feature(enable = "avx512f,avx512vl")]
17803#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
17804#[rustc_legacy_const_generics(4)]
17805#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17806pub unsafe fn _mm256_mmask_i64gather_pd<const SCALE: i32>(
17807    src: __m256d,
17808    k: __mmask8,
17809    vindex: __m256i,
17810    base_addr: *const f64,
17811) -> __m256d {
17812    static_assert_imm8_scale!(SCALE);
17813    transmute(vgatherqpd_256(
17814        src.as_f64x4(),
17815        base_addr as _,
17816        vindex.as_i64x4(),
17817        k,
17818        SCALE,
17819    ))
17820}
17821
17822/// Loads 4 single-precision (32-bit) floating-point elements from memory starting at location base_addr
17823/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
17824/// from src when the corresponding mask bit is not set).
17825///
17826/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_ps)
17827#[inline]
17828#[target_feature(enable = "avx512f,avx512vl")]
17829#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
17830#[rustc_legacy_const_generics(4)]
17831#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17832pub unsafe fn _mm256_mmask_i64gather_ps<const SCALE: i32>(
17833    src: __m128,
17834    k: __mmask8,
17835    vindex: __m256i,
17836    base_addr: *const f32,
17837) -> __m128 {
17838    static_assert_imm8_scale!(SCALE);
17839    transmute(vgatherqps_256(
17840        src.as_f32x4(),
17841        base_addr as _,
17842        vindex.as_i64x4(),
17843        k,
17844        SCALE,
17845    ))
17846}
17847
17848/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17849/// indices stored in vindex scaled by scale
17850///
17851/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_epi32)
17852#[inline]
17853#[target_feature(enable = "avx512f,avx512vl")]
17854#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17855#[rustc_legacy_const_generics(3)]
17856#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17857pub unsafe fn _mm_i32scatter_epi32<const SCALE: i32>(
17858    base_addr: *mut i32,
17859    vindex: __m128i,
17860    a: __m128i,
17861) {
17862    static_assert_imm8_scale!(SCALE);
17863    vpscatterdd_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_i32x4(), SCALE)
17864}
17865
17866/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17867/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17868/// are not written to memory).
17869///
17870/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_epi32)
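///
/// A minimal usage sketch (illustrative, not part of Intel's documentation; assumes a
/// nightly toolchain with the `stdarch_x86_avx512` feature and runtime AVX-512F/VL support):
///
/// ```ignore
/// let mut buf = [0i32; 4];
/// let vindex = _mm_setr_epi32(0, 1, 2, 3);
/// let a = _mm_setr_epi32(10, 20, 30, 40);
/// // Only lanes 0 and 2 are stored (mask = 0b0101); SCALE = 4 bytes per i32.
/// unsafe { _mm_mask_i32scatter_epi32::<4>(buf.as_mut_ptr(), 0b0101, vindex, a) };
/// // `buf` is now [10, 0, 30, 0].
/// ```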
17871#[inline]
17872#[target_feature(enable = "avx512f,avx512vl")]
17873#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17874#[rustc_legacy_const_generics(4)]
17875#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17876pub unsafe fn _mm_mask_i32scatter_epi32<const SCALE: i32>(
17877    base_addr: *mut i32,
17878    k: __mmask8,
17879    vindex: __m128i,
17880    a: __m128i,
17881) {
17882    static_assert_imm8_scale!(SCALE);
17883    vpscatterdd_128(base_addr as _, k, vindex.as_i32x4(), a.as_i32x4(), SCALE)
17884}
17885
17886/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17887/// indices stored in vindex scaled by scale
17888///
17889/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_epi64)
17890#[inline]
17891#[target_feature(enable = "avx512f,avx512vl")]
17892#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17893#[rustc_legacy_const_generics(3)]
17894#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17895pub unsafe fn _mm_i32scatter_epi64<const SCALE: i32>(
17896    base_addr: *mut i64,
17897    vindex: __m128i,
17898    a: __m128i,
17899) {
17900    static_assert_imm8_scale!(SCALE);
17901    vpscatterdq_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_i64x2(), SCALE)
17902}
17903
17904/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17905/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17906/// are not written to memory).
17907///
17908/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_epi64)
17909#[inline]
17910#[target_feature(enable = "avx512f,avx512vl")]
17911#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17912#[rustc_legacy_const_generics(4)]
17913#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17914pub unsafe fn _mm_mask_i32scatter_epi64<const SCALE: i32>(
17915    base_addr: *mut i64,
17916    k: __mmask8,
17917    vindex: __m128i,
17918    a: __m128i,
17919) {
17920    static_assert_imm8_scale!(SCALE);
17921    vpscatterdq_128(base_addr as _, k, vindex.as_i32x4(), a.as_i64x2(), SCALE)
17922}
17923
17924/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17925/// at packed 32-bit integer indices stored in vindex scaled by scale
17926///
17927/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_pd)
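///
/// A minimal usage sketch (illustrative, not part of Intel's documentation; assumes a
/// nightly toolchain with the `stdarch_x86_avx512` feature and runtime AVX-512F/VL support):
///
/// ```ignore
/// let mut buf = [0.0f64; 4];
/// // Only the low two 32-bit indices of `vindex` are used; SCALE = 8 bytes per f64.
/// let vindex = _mm_setr_epi32(3, 0, 0, 0);
/// let a = _mm_setr_pd(1.5, 2.5);
/// unsafe { _mm_i32scatter_pd::<8>(buf.as_mut_ptr(), vindex, a) };
/// // `buf` is now [2.5, 0.0, 0.0, 1.5].
/// ```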
17928#[inline]
17929#[target_feature(enable = "avx512f,avx512vl")]
17930#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17931#[rustc_legacy_const_generics(3)]
17932#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17933pub unsafe fn _mm_i32scatter_pd<const SCALE: i32>(
17934    base_addr: *mut f64,
17935    vindex: __m128i,
17936    a: __m128d,
17937) {
17938    static_assert_imm8_scale!(SCALE);
17939    vscatterdpd_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_f64x2(), SCALE)
17940}
17941
17942/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17943/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
17944/// mask bit is not set are not written to memory).
17945///
17946/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_pd)
17947#[inline]
17948#[target_feature(enable = "avx512f,avx512vl")]
17949#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17950#[rustc_legacy_const_generics(4)]
17951#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17952pub unsafe fn _mm_mask_i32scatter_pd<const SCALE: i32>(
17953    base_addr: *mut f64,
17954    k: __mmask8,
17955    vindex: __m128i,
17956    a: __m128d,
17957) {
17958    static_assert_imm8_scale!(SCALE);
17959    vscatterdpd_128(base_addr as _, k, vindex.as_i32x4(), a.as_f64x2(), SCALE)
17960}
17961
17962/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
17963/// at packed 32-bit integer indices stored in vindex scaled by scale
17964///
17965/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_ps)
17966#[inline]
17967#[target_feature(enable = "avx512f,avx512vl")]
17968#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
17969#[rustc_legacy_const_generics(3)]
17970#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17971pub unsafe fn _mm_i32scatter_ps<const SCALE: i32>(base_addr: *mut f32, vindex: __m128i, a: __m128) {
17972    static_assert_imm8_scale!(SCALE);
17973    vscatterdps_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_f32x4(), SCALE)
17974}
17975
17976/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
17977/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
17978/// mask bit is not set are not written to memory).
17979///
17980/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_ps)
17981#[inline]
17982#[target_feature(enable = "avx512f,avx512vl")]
17983#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
17984#[rustc_legacy_const_generics(4)]
17985#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17986pub unsafe fn _mm_mask_i32scatter_ps<const SCALE: i32>(
17987    base_addr: *mut f32,
17988    k: __mmask8,
17989    vindex: __m128i,
17990    a: __m128,
17991) {
17992    static_assert_imm8_scale!(SCALE);
17993    vscatterdps_128(base_addr as _, k, vindex.as_i32x4(), a.as_f32x4(), SCALE)
17994}
17995
17996/// Stores 2 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
17997/// indices stored in vindex scaled by scale
17998///
17999/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_epi32)
18000#[inline]
18001#[target_feature(enable = "avx512f,avx512vl")]
18002#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
18003#[rustc_legacy_const_generics(3)]
18004#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18005pub unsafe fn _mm_i64scatter_epi32<const SCALE: i32>(
18006    base_addr: *mut i32,
18007    vindex: __m128i,
18008    a: __m128i,
18009) {
18010    static_assert_imm8_scale!(SCALE);
18011    vpscatterqd_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_i32x4(), SCALE)
18012}
18013
18014/// Stores 2 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
18015/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
18016/// are not written to memory).
18017///
18018/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_epi32)
18019#[inline]
18020#[target_feature(enable = "avx512f,avx512vl")]
18021#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
18022#[rustc_legacy_const_generics(4)]
18023#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18024pub unsafe fn _mm_mask_i64scatter_epi32<const SCALE: i32>(
18025    base_addr: *mut i32,
18026    k: __mmask8,
18027    vindex: __m128i,
18028    a: __m128i,
18029) {
18030    static_assert_imm8_scale!(SCALE);
18031    vpscatterqd_128(base_addr as _, k, vindex.as_i64x2(), a.as_i32x4(), SCALE)
18032}
18033
18034/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
18035/// indices stored in vindex scaled by scale
18036///
18037/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_epi64)
18038#[inline]
18039#[target_feature(enable = "avx512f,avx512vl")]
18040#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
18041#[rustc_legacy_const_generics(3)]
18042#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18043pub unsafe fn _mm_i64scatter_epi64<const SCALE: i32>(
18044    base_addr: *mut i64,
18045    vindex: __m128i,
18046    a: __m128i,
18047) {
18048    static_assert_imm8_scale!(SCALE);
18049    vpscatterqq_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_i64x2(), SCALE)
18050}
18051
18052/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
18053/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
18054/// are not written to memory).
18055///
18056/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_epi64)
18057#[inline]
18058#[target_feature(enable = "avx512f,avx512vl")]
18059#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
18060#[rustc_legacy_const_generics(4)]
18061#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18062pub unsafe fn _mm_mask_i64scatter_epi64<const SCALE: i32>(
18063    base_addr: *mut i64,
18064    k: __mmask8,
18065    vindex: __m128i,
18066    a: __m128i,
18067) {
18068    static_assert_imm8_scale!(SCALE);
18069    vpscatterqq_128(base_addr as _, k, vindex.as_i64x2(), a.as_i64x2(), SCALE)
18070}
18071
18072/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
18073/// at packed 64-bit integer indices stored in vindex scaled by scale
18074///
18075/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_pd)
18076#[inline]
18077#[target_feature(enable = "avx512f,avx512vl")]
18078#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
18079#[rustc_legacy_const_generics(3)]
18080#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18081pub unsafe fn _mm_i64scatter_pd<const SCALE: i32>(
18082    base_addr: *mut f64,
18083    vindex: __m128i,
18084    a: __m128d,
18085) {
18086    static_assert_imm8_scale!(SCALE);
18087    vscatterqpd_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_f64x2(), SCALE)
18088}
18089
18090/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
18091/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
18092/// mask bit is not set are not written to memory).
18093///
18094/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_pd)
18095#[inline]
18096#[target_feature(enable = "avx512f,avx512vl")]
18097#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
18098#[rustc_legacy_const_generics(4)]
18099#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18100pub unsafe fn _mm_mask_i64scatter_pd<const SCALE: i32>(
18101    base_addr: *mut f64,
18102    k: __mmask8,
18103    vindex: __m128i,
18104    a: __m128d,
18105) {
18106    static_assert_imm8_scale!(SCALE);
18107    vscatterqpd_128(base_addr as _, k, vindex.as_i64x2(), a.as_f64x2(), SCALE)
18108}
18109
18110/// Stores 2 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
18111/// at packed 64-bit integer indices stored in vindex scaled by scale
18112///
18113/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_ps)
18114#[inline]
18115#[target_feature(enable = "avx512f,avx512vl")]
18116#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
18117#[rustc_legacy_const_generics(3)]
18118#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18119pub unsafe fn _mm_i64scatter_ps<const SCALE: i32>(base_addr: *mut f32, vindex: __m128i, a: __m128) {
18120    static_assert_imm8_scale!(SCALE);
18121    vscatterqps_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_f32x4(), SCALE)
18122}
18123
18124/// Stores 2 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
18125/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
/// mask bit is not set are not written to memory).
18126///
18127/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_ps)
18128#[inline]
18129#[target_feature(enable = "avx512f,avx512vl")]
18130#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
18131#[rustc_legacy_const_generics(4)]
18132#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18133pub unsafe fn _mm_mask_i64scatter_ps<const SCALE: i32>(
18134    base_addr: *mut f32,
18135    k: __mmask8,
18136    vindex: __m128i,
18137    a: __m128,
18138) {
18139    static_assert_imm8_scale!(SCALE);
18140    vscatterqps_128(base_addr as _, k, vindex.as_i64x2(), a.as_f32x4(), SCALE)
18141}
18142
18143/// Loads 4 32-bit integer elements from memory starting at location base_addr at packed 32-bit integer
18144/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18145/// mask bit is not set).
18146///
18147/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_epi32)
18148#[inline]
18149#[target_feature(enable = "avx512f,avx512vl")]
18150#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
18151#[rustc_legacy_const_generics(4)]
18152#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18153pub unsafe fn _mm_mmask_i32gather_epi32<const SCALE: i32>(
18154    src: __m128i,
18155    k: __mmask8,
18156    vindex: __m128i,
18157    base_addr: *const i32,
18158) -> __m128i {
18159    static_assert_imm8_scale!(SCALE);
18160    transmute(vpgatherdd_128(
18161        src.as_i32x4(),
18162        base_addr as _,
18163        vindex.as_i32x4(),
18164        k,
18165        SCALE,
18166    ))
18167}
18168
18169/// Loads 2 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
18170/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18171/// mask bit is not set).
18172///
18173/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_epi64)
18174#[inline]
18175#[target_feature(enable = "avx512f,avx512vl")]
18176#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
18177#[rustc_legacy_const_generics(4)]
18178#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18179pub unsafe fn _mm_mmask_i32gather_epi64<const SCALE: i32>(
18180    src: __m128i,
18181    k: __mmask8,
18182    vindex: __m128i,
18183    base_addr: *const i64,
18184) -> __m128i {
18185    static_assert_imm8_scale!(SCALE);
18186    transmute(vpgatherdq_128(
18187        src.as_i64x2(),
18188        base_addr as _,
18189        vindex.as_i32x4(),
18190        k,
18191        SCALE,
18192    ))
18193}
18194
18195/// Loads 2 double-precision (64-bit) floating-point elements from memory starting at location base_addr
18196/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18197/// from src when the corresponding mask bit is not set).
18198///
18199/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_pd)
18200#[inline]
18201#[target_feature(enable = "avx512f,avx512vl")]
18202#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
18203#[rustc_legacy_const_generics(4)]
18204#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18205pub unsafe fn _mm_mmask_i32gather_pd<const SCALE: i32>(
18206    src: __m128d,
18207    k: __mmask8,
18208    vindex: __m128i,
18209    base_addr: *const f64,
18210) -> __m128d {
18211    static_assert_imm8_scale!(SCALE);
18212    transmute(vgatherdpd_128(
18213        src.as_f64x2(),
18214        base_addr as _,
18215        vindex.as_i32x4(),
18216        k,
18217        SCALE,
18218    ))
18219}
18220
18221/// Loads 4 single-precision (32-bit) floating-point elements from memory starting at location base_addr
18222/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18223/// from src when the corresponding mask bit is not set).
18224///
18225/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_ps)
18226#[inline]
18227#[target_feature(enable = "avx512f,avx512vl")]
18228#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
18229#[rustc_legacy_const_generics(4)]
18230#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18231pub unsafe fn _mm_mmask_i32gather_ps<const SCALE: i32>(
18232    src: __m128,
18233    k: __mmask8,
18234    vindex: __m128i,
18235    base_addr: *const f32,
18236) -> __m128 {
18237    static_assert_imm8_scale!(SCALE);
18238    transmute(vgatherdps_128(
18239        src.as_f32x4(),
18240        base_addr as _,
18241        vindex.as_i32x4(),
18242        k,
18243        SCALE,
18244    ))
18245}
18246
18247/// Loads 2 32-bit integer elements from memory starting at location base_addr at packed 64-bit integer
18248/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18249/// mask bit is not set).
18250///
18251/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_epi32)
18252#[inline]
18253#[target_feature(enable = "avx512f,avx512vl")]
18254#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
18255#[rustc_legacy_const_generics(4)]
18256#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18257pub unsafe fn _mm_mmask_i64gather_epi32<const SCALE: i32>(
18258    src: __m128i,
18259    k: __mmask8,
18260    vindex: __m128i,
18261    base_addr: *const i32,
18262) -> __m128i {
18263    static_assert_imm8_scale!(SCALE);
18264    transmute(vpgatherqd_128(
18265        src.as_i32x4(),
18266        base_addr as _,
18267        vindex.as_i64x2(),
18268        k,
18269        SCALE,
18270    ))
18271}
18272
18273/// Loads 2 64-bit integer elements from memory starting at location base_addr at packed 64-bit integer
18274/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18275/// mask bit is not set).
18276///
18277/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_epi64)
18278#[inline]
18279#[target_feature(enable = "avx512f,avx512vl")]
18280#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
18281#[rustc_legacy_const_generics(4)]
18282#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18283pub unsafe fn _mm_mmask_i64gather_epi64<const SCALE: i32>(
18284    src: __m128i,
18285    k: __mmask8,
18286    vindex: __m128i,
18287    base_addr: *const i64,
18288) -> __m128i {
18289    static_assert_imm8_scale!(SCALE);
18290    transmute(vpgatherqq_128(
18291        src.as_i64x2(),
18292        base_addr as _,
18293        vindex.as_i64x2(),
18294        k,
18295        SCALE,
18296    ))
18297}
18298
18299/// Loads 2 double-precision (64-bit) floating-point elements from memory starting at location base_addr
18300/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18301/// from src when the corresponding mask bit is not set).
18302///
18303/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_pd)
18304#[inline]
18305#[target_feature(enable = "avx512f,avx512vl")]
18306#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
18307#[rustc_legacy_const_generics(4)]
18308#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18309pub unsafe fn _mm_mmask_i64gather_pd<const SCALE: i32>(
18310    src: __m128d,
18311    k: __mmask8,
18312    vindex: __m128i,
18313    base_addr: *const f64,
18314) -> __m128d {
18315    static_assert_imm8_scale!(SCALE);
18316    transmute(vgatherqpd_128(
18317        src.as_f64x2(),
18318        base_addr as _,
18319        vindex.as_i64x2(),
18320        k,
18321        SCALE,
18322    ))
18323}
18324
18325/// Loads 2 single-precision (32-bit) floating-point elements from memory starting at location base_addr
18326/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18327/// from src when the corresponding mask bit is not set).
18328///
18329/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_ps)
18330#[inline]
18331#[target_feature(enable = "avx512f,avx512vl")]
18332#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
18333#[rustc_legacy_const_generics(4)]
18334#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18335pub unsafe fn _mm_mmask_i64gather_ps<const SCALE: i32>(
18336    src: __m128,
18337    k: __mmask8,
18338    vindex: __m128i,
18339    base_addr: *const f32,
18340) -> __m128 {
18341    static_assert_imm8_scale!(SCALE);
18342    transmute(vgatherqps_128(
18343        src.as_f32x4(),
18344        base_addr as _,
18345        vindex.as_i64x2(),
18346        k,
18347        SCALE,
18348    ))
18349}
18350
18351/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18352///
18353/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi32&expand=1198)
18354#[inline]
18355#[target_feature(enable = "avx512f")]
18356#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18357#[cfg_attr(test, assert_instr(vpcompressd))]
18358pub fn _mm512_mask_compress_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
18359    unsafe { transmute(vpcompressd(a.as_i32x16(), src.as_i32x16(), k)) }
18360}
18361
18362/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18363///
18364/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi32&expand=1199)
18365#[inline]
18366#[target_feature(enable = "avx512f")]
18367#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18368#[cfg_attr(test, assert_instr(vpcompressd))]
18369pub fn _mm512_maskz_compress_epi32(k: __mmask16, a: __m512i) -> __m512i {
18370    unsafe { transmute(vpcompressd(a.as_i32x16(), i32x16::ZERO, k)) }
18371}
18372
18373/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18374///
18375/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi32&expand=1196)
18376#[inline]
18377#[target_feature(enable = "avx512f,avx512vl")]
18378#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18379#[cfg_attr(test, assert_instr(vpcompressd))]
18380pub fn _mm256_mask_compress_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
18381    unsafe { transmute(vpcompressd256(a.as_i32x8(), src.as_i32x8(), k)) }
18382}
18383
18384/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18385///
18386/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi32&expand=1197)
18387#[inline]
18388#[target_feature(enable = "avx512f,avx512vl")]
18389#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18390#[cfg_attr(test, assert_instr(vpcompressd))]
18391pub fn _mm256_maskz_compress_epi32(k: __mmask8, a: __m256i) -> __m256i {
18392    unsafe { transmute(vpcompressd256(a.as_i32x8(), i32x8::ZERO, k)) }
18393}
18394
18395/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18396///
18397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi32&expand=1194)
18398#[inline]
18399#[target_feature(enable = "avx512f,avx512vl")]
18400#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18401#[cfg_attr(test, assert_instr(vpcompressd))]
18402pub fn _mm_mask_compress_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
18403    unsafe { transmute(vpcompressd128(a.as_i32x4(), src.as_i32x4(), k)) }
18404}
18405
18406/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18407///
18408/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi32&expand=1195)
18409#[inline]
18410#[target_feature(enable = "avx512f,avx512vl")]
18411#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18412#[cfg_attr(test, assert_instr(vpcompressd))]
18413pub fn _mm_maskz_compress_epi32(k: __mmask8, a: __m128i) -> __m128i {
18414    unsafe { transmute(vpcompressd128(a.as_i32x4(), i32x4::ZERO, k)) }
18415}
18416
18417/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18418///
18419/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi64&expand=1204)
18420#[inline]
18421#[target_feature(enable = "avx512f")]
18422#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18423#[cfg_attr(test, assert_instr(vpcompressq))]
18424pub fn _mm512_mask_compress_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
18425    unsafe { transmute(vpcompressq(a.as_i64x8(), src.as_i64x8(), k)) }
18426}
18427
18428/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18429///
18430/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi64&expand=1205)
18431#[inline]
18432#[target_feature(enable = "avx512f")]
18433#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18434#[cfg_attr(test, assert_instr(vpcompressq))]
18435pub fn _mm512_maskz_compress_epi64(k: __mmask8, a: __m512i) -> __m512i {
18436    unsafe { transmute(vpcompressq(a.as_i64x8(), i64x8::ZERO, k)) }
18437}
18438
18439/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18440///
18441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi64&expand=1202)
18442#[inline]
18443#[target_feature(enable = "avx512f,avx512vl")]
18444#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18445#[cfg_attr(test, assert_instr(vpcompressq))]
18446pub fn _mm256_mask_compress_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
18447    unsafe { transmute(vpcompressq256(a.as_i64x4(), src.as_i64x4(), k)) }
18448}
18449
18450/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18451///
18452/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi64&expand=1203)
18453#[inline]
18454#[target_feature(enable = "avx512f,avx512vl")]
18455#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18456#[cfg_attr(test, assert_instr(vpcompressq))]
18457pub fn _mm256_maskz_compress_epi64(k: __mmask8, a: __m256i) -> __m256i {
18458    unsafe { transmute(vpcompressq256(a.as_i64x4(), i64x4::ZERO, k)) }
18459}
18460
18461/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18462///
18463/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi64&expand=1200)
18464#[inline]
18465#[target_feature(enable = "avx512f,avx512vl")]
18466#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18467#[cfg_attr(test, assert_instr(vpcompressq))]
18468pub fn _mm_mask_compress_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
18469    unsafe { transmute(vpcompressq128(a.as_i64x2(), src.as_i64x2(), k)) }
18470}
18471
18472/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18473///
18474/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi64&expand=1201)
18475#[inline]
18476#[target_feature(enable = "avx512f,avx512vl")]
18477#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18478#[cfg_attr(test, assert_instr(vpcompressq))]
18479pub fn _mm_maskz_compress_epi64(k: __mmask8, a: __m128i) -> __m128i {
18480    unsafe { transmute(vpcompressq128(a.as_i64x2(), i64x2::ZERO, k)) }
18481}
18482
18483/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18484///
18485/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_ps&expand=1222)
18486#[inline]
18487#[target_feature(enable = "avx512f")]
18488#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18489#[cfg_attr(test, assert_instr(vcompressps))]
18490pub fn _mm512_mask_compress_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
18491    unsafe { transmute(vcompressps(a.as_f32x16(), src.as_f32x16(), k)) }
18492}
18493
18494/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18495///
18496/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_ps&expand=1223)
18497#[inline]
18498#[target_feature(enable = "avx512f")]
18499#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18500#[cfg_attr(test, assert_instr(vcompressps))]
18501pub fn _mm512_maskz_compress_ps(k: __mmask16, a: __m512) -> __m512 {
18502    unsafe { transmute(vcompressps(a.as_f32x16(), f32x16::ZERO, k)) }
18503}
18504
18505/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18506///
18507/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_ps&expand=1220)
18508#[inline]
18509#[target_feature(enable = "avx512f,avx512vl")]
18510#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18511#[cfg_attr(test, assert_instr(vcompressps))]
18512pub fn _mm256_mask_compress_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
18513    unsafe { transmute(vcompressps256(a.as_f32x8(), src.as_f32x8(), k)) }
18514}
18515
18516/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18517///
18518/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_ps&expand=1221)
18519#[inline]
18520#[target_feature(enable = "avx512f,avx512vl")]
18521#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18522#[cfg_attr(test, assert_instr(vcompressps))]
18523pub fn _mm256_maskz_compress_ps(k: __mmask8, a: __m256) -> __m256 {
18524    unsafe { transmute(vcompressps256(a.as_f32x8(), f32x8::ZERO, k)) }
18525}
18526
18527/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18528///
18529/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_ps&expand=1218)
18530#[inline]
18531#[target_feature(enable = "avx512f,avx512vl")]
18532#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18533#[cfg_attr(test, assert_instr(vcompressps))]
18534pub fn _mm_mask_compress_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
18535    unsafe { transmute(vcompressps128(a.as_f32x4(), src.as_f32x4(), k)) }
18536}
18537
18538/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18539///
18540/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_ps&expand=1219)
18541#[inline]
18542#[target_feature(enable = "avx512f,avx512vl")]
18543#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18544#[cfg_attr(test, assert_instr(vcompressps))]
18545pub fn _mm_maskz_compress_ps(k: __mmask8, a: __m128) -> __m128 {
18546    unsafe { transmute(vcompressps128(a.as_f32x4(), f32x4::ZERO, k)) }
18547}
18548
18549/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18550///
18551/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_pd&expand=1216)
18552#[inline]
18553#[target_feature(enable = "avx512f")]
18554#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18555#[cfg_attr(test, assert_instr(vcompresspd))]
18556pub fn _mm512_mask_compress_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
18557    unsafe { transmute(vcompresspd(a.as_f64x8(), src.as_f64x8(), k)) }
18558}
18559
18560/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18561///
18562/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_pd&expand=1217)
18563#[inline]
18564#[target_feature(enable = "avx512f")]
18565#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18566#[cfg_attr(test, assert_instr(vcompresspd))]
18567pub fn _mm512_maskz_compress_pd(k: __mmask8, a: __m512d) -> __m512d {
18568    unsafe { transmute(vcompresspd(a.as_f64x8(), f64x8::ZERO, k)) }
18569}
18570
18571/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18572///
18573/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_pd&expand=1214)
18574#[inline]
18575#[target_feature(enable = "avx512f,avx512vl")]
18576#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18577#[cfg_attr(test, assert_instr(vcompresspd))]
18578pub fn _mm256_mask_compress_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
18579    unsafe { transmute(vcompresspd256(a.as_f64x4(), src.as_f64x4(), k)) }
18580}
18581
18582/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18583///
18584/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_pd&expand=1215)
18585#[inline]
18586#[target_feature(enable = "avx512f,avx512vl")]
18587#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18588#[cfg_attr(test, assert_instr(vcompresspd))]
18589pub fn _mm256_maskz_compress_pd(k: __mmask8, a: __m256d) -> __m256d {
18590    unsafe { transmute(vcompresspd256(a.as_f64x4(), f64x4::ZERO, k)) }
18591}
18592
18593/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18594///
18595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_pd&expand=1212)
18596#[inline]
18597#[target_feature(enable = "avx512f,avx512vl")]
18598#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18599#[cfg_attr(test, assert_instr(vcompresspd))]
18600pub fn _mm_mask_compress_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
18601    unsafe { transmute(vcompresspd128(a.as_f64x2(), src.as_f64x2(), k)) }
18602}
18603
18604/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18605///
18606/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_pd&expand=1213)
18607#[inline]
18608#[target_feature(enable = "avx512f,avx512vl")]
18609#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18610#[cfg_attr(test, assert_instr(vcompresspd))]
18611pub fn _mm_maskz_compress_pd(k: __mmask8, a: __m128d) -> __m128d {
18612    unsafe { transmute(vcompresspd128(a.as_f64x2(), f64x2::ZERO, k)) }
18613}
18614
18615/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18616///
18617/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi32)
18618#[inline]
18619#[target_feature(enable = "avx512f")]
18620#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18621#[cfg_attr(test, assert_instr(vpcompressd))]
18622pub unsafe fn _mm512_mask_compressstoreu_epi32(base_addr: *mut i32, k: __mmask16, a: __m512i) {
18623    vcompressstored(base_addr as *mut _, a.as_i32x16(), k)
18624}
18625
18626/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18627///
18628/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi32)
18629#[inline]
18630#[target_feature(enable = "avx512f,avx512vl")]
18631#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18632#[cfg_attr(test, assert_instr(vpcompressd))]
18633pub unsafe fn _mm256_mask_compressstoreu_epi32(base_addr: *mut i32, k: __mmask8, a: __m256i) {
18634    vcompressstored256(base_addr as *mut _, a.as_i32x8(), k)
18635}
18636
18637/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18638///
18639/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi32)
18640#[inline]
18641#[target_feature(enable = "avx512f,avx512vl")]
18642#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18643#[cfg_attr(test, assert_instr(vpcompressd))]
18644pub unsafe fn _mm_mask_compressstoreu_epi32(base_addr: *mut i32, k: __mmask8, a: __m128i) {
18645    vcompressstored128(base_addr as *mut _, a.as_i32x4(), k)
18646}
18647
18648/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18649///
18650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi64)
18651#[inline]
18652#[target_feature(enable = "avx512f")]
18653#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18654#[cfg_attr(test, assert_instr(vpcompressq))]
18655pub unsafe fn _mm512_mask_compressstoreu_epi64(base_addr: *mut i64, k: __mmask8, a: __m512i) {
18656    vcompressstoreq(base_addr as *mut _, a.as_i64x8(), k)
18657}
18658
18659/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18660///
18661/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi64)
18662#[inline]
18663#[target_feature(enable = "avx512f,avx512vl")]
18664#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18665#[cfg_attr(test, assert_instr(vpcompressq))]
18666pub unsafe fn _mm256_mask_compressstoreu_epi64(base_addr: *mut i64, k: __mmask8, a: __m256i) {
18667    vcompressstoreq256(base_addr as *mut _, a.as_i64x4(), k)
18668}
18669
18670/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18671///
18672/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi64)
18673#[inline]
18674#[target_feature(enable = "avx512f,avx512vl")]
18675#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18676#[cfg_attr(test, assert_instr(vpcompressq))]
18677pub unsafe fn _mm_mask_compressstoreu_epi64(base_addr: *mut i64, k: __mmask8, a: __m128i) {
18678    vcompressstoreq128(base_addr as *mut _, a.as_i64x2(), k)
18679}
18680
18681/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18682///
18683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_ps)
18684#[inline]
18685#[target_feature(enable = "avx512f")]
18686#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18687#[cfg_attr(test, assert_instr(vcompressps))]
18688pub unsafe fn _mm512_mask_compressstoreu_ps(base_addr: *mut f32, k: __mmask16, a: __m512) {
18689    vcompressstoreps(base_addr as *mut _, a.as_f32x16(), k)
18690}
18691
18692/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18693///
18694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_ps)
18695#[inline]
18696#[target_feature(enable = "avx512f,avx512vl")]
18697#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18698#[cfg_attr(test, assert_instr(vcompressps))]
18699pub unsafe fn _mm256_mask_compressstoreu_ps(base_addr: *mut f32, k: __mmask8, a: __m256) {
18700    vcompressstoreps256(base_addr as *mut _, a.as_f32x8(), k)
18701}
18702
18703/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18704///
18705/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_ps)
18706#[inline]
18707#[target_feature(enable = "avx512f,avx512vl")]
18708#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18709#[cfg_attr(test, assert_instr(vcompressps))]
18710pub unsafe fn _mm_mask_compressstoreu_ps(base_addr: *mut f32, k: __mmask8, a: __m128) {
18711    vcompressstoreps128(base_addr as *mut _, a.as_f32x4(), k)
18712}
18713
18714/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18715///
18716/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_pd)
18717#[inline]
18718#[target_feature(enable = "avx512f")]
18719#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18720#[cfg_attr(test, assert_instr(vcompresspd))]
18721pub unsafe fn _mm512_mask_compressstoreu_pd(base_addr: *mut f64, k: __mmask8, a: __m512d) {
18722    vcompressstorepd(base_addr as *mut _, a.as_f64x8(), k)
18723}
18724
18725/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18726///
18727/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_pd)
18728#[inline]
18729#[target_feature(enable = "avx512f,avx512vl")]
18730#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18731#[cfg_attr(test, assert_instr(vcompresspd))]
18732pub unsafe fn _mm256_mask_compressstoreu_pd(base_addr: *mut f64, k: __mmask8, a: __m256d) {
18733    vcompressstorepd256(base_addr as *mut _, a.as_f64x4(), k)
18734}
18735
18736/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18737///
18738/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_pd)
18739#[inline]
18740#[target_feature(enable = "avx512f,avx512vl")]
18741#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18742#[cfg_attr(test, assert_instr(vcompresspd))]
18743pub unsafe fn _mm_mask_compressstoreu_pd(base_addr: *mut f64, k: __mmask8, a: __m128d) {
18744    vcompressstorepd128(base_addr as *mut _, a.as_f64x2(), k)
18745}
18746
18747/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18748///
18749/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi32&expand=2316)
18750#[inline]
18751#[target_feature(enable = "avx512f")]
18752#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18753#[cfg_attr(test, assert_instr(vpexpandd))]
18754pub fn _mm512_mask_expand_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
18755    unsafe { transmute(vpexpandd(a.as_i32x16(), src.as_i32x16(), k)) }
18756}
18757
18758/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18759///
18760/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi32&expand=2317)
18761#[inline]
18762#[target_feature(enable = "avx512f")]
18763#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18764#[cfg_attr(test, assert_instr(vpexpandd))]
18765pub fn _mm512_maskz_expand_epi32(k: __mmask16, a: __m512i) -> __m512i {
18766    unsafe { transmute(vpexpandd(a.as_i32x16(), i32x16::ZERO, k)) }
18767}
18768
18769/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18770///
18771/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi32&expand=2314)
18772#[inline]
18773#[target_feature(enable = "avx512f,avx512vl")]
18774#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18775#[cfg_attr(test, assert_instr(vpexpandd))]
18776pub fn _mm256_mask_expand_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
18777    unsafe { transmute(vpexpandd256(a.as_i32x8(), src.as_i32x8(), k)) }
18778}
18779
18780/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18781///
18782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi32&expand=2315)
18783#[inline]
18784#[target_feature(enable = "avx512f,avx512vl")]
18785#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18786#[cfg_attr(test, assert_instr(vpexpandd))]
18787pub fn _mm256_maskz_expand_epi32(k: __mmask8, a: __m256i) -> __m256i {
18788    unsafe { transmute(vpexpandd256(a.as_i32x8(), i32x8::ZERO, k)) }
18789}
18790
18791/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18792///
18793/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi32&expand=2312)
18794#[inline]
18795#[target_feature(enable = "avx512f,avx512vl")]
18796#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18797#[cfg_attr(test, assert_instr(vpexpandd))]
18798pub fn _mm_mask_expand_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
18799    unsafe { transmute(vpexpandd128(a.as_i32x4(), src.as_i32x4(), k)) }
18800}
18801
18802/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18803///
18804/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi32&expand=2313)
18805#[inline]
18806#[target_feature(enable = "avx512f,avx512vl")]
18807#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18808#[cfg_attr(test, assert_instr(vpexpandd))]
18809pub fn _mm_maskz_expand_epi32(k: __mmask8, a: __m128i) -> __m128i {
18810    unsafe { transmute(vpexpandd128(a.as_i32x4(), i32x4::ZERO, k)) }
18811}
18812
18813/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18814///
18815/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi64&expand=2322)
18816#[inline]
18817#[target_feature(enable = "avx512f")]
18818#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18819#[cfg_attr(test, assert_instr(vpexpandq))]
18820pub fn _mm512_mask_expand_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
18821    unsafe { transmute(vpexpandq(a.as_i64x8(), src.as_i64x8(), k)) }
18822}
18823
18824/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18825///
18826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi64&expand=2323)
18827#[inline]
18828#[target_feature(enable = "avx512f")]
18829#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18830#[cfg_attr(test, assert_instr(vpexpandq))]
18831pub fn _mm512_maskz_expand_epi64(k: __mmask8, a: __m512i) -> __m512i {
18832    unsafe { transmute(vpexpandq(a.as_i64x8(), i64x8::ZERO, k)) }
18833}
18834
18835/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18836///
18837/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi64&expand=2320)
18838#[inline]
18839#[target_feature(enable = "avx512f,avx512vl")]
18840#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18841#[cfg_attr(test, assert_instr(vpexpandq))]
18842pub fn _mm256_mask_expand_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
18843    unsafe { transmute(vpexpandq256(a.as_i64x4(), src.as_i64x4(), k)) }
18844}
18845
18846/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18847///
18848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi64&expand=2321)
18849#[inline]
18850#[target_feature(enable = "avx512f,avx512vl")]
18851#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18852#[cfg_attr(test, assert_instr(vpexpandq))]
18853pub fn _mm256_maskz_expand_epi64(k: __mmask8, a: __m256i) -> __m256i {
18854    unsafe { transmute(vpexpandq256(a.as_i64x4(), i64x4::ZERO, k)) }
18855}
18856
18857/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18858///
18859/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi64&expand=2318)
18860#[inline]
18861#[target_feature(enable = "avx512f,avx512vl")]
18862#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18863#[cfg_attr(test, assert_instr(vpexpandq))]
18864pub fn _mm_mask_expand_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
18865    unsafe { transmute(vpexpandq128(a.as_i64x2(), src.as_i64x2(), k)) }
18866}
18867
18868/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18869///
18870/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi64&expand=2319)
18871#[inline]
18872#[target_feature(enable = "avx512f,avx512vl")]
18873#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18874#[cfg_attr(test, assert_instr(vpexpandq))]
18875pub fn _mm_maskz_expand_epi64(k: __mmask8, a: __m128i) -> __m128i {
18876    unsafe { transmute(vpexpandq128(a.as_i64x2(), i64x2::ZERO, k)) }
18877}
18878
18879/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18880///
18881/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_ps&expand=2340)
18882#[inline]
18883#[target_feature(enable = "avx512f")]
18884#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18885#[cfg_attr(test, assert_instr(vexpandps))]
18886pub fn _mm512_mask_expand_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
18887    unsafe { transmute(vexpandps(a.as_f32x16(), src.as_f32x16(), k)) }
18888}
18889
18890/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18891///
18892/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_ps&expand=2341)
18893#[inline]
18894#[target_feature(enable = "avx512f")]
18895#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18896#[cfg_attr(test, assert_instr(vexpandps))]
18897pub fn _mm512_maskz_expand_ps(k: __mmask16, a: __m512) -> __m512 {
18898    unsafe { transmute(vexpandps(a.as_f32x16(), f32x16::ZERO, k)) }
18899}
18900
18901/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18902///
18903/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_ps&expand=2338)
18904#[inline]
18905#[target_feature(enable = "avx512f,avx512vl")]
18906#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18907#[cfg_attr(test, assert_instr(vexpandps))]
18908pub fn _mm256_mask_expand_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
18909    unsafe { transmute(vexpandps256(a.as_f32x8(), src.as_f32x8(), k)) }
18910}
18911
18912/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18913///
18914/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_ps&expand=2339)
18915#[inline]
18916#[target_feature(enable = "avx512f,avx512vl")]
18917#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18918#[cfg_attr(test, assert_instr(vexpandps))]
18919pub fn _mm256_maskz_expand_ps(k: __mmask8, a: __m256) -> __m256 {
18920    unsafe { transmute(vexpandps256(a.as_f32x8(), f32x8::ZERO, k)) }
18921}
18922
18923/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18924///
18925/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_ps&expand=2336)
18926#[inline]
18927#[target_feature(enable = "avx512f,avx512vl")]
18928#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18929#[cfg_attr(test, assert_instr(vexpandps))]
18930pub fn _mm_mask_expand_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
18931    unsafe { transmute(vexpandps128(a.as_f32x4(), src.as_f32x4(), k)) }
18932}
18933
18934/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18935///
18936/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_ps&expand=2337)
18937#[inline]
18938#[target_feature(enable = "avx512f,avx512vl")]
18939#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18940#[cfg_attr(test, assert_instr(vexpandps))]
18941pub fn _mm_maskz_expand_ps(k: __mmask8, a: __m128) -> __m128 {
18942    unsafe { transmute(vexpandps128(a.as_f32x4(), f32x4::ZERO, k)) }
18943}
18944
18945/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18946///
18947/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_pd&expand=2334)
18948#[inline]
18949#[target_feature(enable = "avx512f")]
18950#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18951#[cfg_attr(test, assert_instr(vexpandpd))]
18952pub fn _mm512_mask_expand_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
18953    unsafe { transmute(vexpandpd(a.as_f64x8(), src.as_f64x8(), k)) }
18954}
18955
18956/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18957///
18958/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_pd&expand=2335)
18959#[inline]
18960#[target_feature(enable = "avx512f")]
18961#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18962#[cfg_attr(test, assert_instr(vexpandpd))]
18963pub fn _mm512_maskz_expand_pd(k: __mmask8, a: __m512d) -> __m512d {
18964    unsafe { transmute(vexpandpd(a.as_f64x8(), f64x8::ZERO, k)) }
18965}
18966
18967/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18968///
18969/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_pd&expand=2332)
18970#[inline]
18971#[target_feature(enable = "avx512f,avx512vl")]
18972#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18973#[cfg_attr(test, assert_instr(vexpandpd))]
18974pub fn _mm256_mask_expand_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
18975    unsafe { transmute(vexpandpd256(a.as_f64x4(), src.as_f64x4(), k)) }
18976}
18977
18978/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18979///
18980/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_pd&expand=2333)
18981#[inline]
18982#[target_feature(enable = "avx512f,avx512vl")]
18983#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18984#[cfg_attr(test, assert_instr(vexpandpd))]
18985pub fn _mm256_maskz_expand_pd(k: __mmask8, a: __m256d) -> __m256d {
18986    unsafe { transmute(vexpandpd256(a.as_f64x4(), f64x4::ZERO, k)) }
18987}
18988
18989/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18990///
18991/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_pd&expand=2330)
18992#[inline]
18993#[target_feature(enable = "avx512f,avx512vl")]
18994#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18995#[cfg_attr(test, assert_instr(vexpandpd))]
18996pub fn _mm_mask_expand_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
18997    unsafe { transmute(vexpandpd128(a.as_f64x2(), src.as_f64x2(), k)) }
18998}
18999
19000/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19001///
19002/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_pd&expand=2331)
19003#[inline]
19004#[target_feature(enable = "avx512f,avx512vl")]
19005#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19006#[cfg_attr(test, assert_instr(vexpandpd))]
19007pub fn _mm_maskz_expand_pd(k: __mmask8, a: __m128d) -> __m128d {
19008    unsafe { transmute(vexpandpd128(a.as_f64x2(), f64x2::ZERO, k)) }
19009}
19010
19011/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19012///
19013/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rol_epi32&expand=4685)
19014#[inline]
19015#[target_feature(enable = "avx512f")]
19016#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19017#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19018#[rustc_legacy_const_generics(1)]
19019pub fn _mm512_rol_epi32<const IMM8: i32>(a: __m512i) -> __m512i {
19020    unsafe {
19021        static_assert_uimm_bits!(IMM8, 8);
19022        let a = a.as_i32x16();
19023        let r = vprold(a, IMM8);
19024        transmute(r)
19025    }
19026}
19027
19028/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19029///
19030/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rol_epi32&expand=4683)
19031#[inline]
19032#[target_feature(enable = "avx512f")]
19033#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19034#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19035#[rustc_legacy_const_generics(3)]
19036pub fn _mm512_mask_rol_epi32<const IMM8: i32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
19037    unsafe {
19038        static_assert_uimm_bits!(IMM8, 8);
19039        let a = a.as_i32x16();
19040        let r = vprold(a, IMM8);
19041        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
19042    }
19043}
19044
19045/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19046///
19047/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rol_epi32&expand=4684)
19048#[inline]
19049#[target_feature(enable = "avx512f")]
19050#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19051#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19052#[rustc_legacy_const_generics(2)]
19053pub fn _mm512_maskz_rol_epi32<const IMM8: i32>(k: __mmask16, a: __m512i) -> __m512i {
19054    unsafe {
19055        static_assert_uimm_bits!(IMM8, 8);
19056        let a = a.as_i32x16();
19057        let r = vprold(a, IMM8);
19058        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
19059    }
19060}
19061
19062/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19063///
19064/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rol_epi32&expand=4682)
19065#[inline]
19066#[target_feature(enable = "avx512f,avx512vl")]
19067#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19068#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19069#[rustc_legacy_const_generics(1)]
19070pub fn _mm256_rol_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
19071    unsafe {
19072        static_assert_uimm_bits!(IMM8, 8);
19073        let a = a.as_i32x8();
19074        let r = vprold256(a, IMM8);
19075        transmute(r)
19076    }
19077}
19078
19079/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19080///
19081/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rol_epi32&expand=4680)
19082#[inline]
19083#[target_feature(enable = "avx512f,avx512vl")]
19084#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19085#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19086#[rustc_legacy_const_generics(3)]
19087pub fn _mm256_mask_rol_epi32<const IMM8: i32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19088    unsafe {
19089        static_assert_uimm_bits!(IMM8, 8);
19090        let a = a.as_i32x8();
19091        let r = vprold256(a, IMM8);
19092        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
19093    }
19094}
19095
19096/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19097///
19098/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rol_epi32&expand=4681)
19099#[inline]
19100#[target_feature(enable = "avx512f,avx512vl")]
19101#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19102#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19103#[rustc_legacy_const_generics(2)]
19104pub fn _mm256_maskz_rol_epi32<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19105    unsafe {
19106        static_assert_uimm_bits!(IMM8, 8);
19107        let a = a.as_i32x8();
19108        let r = vprold256(a, IMM8);
19109        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
19110    }
19111}
19112
19113/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19114///
19115/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rol_epi32&expand=4679)
19116#[inline]
19117#[target_feature(enable = "avx512f,avx512vl")]
19118#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19119#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19120#[rustc_legacy_const_generics(1)]
19121pub fn _mm_rol_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
19122    unsafe {
19123        static_assert_uimm_bits!(IMM8, 8);
19124        let a = a.as_i32x4();
19125        let r = vprold128(a, IMM8);
19126        transmute(r)
19127    }
19128}
19129
19130/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19131///
19132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rol_epi32&expand=4677)
19133#[inline]
19134#[target_feature(enable = "avx512f,avx512vl")]
19135#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19136#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19137#[rustc_legacy_const_generics(3)]
19138pub fn _mm_mask_rol_epi32<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19139    unsafe {
19140        static_assert_uimm_bits!(IMM8, 8);
19141        let a = a.as_i32x4();
19142        let r = vprold128(a, IMM8);
19143        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
19144    }
19145}
19146
19147/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19148///
19149/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rol_epi32&expand=4678)
19150#[inline]
19151#[target_feature(enable = "avx512f,avx512vl")]
19152#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19153#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19154#[rustc_legacy_const_generics(2)]
19155pub fn _mm_maskz_rol_epi32<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
19156    unsafe {
19157        static_assert_uimm_bits!(IMM8, 8);
19158        let a = a.as_i32x4();
19159        let r = vprold128(a, IMM8);
19160        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
19161    }
19162}
19163
19164/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19165///
19166/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ror_epi32&expand=4721)
19167#[inline]
19168#[target_feature(enable = "avx512f")]
19169#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19170#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19171#[rustc_legacy_const_generics(1)]
19172pub fn _mm512_ror_epi32<const IMM8: i32>(a: __m512i) -> __m512i {
19173    unsafe {
19174        static_assert_uimm_bits!(IMM8, 8);
19175        let a = a.as_i32x16();
19176        let r = vprord(a, IMM8);
19177        transmute(r)
19178    }
19179}
19180
19181/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19182///
19183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ror_epi32&expand=4719)
19184#[inline]
19185#[target_feature(enable = "avx512f")]
19186#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19187#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19188#[rustc_legacy_const_generics(3)]
19189pub fn _mm512_mask_ror_epi32<const IMM8: i32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
19190    unsafe {
19191        static_assert_uimm_bits!(IMM8, 8);
19192        let a = a.as_i32x16();
19193        let r = vprord(a, IMM8);
19194        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
19195    }
19196}
19197
19198/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19199///
19200/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ror_epi32&expand=4720)
19201#[inline]
19202#[target_feature(enable = "avx512f")]
19203#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19204#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19205#[rustc_legacy_const_generics(2)]
19206pub fn _mm512_maskz_ror_epi32<const IMM8: i32>(k: __mmask16, a: __m512i) -> __m512i {
19207    unsafe {
19208        static_assert_uimm_bits!(IMM8, 8);
19209        let a = a.as_i32x16();
19210        let r = vprord(a, IMM8);
19211        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
19212    }
19213}
19214
19215/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19216///
19217/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ror_epi32&expand=4718)
19218#[inline]
19219#[target_feature(enable = "avx512f,avx512vl")]
19220#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19221#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19222#[rustc_legacy_const_generics(1)]
19223pub fn _mm256_ror_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
19224    unsafe {
19225        static_assert_uimm_bits!(IMM8, 8);
19226        let a = a.as_i32x8();
19227        let r = vprord256(a, IMM8);
19228        transmute(r)
19229    }
19230}
19231
19232/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19233///
19234/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ror_epi32&expand=4716)
19235#[inline]
19236#[target_feature(enable = "avx512f,avx512vl")]
19237#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19238#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19239#[rustc_legacy_const_generics(3)]
19240pub fn _mm256_mask_ror_epi32<const IMM8: i32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19241    unsafe {
19242        static_assert_uimm_bits!(IMM8, 8);
19243        let a = a.as_i32x8();
19244        let r = vprord256(a, IMM8);
19245        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
19246    }
19247}
19248
19249/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19250///
19251/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ror_epi32&expand=4717)
19252#[inline]
19253#[target_feature(enable = "avx512f,avx512vl")]
19254#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19255#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19256#[rustc_legacy_const_generics(2)]
19257pub fn _mm256_maskz_ror_epi32<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19258    unsafe {
19259        static_assert_uimm_bits!(IMM8, 8);
19260        let a = a.as_i32x8();
19261        let r = vprord256(a, IMM8);
19262        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
19263    }
19264}
19265
19266/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19267///
19268/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ror_epi32&expand=4715)
19269#[inline]
19270#[target_feature(enable = "avx512f,avx512vl")]
19271#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19272#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19273#[rustc_legacy_const_generics(1)]
19274pub fn _mm_ror_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
19275    unsafe {
19276        static_assert_uimm_bits!(IMM8, 8);
19277        let a = a.as_i32x4();
19278        let r = vprord128(a, IMM8);
19279        transmute(r)
19280    }
19281}
19282
19283/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19284///
19285/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ror_epi32&expand=4713)
19286#[inline]
19287#[target_feature(enable = "avx512f,avx512vl")]
19288#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19289#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19290#[rustc_legacy_const_generics(3)]
19291pub fn _mm_mask_ror_epi32<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19292    unsafe {
19293        static_assert_uimm_bits!(IMM8, 8);
19294        let a = a.as_i32x4();
19295        let r = vprord128(a, IMM8);
19296        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
19297    }
19298}
19299
19300/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19301///
19302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ror_epi32&expand=4714)
19303#[inline]
19304#[target_feature(enable = "avx512f,avx512vl")]
19305#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19306#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19307#[rustc_legacy_const_generics(2)]
19308pub fn _mm_maskz_ror_epi32<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
19309    unsafe {
19310        static_assert_uimm_bits!(IMM8, 8);
19311        let a = a.as_i32x4();
19312        let r = vprord128(a, IMM8);
19313        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
19314    }
19315}
19316
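// A minimal sketch (not part of the upstream API): a right rotation of 32-bit lanes by
// `N` is the same operation as a left rotation by `32 - N`, which is why a compiler may
// emit the left-rotate instruction for these right-rotate intrinsics (as the test
// assertions above expect). The helper is hypothetical, test-only, and assumes AVX-512F.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_ror_epi32(a: __m512i) -> (__m512i, __m512i) {
    // Both calls produce the identical result in every lane.
    (_mm512_ror_epi32::<8>(a), _mm512_rol_epi32::<24>(a))
}
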
19317/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19318///
19319/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rol_epi64&expand=4694)
19320#[inline]
19321#[target_feature(enable = "avx512f")]
19322#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19323#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19324#[rustc_legacy_const_generics(1)]
19325pub fn _mm512_rol_epi64<const IMM8: i32>(a: __m512i) -> __m512i {
19326    unsafe {
19327        static_assert_uimm_bits!(IMM8, 8);
19328        let a = a.as_i64x8();
19329        let r = vprolq(a, IMM8);
19330        transmute(r)
19331    }
19332}
19333
19334/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19335///
19336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rol_epi64&expand=4692)
19337#[inline]
19338#[target_feature(enable = "avx512f")]
19339#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19340#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19341#[rustc_legacy_const_generics(3)]
19342pub fn _mm512_mask_rol_epi64<const IMM8: i32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
19343    unsafe {
19344        static_assert_uimm_bits!(IMM8, 8);
19345        let a = a.as_i64x8();
19346        let r = vprolq(a, IMM8);
19347        transmute(simd_select_bitmask(k, r, src.as_i64x8()))
19348    }
19349}
19350
19351/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19352///
19353/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rol_epi64&expand=4693)
19354#[inline]
19355#[target_feature(enable = "avx512f")]
19356#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19357#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19358#[rustc_legacy_const_generics(2)]
19359pub fn _mm512_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m512i {
19360    unsafe {
19361        static_assert_uimm_bits!(IMM8, 8);
19362        let a = a.as_i64x8();
19363        let r = vprolq(a, IMM8);
19364        transmute(simd_select_bitmask(k, r, i64x8::ZERO))
19365    }
19366}
19367
19368/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19369///
19370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rol_epi64&expand=4691)
19371#[inline]
19372#[target_feature(enable = "avx512f,avx512vl")]
19373#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19374#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19375#[rustc_legacy_const_generics(1)]
19376pub fn _mm256_rol_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
19377    unsafe {
19378        static_assert_uimm_bits!(IMM8, 8);
19379        let a = a.as_i64x4();
19380        let r = vprolq256(a, IMM8);
19381        transmute(r)
19382    }
19383}
19384
19385/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19386///
19387/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rol_epi64&expand=4689)
19388#[inline]
19389#[target_feature(enable = "avx512f,avx512vl")]
19390#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19391#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19392#[rustc_legacy_const_generics(3)]
19393pub fn _mm256_mask_rol_epi64<const IMM8: i32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19394    unsafe {
19395        static_assert_uimm_bits!(IMM8, 8);
19396        let a = a.as_i64x4();
19397        let r = vprolq256(a, IMM8);
19398        transmute(simd_select_bitmask(k, r, src.as_i64x4()))
19399    }
19400}
19401
19402/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19403///
19404/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rol_epi64&expand=4690)
19405#[inline]
19406#[target_feature(enable = "avx512f,avx512vl")]
19407#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19408#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19409#[rustc_legacy_const_generics(2)]
19410pub fn _mm256_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19411    unsafe {
19412        static_assert_uimm_bits!(IMM8, 8);
19413        let a = a.as_i64x4();
19414        let r = vprolq256(a, IMM8);
19415        transmute(simd_select_bitmask(k, r, i64x4::ZERO))
19416    }
19417}
19418
19419/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19420///
19421/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rol_epi64&expand=4688)
19422#[inline]
19423#[target_feature(enable = "avx512f,avx512vl")]
19424#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19425#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19426#[rustc_legacy_const_generics(1)]
19427pub fn _mm_rol_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
19428    unsafe {
19429        static_assert_uimm_bits!(IMM8, 8);
19430        let a = a.as_i64x2();
19431        let r = vprolq128(a, IMM8);
19432        transmute(r)
19433    }
19434}
19435
19436/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19437///
19438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rol_epi64&expand=4686)
19439#[inline]
19440#[target_feature(enable = "avx512f,avx512vl")]
19441#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19442#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19443#[rustc_legacy_const_generics(3)]
19444pub fn _mm_mask_rol_epi64<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19445    unsafe {
19446        static_assert_uimm_bits!(IMM8, 8);
19447        let a = a.as_i64x2();
19448        let r = vprolq128(a, IMM8);
19449        transmute(simd_select_bitmask(k, r, src.as_i64x2()))
19450    }
19451}
19452
19453/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19454///
19455/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rol_epi64&expand=4687)
19456#[inline]
19457#[target_feature(enable = "avx512f,avx512vl")]
19458#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19459#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19460#[rustc_legacy_const_generics(2)]
19461pub fn _mm_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
19462    unsafe {
19463        static_assert_uimm_bits!(IMM8, 8);
19464        let a = a.as_i64x2();
19465        let r = vprolq128(a, IMM8);
19466        transmute(simd_select_bitmask(k, r, i64x2::ZERO))
19467    }
19468}
19469
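// A minimal sketch (not part of the upstream API): a merge-masked left rotation of
// 64-bit lanes. `example_rol_epi64` is a hypothetical, test-only helper and assumes
// AVX-512F is enabled in the caller's target features.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_rol_epi64(src: __m512i, a: __m512i) -> __m512i {
    // Rotate every 64-bit lane left by 17, but only update the lanes selected by the
    // writemask; the remaining lanes are copied from `src`.
    _mm512_mask_rol_epi64::<17>(src, 0b1010_1010, a)
}
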
19470/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19471///
19472/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ror_epi64&expand=4730)
19473#[inline]
19474#[target_feature(enable = "avx512f")]
19475#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19476#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19477#[rustc_legacy_const_generics(1)]
19478pub fn _mm512_ror_epi64<const IMM8: i32>(a: __m512i) -> __m512i {
19479    unsafe {
19480        static_assert_uimm_bits!(IMM8, 8);
19481        let a = a.as_i64x8();
19482        let r = vprorq(a, IMM8);
19483        transmute(r)
19484    }
19485}
19486
19487/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19488///
19489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ror_epi64&expand=4728)
19490#[inline]
19491#[target_feature(enable = "avx512f")]
19492#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19493#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19494#[rustc_legacy_const_generics(3)]
19495pub fn _mm512_mask_ror_epi64<const IMM8: i32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
19496    unsafe {
19497        static_assert_uimm_bits!(IMM8, 8);
19498        let a = a.as_i64x8();
19499        let r = vprorq(a, IMM8);
19500        transmute(simd_select_bitmask(k, r, src.as_i64x8()))
19501    }
19502}
19503
19504/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19505///
19506/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ror_epi64&expand=4729)
19507#[inline]
19508#[target_feature(enable = "avx512f")]
19509#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19510#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19511#[rustc_legacy_const_generics(2)]
19512pub fn _mm512_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m512i {
19513    unsafe {
19514        static_assert_uimm_bits!(IMM8, 8);
19515        let a = a.as_i64x8();
19516        let r = vprorq(a, IMM8);
19517        transmute(simd_select_bitmask(k, r, i64x8::ZERO))
19518    }
19519}
19520
19521/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19522///
19523/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ror_epi64&expand=4727)
19524#[inline]
19525#[target_feature(enable = "avx512f,avx512vl")]
19526#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19527#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19528#[rustc_legacy_const_generics(1)]
19529pub fn _mm256_ror_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
19530    unsafe {
19531        static_assert_uimm_bits!(IMM8, 8);
19532        let a = a.as_i64x4();
19533        let r = vprorq256(a, IMM8);
19534        transmute(r)
19535    }
19536}
19537
19538/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19539///
19540/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ror_epi64&expand=4725)
19541#[inline]
19542#[target_feature(enable = "avx512f,avx512vl")]
19543#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19544#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19545#[rustc_legacy_const_generics(3)]
19546pub fn _mm256_mask_ror_epi64<const IMM8: i32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19547    unsafe {
19548        static_assert_uimm_bits!(IMM8, 8);
19549        let a = a.as_i64x4();
19550        let r = vprorq256(a, IMM8);
19551        transmute(simd_select_bitmask(k, r, src.as_i64x4()))
19552    }
19553}
19554
19555/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19556///
19557/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ror_epi64&expand=4726)
19558#[inline]
19559#[target_feature(enable = "avx512f,avx512vl")]
19560#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19561#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19562#[rustc_legacy_const_generics(2)]
19563pub fn _mm256_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19564    unsafe {
19565        static_assert_uimm_bits!(IMM8, 8);
19566        let a = a.as_i64x4();
19567        let r = vprorq256(a, IMM8);
19568        transmute(simd_select_bitmask(k, r, i64x4::ZERO))
19569    }
19570}
19571
19572/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19573///
19574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ror_epi64&expand=4724)
19575#[inline]
19576#[target_feature(enable = "avx512f,avx512vl")]
19577#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19578#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19579#[rustc_legacy_const_generics(1)]
19580pub fn _mm_ror_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
19581    unsafe {
19582        static_assert_uimm_bits!(IMM8, 8);
19583        let a = a.as_i64x2();
19584        let r = vprorq128(a, IMM8);
19585        transmute(r)
19586    }
19587}
19588
19589/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19590///
19591/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ror_epi64&expand=4722)
19592#[inline]
19593#[target_feature(enable = "avx512f,avx512vl")]
19594#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19595#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19596#[rustc_legacy_const_generics(3)]
19597pub fn _mm_mask_ror_epi64<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19598    unsafe {
19599        static_assert_uimm_bits!(IMM8, 8);
19600        let a = a.as_i64x2();
19601        let r = vprorq128(a, IMM8);
19602        transmute(simd_select_bitmask(k, r, src.as_i64x2()))
19603    }
19604}
19605
19606/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19607///
19608/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ror_epi64&expand=4723)
19609#[inline]
19610#[target_feature(enable = "avx512f,avx512vl")]
19611#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19612#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19613#[rustc_legacy_const_generics(2)]
19614pub fn _mm_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
19615    unsafe {
19616        static_assert_uimm_bits!(IMM8, 8);
19617        let a = a.as_i64x2();
19618        let r = vprorq128(a, IMM8);
19619        transmute(simd_select_bitmask(k, r, i64x2::ZERO))
19620    }
19621}
19622
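// A minimal sketch (not part of the upstream API): a zero-masked right rotation of
// 64-bit lanes. The helper is hypothetical, test-only, and assumes AVX-512F.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_ror_epi64(k: __mmask8, a: __m512i) -> __m512i {
    // Lanes whose mask bit is clear come out as zero; the rest are `a` rotated right
    // by 63 (equivalently, rotated left by 1).
    _mm512_maskz_ror_epi64::<63>(k, a)
}
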
19623/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
19624///
19625/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_slli_epi32&expand=5310)
19626#[inline]
19627#[target_feature(enable = "avx512f")]
19628#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19629#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19630#[rustc_legacy_const_generics(1)]
19631pub fn _mm512_slli_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
19632    unsafe {
19633        static_assert_uimm_bits!(IMM8, 8);
19634        if IMM8 >= 32 {
19635            _mm512_setzero_si512()
19636        } else {
19637            transmute(simd_shl(a.as_u32x16(), u32x16::splat(IMM8)))
19638        }
19639    }
19640}
19641
19642/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19643///
19644/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_slli_epi32&expand=5308)
19645#[inline]
19646#[target_feature(enable = "avx512f")]
19647#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19648#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19649#[rustc_legacy_const_generics(3)]
19650pub fn _mm512_mask_slli_epi32<const IMM8: u32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
19651    unsafe {
19652        static_assert_uimm_bits!(IMM8, 8);
19653        let shf = if IMM8 >= 32 {
19654            u32x16::ZERO
19655        } else {
19656            simd_shl(a.as_u32x16(), u32x16::splat(IMM8))
19657        };
19658        transmute(simd_select_bitmask(k, shf, src.as_u32x16()))
19659    }
19660}
19661
19662/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19663///
19664/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_slli_epi32&expand=5309)
19665#[inline]
19666#[target_feature(enable = "avx512f")]
19667#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19668#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19669#[rustc_legacy_const_generics(2)]
19670pub fn _mm512_maskz_slli_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
19671    unsafe {
19672        static_assert_uimm_bits!(IMM8, 8);
19673        if IMM8 >= 32 {
19674            _mm512_setzero_si512()
19675        } else {
19676            let shf = simd_shl(a.as_u32x16(), u32x16::splat(IMM8));
19677            transmute(simd_select_bitmask(k, shf, u32x16::ZERO))
19678        }
19679    }
19680}
19681
19682/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19683///
19684/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_slli_epi32&expand=5305)
19685#[inline]
19686#[target_feature(enable = "avx512f,avx512vl")]
19687#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19688#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19689#[rustc_legacy_const_generics(3)]
19690pub fn _mm256_mask_slli_epi32<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19691    unsafe {
19692        static_assert_uimm_bits!(IMM8, 8);
19693        let r = if IMM8 >= 32 {
19694            u32x8::ZERO
19695        } else {
19696            simd_shl(a.as_u32x8(), u32x8::splat(IMM8))
19697        };
19698        transmute(simd_select_bitmask(k, r, src.as_u32x8()))
19699    }
19700}
19701
19702/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19703///
19704/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_slli_epi32&expand=5306)
19705#[inline]
19706#[target_feature(enable = "avx512f,avx512vl")]
19707#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19708#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19709#[rustc_legacy_const_generics(2)]
19710pub fn _mm256_maskz_slli_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
19711    unsafe {
19712        static_assert_uimm_bits!(IMM8, 8);
19713        if IMM8 >= 32 {
19714            _mm256_setzero_si256()
19715        } else {
19716            let r = simd_shl(a.as_u32x8(), u32x8::splat(IMM8));
19717            transmute(simd_select_bitmask(k, r, u32x8::ZERO))
19718        }
19719    }
19720}
19721
19722/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19723///
19724/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_slli_epi32&expand=5302)
19725#[inline]
19726#[target_feature(enable = "avx512f,avx512vl")]
19727#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19728#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19729#[rustc_legacy_const_generics(3)]
19730pub fn _mm_mask_slli_epi32<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19731    unsafe {
19732        static_assert_uimm_bits!(IMM8, 8);
19733        let r = if IMM8 >= 32 {
19734            u32x4::ZERO
19735        } else {
19736            simd_shl(a.as_u32x4(), u32x4::splat(IMM8))
19737        };
19738        transmute(simd_select_bitmask(k, r, src.as_u32x4()))
19739    }
19740}
19741
19742/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19743///
19744/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_slli_epi32&expand=5303)
19745#[inline]
19746#[target_feature(enable = "avx512f,avx512vl")]
19747#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19748#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19749#[rustc_legacy_const_generics(2)]
19750pub fn _mm_maskz_slli_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
19751    unsafe {
19752        static_assert_uimm_bits!(IMM8, 8);
19753        if IMM8 >= 32 {
19754            _mm_setzero_si128()
19755        } else {
19756            let r = simd_shl(a.as_u32x4(), u32x4::splat(IMM8));
19757            transmute(simd_select_bitmask(k, r, u32x4::ZERO))
19758        }
19759    }
19760}
19761
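// A minimal sketch (not part of the upstream API) of the shift-count edge case handled
// by the implementations above: a compile-time count of 32 or more yields all-zero
// lanes rather than an architecturally undefined shift. The helper is hypothetical,
// test-only, and assumes AVX-512F.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_slli_epi32(a: __m512i) -> (__m512i, __m512i) {
    // A shift by 3 multiplies each lane by 8 (mod 2^32); a shift by 32 is defined to
    // produce zero.
    (_mm512_slli_epi32::<3>(a), _mm512_slli_epi32::<32>(a))
}
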
19762/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
19763///
19764/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srli_epi32&expand=5522)
19765#[inline]
19766#[target_feature(enable = "avx512f")]
19767#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19768#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19769#[rustc_legacy_const_generics(1)]
19770pub fn _mm512_srli_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
19771    unsafe {
19772        static_assert_uimm_bits!(IMM8, 8);
19773        if IMM8 >= 32 {
19774            _mm512_setzero_si512()
19775        } else {
19776            transmute(simd_shr(a.as_u32x16(), u32x16::splat(IMM8)))
19777        }
19778    }
19779}
19780
19781/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19782///
19783/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srli_epi32&expand=5520)
19784#[inline]
19785#[target_feature(enable = "avx512f")]
19786#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19787#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19788#[rustc_legacy_const_generics(3)]
19789pub fn _mm512_mask_srli_epi32<const IMM8: u32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
19790    unsafe {
19791        static_assert_uimm_bits!(IMM8, 8);
19792        let shf = if IMM8 >= 32 {
19793            u32x16::ZERO
19794        } else {
19795            simd_shr(a.as_u32x16(), u32x16::splat(IMM8))
19796        };
19797        transmute(simd_select_bitmask(k, shf, src.as_u32x16()))
19798    }
19799}
19800
19801/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19802///
19803/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srli_epi32&expand=5521)
19804#[inline]
19805#[target_feature(enable = "avx512f")]
19806#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19807#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19808#[rustc_legacy_const_generics(2)]
19809pub fn _mm512_maskz_srli_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
19810    unsafe {
19811        static_assert_uimm_bits!(IMM8, 8);
19812        if IMM8 >= 32 {
19813            _mm512_setzero_si512()
19814        } else {
19815            let shf = simd_shr(a.as_u32x16(), u32x16::splat(IMM8));
19816            transmute(simd_select_bitmask(k, shf, u32x16::ZERO))
19817        }
19818    }
19819}
19820
19821/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19822///
19823/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srli_epi32&expand=5517)
19824#[inline]
19825#[target_feature(enable = "avx512f,avx512vl")]
19826#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19827#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19828#[rustc_legacy_const_generics(3)]
19829pub fn _mm256_mask_srli_epi32<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19830    unsafe {
19831        static_assert_uimm_bits!(IMM8, 8);
19832        let r = if IMM8 >= 32 {
19833            u32x8::ZERO
19834        } else {
19835            simd_shr(a.as_u32x8(), u32x8::splat(IMM8))
19836        };
19837        transmute(simd_select_bitmask(k, r, src.as_u32x8()))
19838    }
19839}
19840
19841/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19842///
19843/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srli_epi32&expand=5518)
19844#[inline]
19845#[target_feature(enable = "avx512f,avx512vl")]
19846#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19847#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19848#[rustc_legacy_const_generics(2)]
19849pub fn _mm256_maskz_srli_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
19850    unsafe {
19851        static_assert_uimm_bits!(IMM8, 8);
19852        if IMM8 >= 32 {
19853            _mm256_setzero_si256()
19854        } else {
19855            let r = simd_shr(a.as_u32x8(), u32x8::splat(IMM8));
19856            transmute(simd_select_bitmask(k, r, u32x8::ZERO))
19857        }
19858    }
19859}
19860
19861/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19862///
19863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srli_epi32&expand=5514)
19864#[inline]
19865#[target_feature(enable = "avx512f,avx512vl")]
19866#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19867#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19868#[rustc_legacy_const_generics(3)]
19869pub fn _mm_mask_srli_epi32<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19870    unsafe {
19871        static_assert_uimm_bits!(IMM8, 8);
19872        let r = if IMM8 >= 32 {
19873            u32x4::ZERO
19874        } else {
19875            simd_shr(a.as_u32x4(), u32x4::splat(IMM8))
19876        };
19877        transmute(simd_select_bitmask(k, r, src.as_u32x4()))
19878    }
19879}
19880
19881/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19882///
19883/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srli_epi32&expand=5515)
19884#[inline]
19885#[target_feature(enable = "avx512f,avx512vl")]
19886#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19887#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19888#[rustc_legacy_const_generics(2)]
19889pub fn _mm_maskz_srli_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
19890    unsafe {
19891        static_assert_uimm_bits!(IMM8, 8);
19892        if IMM8 >= 32 {
19893            _mm_setzero_si128()
19894        } else {
19895            let r = simd_shr(a.as_u32x4(), u32x4::splat(IMM8));
19896            transmute(simd_select_bitmask(k, r, u32x4::ZERO))
19897        }
19898    }
19899}
19900
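// A minimal sketch (not part of the upstream API): a logical (zero-filling) right shift
// of 32-bit lanes that merges unselected lanes from `src`. The helper is hypothetical,
// test-only, and assumes AVX-512F + AVX-512VL.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn example_srli_epi32(src: __m256i, a: __m256i) -> __m256i {
    // Shift every selected lane right by 9, shifting in zero bits; lanes whose mask
    // bit is clear are copied from `src`.
    _mm256_mask_srli_epi32::<9>(src, 0b1111_0000, a)
}
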
19901/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
19902///
19903/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_slli_epi64&expand=5319)
19904#[inline]
19905#[target_feature(enable = "avx512f")]
19906#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19907#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
19908#[rustc_legacy_const_generics(1)]
19909pub fn _mm512_slli_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
19910    unsafe {
19911        static_assert_uimm_bits!(IMM8, 8);
19912        if IMM8 >= 64 {
19913            _mm512_setzero_si512()
19914        } else {
19915            transmute(simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64)))
19916        }
19917    }
19918}
19919
19920/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19921///
19922/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_slli_epi64&expand=5317)
19923#[inline]
19924#[target_feature(enable = "avx512f")]
19925#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19926#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
19927#[rustc_legacy_const_generics(3)]
19928pub fn _mm512_mask_slli_epi64<const IMM8: u32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
19929    unsafe {
19930        static_assert_uimm_bits!(IMM8, 8);
19931        let shf = if IMM8 >= 64 {
19932            u64x8::ZERO
19933        } else {
19934            simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64))
19935        };
19936        transmute(simd_select_bitmask(k, shf, src.as_u64x8()))
19937    }
19938}
19939
19940/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19941///
19942/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_slli_epi64&expand=5318)
19943#[inline]
19944#[target_feature(enable = "avx512f")]
19945#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19946#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
19947#[rustc_legacy_const_generics(2)]
19948pub fn _mm512_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
19949    unsafe {
19950        static_assert_uimm_bits!(IMM8, 8);
19951        if IMM8 >= 64 {
19952            _mm512_setzero_si512()
19953        } else {
19954            let shf = simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64));
19955            transmute(simd_select_bitmask(k, shf, u64x8::ZERO))
19956        }
19957    }
19958}
19959
19960/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19961///
19962/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_slli_epi64&expand=5314)
19963#[inline]
19964#[target_feature(enable = "avx512f,avx512vl")]
19965#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19966#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
19967#[rustc_legacy_const_generics(3)]
19968pub fn _mm256_mask_slli_epi64<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19969    unsafe {
19970        static_assert_uimm_bits!(IMM8, 8);
19971        let r = if IMM8 >= 64 {
19972            u64x4::ZERO
19973        } else {
19974            simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64))
19975        };
19976        transmute(simd_select_bitmask(k, r, src.as_u64x4()))
19977    }
19978}
19979
19980/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19981///
19982/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_slli_epi64&expand=5315)
19983#[inline]
19984#[target_feature(enable = "avx512f,avx512vl")]
19985#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19986#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
19987#[rustc_legacy_const_generics(2)]
19988pub fn _mm256_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
19989    unsafe {
19990        static_assert_uimm_bits!(IMM8, 8);
19991        if IMM8 >= 64 {
19992            _mm256_setzero_si256()
19993        } else {
19994            let r = simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64));
19995            transmute(simd_select_bitmask(k, r, u64x4::ZERO))
19996        }
19997    }
19998}
19999
20000/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20001///
20002/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_slli_epi64&expand=5311)
20003#[inline]
20004#[target_feature(enable = "avx512f,avx512vl")]
20005#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20006#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
20007#[rustc_legacy_const_generics(3)]
20008pub fn _mm_mask_slli_epi64<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
20009    unsafe {
20010        static_assert_uimm_bits!(IMM8, 8);
20011        let r = if IMM8 >= 64 {
20012            u64x2::ZERO
20013        } else {
20014            simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64))
20015        };
20016        transmute(simd_select_bitmask(k, r, src.as_u64x2()))
20017    }
20018}
20019
20020/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20021///
20022/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_slli_epi64&expand=5312)
20023#[inline]
20024#[target_feature(enable = "avx512f,avx512vl")]
20025#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20026#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
20027#[rustc_legacy_const_generics(2)]
20028pub fn _mm_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
20029    unsafe {
20030        static_assert_uimm_bits!(IMM8, 8);
20031        if IMM8 >= 64 {
20032            _mm_setzero_si128()
20033        } else {
20034            let r = simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64));
20035            transmute(simd_select_bitmask(k, r, u64x2::ZERO))
20036        }
20037    }
20038}
20039
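// A minimal sketch (not part of the upstream API): the zero-masked 64-bit left shift on
// a two-lane vector. The helper is hypothetical, test-only, and assumes AVX-512F +
// AVX-512VL for the 128-bit form.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn example_slli_epi64(k: __mmask8, a: __m128i) -> __m128i {
    // Only the two low mask bits matter for a two-lane vector; unselected lanes are
    // zeroed, selected lanes are shifted left by 20.
    _mm_maskz_slli_epi64::<20>(k, a)
}
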
20040/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
20041///
20042/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srli_epi64&expand=5531)
20043#[inline]
20044#[target_feature(enable = "avx512f")]
20045#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20046#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20047#[rustc_legacy_const_generics(1)]
20048pub fn _mm512_srli_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
20049    unsafe {
20050        static_assert_uimm_bits!(IMM8, 8);
20051        if IMM8 >= 64 {
20052            _mm512_setzero_si512()
20053        } else {
20054            transmute(simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64)))
20055        }
20056    }
20057}
20058
20059/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20060///
20061/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srli_epi64&expand=5529)
20062#[inline]
20063#[target_feature(enable = "avx512f")]
20064#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20065#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20066#[rustc_legacy_const_generics(3)]
20067pub fn _mm512_mask_srli_epi64<const IMM8: u32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
20068    unsafe {
20069        static_assert_uimm_bits!(IMM8, 8);
20070        let shf = if IMM8 >= 64 {
20071            u64x8::ZERO
20072        } else {
20073            simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64))
20074        };
20075        transmute(simd_select_bitmask(k, shf, src.as_u64x8()))
20076    }
20077}
20078
20079/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20080///
20081/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srli_epi64&expand=5530)
20082#[inline]
20083#[target_feature(enable = "avx512f")]
20084#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20085#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20086#[rustc_legacy_const_generics(2)]
20087pub fn _mm512_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
20088    unsafe {
20089        static_assert_uimm_bits!(IMM8, 8);
20090        if IMM8 >= 64 {
20091            _mm512_setzero_si512()
20092        } else {
20093            let shf = simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64));
20094            transmute(simd_select_bitmask(k, shf, u64x8::ZERO))
20095        }
20096    }
20097}
20098
20099/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20100///
20101/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srli_epi64&expand=5526)
20102#[inline]
20103#[target_feature(enable = "avx512f,avx512vl")]
20104#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20105#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20106#[rustc_legacy_const_generics(3)]
20107pub fn _mm256_mask_srli_epi64<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
20108    unsafe {
20109        static_assert_uimm_bits!(IMM8, 8);
20110        let r = if IMM8 >= 64 {
20111            u64x4::ZERO
20112        } else {
20113            simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64))
20114        };
20115        transmute(simd_select_bitmask(k, r, src.as_u64x4()))
20116    }
20117}
20118
20119/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20120///
20121/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srli_epi64&expand=5527)
20122#[inline]
20123#[target_feature(enable = "avx512f,avx512vl")]
20124#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20125#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20126#[rustc_legacy_const_generics(2)]
20127pub fn _mm256_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
20128    unsafe {
20129        static_assert_uimm_bits!(IMM8, 8);
20130        if IMM8 >= 64 {
20131            _mm256_setzero_si256()
20132        } else {
20133            let r = simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64));
20134            transmute(simd_select_bitmask(k, r, u64x4::ZERO))
20135        }
20136    }
20137}
20138
20139/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20140///
20141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srli_epi64&expand=5523)
20142#[inline]
20143#[target_feature(enable = "avx512f,avx512vl")]
20144#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20145#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20146#[rustc_legacy_const_generics(3)]
20147pub fn _mm_mask_srli_epi64<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
20148    unsafe {
20149        static_assert_uimm_bits!(IMM8, 8);
20150        let r = if IMM8 >= 64 {
20151            u64x2::ZERO
20152        } else {
20153            simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64))
20154        };
20155        transmute(simd_select_bitmask(k, r, src.as_u64x2()))
20156    }
20157}
20158
20159/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20160///
20161/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srli_epi64&expand=5524)
20162#[inline]
20163#[target_feature(enable = "avx512f,avx512vl")]
20164#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20165#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20166#[rustc_legacy_const_generics(2)]
20167pub fn _mm_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
20168    unsafe {
20169        static_assert_uimm_bits!(IMM8, 8);
20170        if IMM8 >= 64 {
20171            _mm_setzero_si128()
20172        } else {
20173            let r = simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64));
20174            transmute(simd_select_bitmask(k, r, u64x2::ZERO))
20175        }
20176    }
20177}
20178
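// A minimal sketch (not part of the upstream API): an unmasked 64-bit logical right
// shift. The helper is hypothetical, test-only, and assumes AVX-512F.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_srli_epi64(a: __m512i) -> __m512i {
    // Divide each unsigned 64-bit lane by 2^13 by shifting in zero bits.
    _mm512_srli_epi64::<13>(a)
}
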
20179/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst.
20180///
20181/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sll_epi32&expand=5280)
20182#[inline]
20183#[target_feature(enable = "avx512f")]
20184#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20185#[cfg_attr(test, assert_instr(vpslld))]
20186pub fn _mm512_sll_epi32(a: __m512i, count: __m128i) -> __m512i {
20187    unsafe { transmute(vpslld(a.as_i32x16(), count.as_i32x4())) }
20188}
20189
20190/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20191///
20192/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sll_epi32&expand=5278)
20193#[inline]
20194#[target_feature(enable = "avx512f")]
20195#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20196#[cfg_attr(test, assert_instr(vpslld))]
20197pub fn _mm512_mask_sll_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20198    unsafe {
20199        let shf = _mm512_sll_epi32(a, count).as_i32x16();
20200        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
20201    }
20202}
20203
20204/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20205///
20206/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sll_epi32&expand=5279)
20207#[inline]
20208#[target_feature(enable = "avx512f")]
20209#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20210#[cfg_attr(test, assert_instr(vpslld))]
20211pub fn _mm512_maskz_sll_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20212    unsafe {
20213        let shf = _mm512_sll_epi32(a, count).as_i32x16();
20214        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
20215    }
20216}
20217
20218/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20219///
20220/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sll_epi32&expand=5275)
20221#[inline]
20222#[target_feature(enable = "avx512f,avx512vl")]
20223#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20224#[cfg_attr(test, assert_instr(vpslld))]
20225pub fn _mm256_mask_sll_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20226    unsafe {
20227        let shf = _mm256_sll_epi32(a, count).as_i32x8();
20228        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
20229    }
20230}
20231
20232/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20233///
20234/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sll_epi32&expand=5276)
20235#[inline]
20236#[target_feature(enable = "avx512f,avx512vl")]
20237#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20238#[cfg_attr(test, assert_instr(vpslld))]
20239pub fn _mm256_maskz_sll_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20240    unsafe {
20241        let shf = _mm256_sll_epi32(a, count).as_i32x8();
20242        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
20243    }
20244}
20245
20246/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20247///
20248/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sll_epi32&expand=5272)
20249#[inline]
20250#[target_feature(enable = "avx512f,avx512vl")]
20251#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20252#[cfg_attr(test, assert_instr(vpslld))]
20253pub fn _mm_mask_sll_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20254    unsafe {
20255        let shf = _mm_sll_epi32(a, count).as_i32x4();
20256        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
20257    }
20258}
20259
20260/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20261///
20262/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sll_epi32&expand=5273)
20263#[inline]
20264#[target_feature(enable = "avx512f,avx512vl")]
20265#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20266#[cfg_attr(test, assert_instr(vpslld))]
20267pub fn _mm_maskz_sll_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20268    unsafe {
20269        let shf = _mm_sll_epi32(a, count).as_i32x4();
20270        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
20271    }
20272}
20273
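// A minimal sketch (not part of the upstream API) contrasting the `sll` forms above
// with the `slli` forms earlier: here the shift count comes from the low 64 bits of a
// runtime `__m128i` rather than a compile-time immediate. The helper is hypothetical,
// test-only, and assumes AVX-512F.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_sll_epi32(a: __m512i, count: __m128i) -> __m512i {
    // Every 32-bit lane of `a` is shifted left by the same runtime count; counts of
    // 32 or more produce zero lanes.
    _mm512_sll_epi32(a, count)
}
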
20274/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst.
20275///
20276/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srl_epi32&expand=5492)
20277#[inline]
20278#[target_feature(enable = "avx512f")]
20279#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20280#[cfg_attr(test, assert_instr(vpsrld))]
20281pub fn _mm512_srl_epi32(a: __m512i, count: __m128i) -> __m512i {
20282    unsafe { transmute(vpsrld(a.as_i32x16(), count.as_i32x4())) }
20283}
20284
20285/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20286///
20287/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srl_epi32&expand=5490)
20288#[inline]
20289#[target_feature(enable = "avx512f")]
20290#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20291#[cfg_attr(test, assert_instr(vpsrld))]
20292pub fn _mm512_mask_srl_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20293    unsafe {
20294        let shf = _mm512_srl_epi32(a, count).as_i32x16();
20295        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
20296    }
20297}
20298
20299/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20300///
20301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srl_epi32&expand=5491)
20302#[inline]
20303#[target_feature(enable = "avx512f")]
20304#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20305#[cfg_attr(test, assert_instr(vpsrld))]
20306pub fn _mm512_maskz_srl_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20307    unsafe {
20308        let shf = _mm512_srl_epi32(a, count).as_i32x16();
20309        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
20310    }
20311}
20312
20313/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20314///
20315/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srl_epi32&expand=5487)
20316#[inline]
20317#[target_feature(enable = "avx512f,avx512vl")]
20318#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20319#[cfg_attr(test, assert_instr(vpsrld))]
20320pub fn _mm256_mask_srl_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20321    unsafe {
20322        let shf = _mm256_srl_epi32(a, count).as_i32x8();
20323        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
20324    }
20325}
20326
20327/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20328///
20329/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srl_epi32&expand=5488)
20330#[inline]
20331#[target_feature(enable = "avx512f,avx512vl")]
20332#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20333#[cfg_attr(test, assert_instr(vpsrld))]
20334pub fn _mm256_maskz_srl_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20335    unsafe {
20336        let shf = _mm256_srl_epi32(a, count).as_i32x8();
20337        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
20338    }
20339}
20340
20341/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20342///
20343/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srl_epi32&expand=5484)
20344#[inline]
20345#[target_feature(enable = "avx512f,avx512vl")]
20346#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20347#[cfg_attr(test, assert_instr(vpsrld))]
20348pub fn _mm_mask_srl_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20349    unsafe {
20350        let shf = _mm_srl_epi32(a, count).as_i32x4();
20351        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
20352    }
20353}
20354
20355/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20356///
20357/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srl_epi32&expand=5485)
20358#[inline]
20359#[target_feature(enable = "avx512f,avx512vl")]
20360#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20361#[cfg_attr(test, assert_instr(vpsrld))]
20362pub fn _mm_maskz_srl_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20363    unsafe {
20364        let shf = _mm_srl_epi32(a, count).as_i32x4();
20365        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
20366    }
20367}
20368
20369/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst.
20370///
20371/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sll_epi64&expand=5289)
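///
/// A minimal usage sketch (illustrative values, not from the crate's tests):
///
/// ```ignore
/// let a = _mm512_set1_epi64(1);
/// let count = _mm_set_epi64x(0, 40);
/// let r = _mm512_sll_epi64(a, count);
/// // every lane holds 1 << 40; a count of 64 or more would clear all lanes
/// ```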
20372#[inline]
20373#[target_feature(enable = "avx512f")]
20374#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20375#[cfg_attr(test, assert_instr(vpsllq))]
20376pub fn _mm512_sll_epi64(a: __m512i, count: __m128i) -> __m512i {
20377    unsafe { transmute(vpsllq(a.as_i64x8(), count.as_i64x2())) }
20378}
20379
20380/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20381///
20382/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sll_epi64&expand=5287)
20383#[inline]
20384#[target_feature(enable = "avx512f")]
20385#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20386#[cfg_attr(test, assert_instr(vpsllq))]
20387pub fn _mm512_mask_sll_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20388    unsafe {
20389        let shf = _mm512_sll_epi64(a, count).as_i64x8();
20390        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
20391    }
20392}
20393
20394/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20395///
20396/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sll_epi64&expand=5288)
20397#[inline]
20398#[target_feature(enable = "avx512f")]
20399#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20400#[cfg_attr(test, assert_instr(vpsllq))]
20401pub fn _mm512_maskz_sll_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20402    unsafe {
20403        let shf = _mm512_sll_epi64(a, count).as_i64x8();
20404        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
20405    }
20406}
20407
20408/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20409///
20410/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sll_epi64&expand=5284)
20411#[inline]
20412#[target_feature(enable = "avx512f,avx512vl")]
20413#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20414#[cfg_attr(test, assert_instr(vpsllq))]
20415pub fn _mm256_mask_sll_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20416    unsafe {
20417        let shf = _mm256_sll_epi64(a, count).as_i64x4();
20418        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
20419    }
20420}
20421
20422/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20423///
20424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sll_epi64&expand=5285)
20425#[inline]
20426#[target_feature(enable = "avx512f,avx512vl")]
20427#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20428#[cfg_attr(test, assert_instr(vpsllq))]
20429pub fn _mm256_maskz_sll_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20430    unsafe {
20431        let shf = _mm256_sll_epi64(a, count).as_i64x4();
20432        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
20433    }
20434}
20435
20436/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20437///
20438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sll_epi64&expand=5281)
20439#[inline]
20440#[target_feature(enable = "avx512f,avx512vl")]
20441#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20442#[cfg_attr(test, assert_instr(vpsllq))]
20443pub fn _mm_mask_sll_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20444    unsafe {
20445        let shf = _mm_sll_epi64(a, count).as_i64x2();
20446        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
20447    }
20448}
20449
20450/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20451///
20452/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sll_epi64&expand=5282)
20453#[inline]
20454#[target_feature(enable = "avx512f,avx512vl")]
20455#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20456#[cfg_attr(test, assert_instr(vpsllq))]
20457pub fn _mm_maskz_sll_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20458    unsafe {
20459        let shf = _mm_sll_epi64(a, count).as_i64x2();
20460        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
20461    }
20462}
20463
20464/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst.
20465///
20466/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srl_epi64&expand=5501)
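///
/// A minimal usage sketch (illustrative values, not from the crate's tests):
///
/// ```ignore
/// let a = _mm512_set1_epi64(-2);      // 0xFFFF_FFFF_FFFF_FFFE in every lane
/// let count = _mm_set_epi64x(0, 63);
/// let r = _mm512_srl_epi64(a, count);
/// // zeros are shifted in, so every lane holds 1
/// ```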
20467#[inline]
20468#[target_feature(enable = "avx512f")]
20469#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20470#[cfg_attr(test, assert_instr(vpsrlq))]
20471pub fn _mm512_srl_epi64(a: __m512i, count: __m128i) -> __m512i {
20472    unsafe { transmute(vpsrlq(a.as_i64x8(), count.as_i64x2())) }
20473}
20474
20475/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20476///
20477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srl_epi64&expand=5499)
20478#[inline]
20479#[target_feature(enable = "avx512f")]
20480#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20481#[cfg_attr(test, assert_instr(vpsrlq))]
20482pub fn _mm512_mask_srl_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20483    unsafe {
20484        let shf = _mm512_srl_epi64(a, count).as_i64x8();
20485        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
20486    }
20487}
20488
20489/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20490///
20491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srl_epi64&expand=5500)
20492#[inline]
20493#[target_feature(enable = "avx512f")]
20494#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20495#[cfg_attr(test, assert_instr(vpsrlq))]
20496pub fn _mm512_maskz_srl_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20497    unsafe {
20498        let shf = _mm512_srl_epi64(a, count).as_i64x8();
20499        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
20500    }
20501}
20502
20503/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20504///
20505/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srl_epi64&expand=5496)
20506#[inline]
20507#[target_feature(enable = "avx512f,avx512vl")]
20508#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20509#[cfg_attr(test, assert_instr(vpsrlq))]
20510pub fn _mm256_mask_srl_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20511    unsafe {
20512        let shf = _mm256_srl_epi64(a, count).as_i64x4();
20513        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
20514    }
20515}
20516
20517/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20518///
20519/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srl_epi64&expand=5497)
20520#[inline]
20521#[target_feature(enable = "avx512f,avx512vl")]
20522#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20523#[cfg_attr(test, assert_instr(vpsrlq))]
20524pub fn _mm256_maskz_srl_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20525    unsafe {
20526        let shf = _mm256_srl_epi64(a, count).as_i64x4();
20527        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
20528    }
20529}
20530
20531/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20532///
20533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srl_epi64&expand=5493)
20534#[inline]
20535#[target_feature(enable = "avx512f,avx512vl")]
20536#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20537#[cfg_attr(test, assert_instr(vpsrlq))]
20538pub fn _mm_mask_srl_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20539    unsafe {
20540        let shf = _mm_srl_epi64(a, count).as_i64x2();
20541        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
20542    }
20543}
20544
20545/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20546///
20547/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srl_epi64&expand=5494)
20548#[inline]
20549#[target_feature(enable = "avx512f,avx512vl")]
20550#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20551#[cfg_attr(test, assert_instr(vpsrlq))]
20552pub fn _mm_maskz_srl_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20553    unsafe {
20554        let shf = _mm_srl_epi64(a, count).as_i64x2();
20555        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
20556    }
20557}
20558
20559/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst.
20560///
20561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sra_epi32&expand=5407)
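///
/// A minimal usage sketch (illustrative values, not from the crate's tests):
///
/// ```ignore
/// let a = _mm512_set1_epi32(-64);
/// let count = _mm_set_epi64x(0, 3);
/// let r = _mm512_sra_epi32(a, count);
/// // sign bits are shifted in, so every lane holds -8
/// ```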
20562#[inline]
20563#[target_feature(enable = "avx512f")]
20564#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20565#[cfg_attr(test, assert_instr(vpsrad))]
20566pub fn _mm512_sra_epi32(a: __m512i, count: __m128i) -> __m512i {
20567    unsafe { transmute(vpsrad(a.as_i32x16(), count.as_i32x4())) }
20568}
20569
20570/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20571///
20572/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sra_epi32&expand=5405)
20573#[inline]
20574#[target_feature(enable = "avx512f")]
20575#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20576#[cfg_attr(test, assert_instr(vpsrad))]
20577pub fn _mm512_mask_sra_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20578    unsafe {
20579        let shf = _mm512_sra_epi32(a, count).as_i32x16();
20580        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
20581    }
20582}
20583
20584/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20585///
20586/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sra_epi32&expand=5406)
20587#[inline]
20588#[target_feature(enable = "avx512f")]
20589#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20590#[cfg_attr(test, assert_instr(vpsrad))]
20591pub fn _mm512_maskz_sra_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20592    unsafe {
20593        let shf = _mm512_sra_epi32(a, count).as_i32x16();
20594        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
20595    }
20596}
20597
20598/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20599///
20600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sra_epi32&expand=5402)
20601#[inline]
20602#[target_feature(enable = "avx512f,avx512vl")]
20603#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20604#[cfg_attr(test, assert_instr(vpsrad))]
20605pub fn _mm256_mask_sra_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20606    unsafe {
20607        let shf = _mm256_sra_epi32(a, count).as_i32x8();
20608        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
20609    }
20610}
20611
20612/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20613///
20614/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sra_epi32&expand=5403)
20615#[inline]
20616#[target_feature(enable = "avx512f,avx512vl")]
20617#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20618#[cfg_attr(test, assert_instr(vpsrad))]
20619pub fn _mm256_maskz_sra_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20620    unsafe {
20621        let shf = _mm256_sra_epi32(a, count).as_i32x8();
20622        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
20623    }
20624}
20625
20626/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20627///
20628/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sra_epi32&expand=5399)
20629#[inline]
20630#[target_feature(enable = "avx512f,avx512vl")]
20631#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20632#[cfg_attr(test, assert_instr(vpsrad))]
20633pub fn _mm_mask_sra_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20634    unsafe {
20635        let shf = _mm_sra_epi32(a, count).as_i32x4();
20636        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
20637    }
20638}
20639
20640/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20641///
20642/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sra_epi32&expand=5400)
20643#[inline]
20644#[target_feature(enable = "avx512f,avx512vl")]
20645#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20646#[cfg_attr(test, assert_instr(vpsrad))]
20647pub fn _mm_maskz_sra_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20648    unsafe {
20649        let shf = _mm_sra_epi32(a, count).as_i32x4();
20650        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
20651    }
20652}
20653
20654/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
20655///
20656/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sra_epi64&expand=5416)
20657#[inline]
20658#[target_feature(enable = "avx512f")]
20659#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20660#[cfg_attr(test, assert_instr(vpsraq))]
20661pub fn _mm512_sra_epi64(a: __m512i, count: __m128i) -> __m512i {
20662    unsafe { transmute(vpsraq(a.as_i64x8(), count.as_i64x2())) }
20663}
20664
20665/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20666///
20667/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sra_epi64&expand=5414)
20668#[inline]
20669#[target_feature(enable = "avx512f")]
20670#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20671#[cfg_attr(test, assert_instr(vpsraq))]
20672pub fn _mm512_mask_sra_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20673    unsafe {
20674        let shf = _mm512_sra_epi64(a, count).as_i64x8();
20675        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
20676    }
20677}
20678
20679/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20680///
20681/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sra_epi64&expand=5415)
20682#[inline]
20683#[target_feature(enable = "avx512f")]
20684#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20685#[cfg_attr(test, assert_instr(vpsraq))]
20686pub fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20687    unsafe {
20688        let shf = _mm512_sra_epi64(a, count).as_i64x8();
20689        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
20690    }
20691}
20692
20693/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
20694///
20695/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sra_epi64&expand=5413)
20696#[inline]
20697#[target_feature(enable = "avx512f,avx512vl")]
20698#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20699#[cfg_attr(test, assert_instr(vpsraq))]
20700pub fn _mm256_sra_epi64(a: __m256i, count: __m128i) -> __m256i {
20701    unsafe { transmute(vpsraq256(a.as_i64x4(), count.as_i64x2())) }
20702}
20703
20704/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20705///
20706/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sra_epi64&expand=5411)
20707#[inline]
20708#[target_feature(enable = "avx512f,avx512vl")]
20709#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20710#[cfg_attr(test, assert_instr(vpsraq))]
20711pub fn _mm256_mask_sra_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20712    unsafe {
20713        let shf = _mm256_sra_epi64(a, count).as_i64x4();
20714        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
20715    }
20716}
20717
20718/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20719///
20720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sra_epi64&expand=5412)
20721#[inline]
20722#[target_feature(enable = "avx512f,avx512vl")]
20723#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20724#[cfg_attr(test, assert_instr(vpsraq))]
20725pub fn _mm256_maskz_sra_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20726    unsafe {
20727        let shf = _mm256_sra_epi64(a, count).as_i64x4();
20728        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
20729    }
20730}
20731
20732/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
20733///
20734/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sra_epi64&expand=5410)
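///
/// A 64-bit arithmetic right shift has no SSE2/AVX2 counterpart, so even this
/// 128-bit form requires AVX-512. A minimal usage sketch (illustrative values,
/// not from the crate's tests; needs `avx512f` and `avx512vl` at runtime):
///
/// ```ignore
/// let a = _mm_set1_epi64x(-4096);
/// let count = _mm_set_epi64x(0, 12);
/// let r = _mm_sra_epi64(a, count);
/// // sign bits are shifted in, so both lanes hold -1
/// ```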
20735#[inline]
20736#[target_feature(enable = "avx512f,avx512vl")]
20737#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20738#[cfg_attr(test, assert_instr(vpsraq))]
20739pub fn _mm_sra_epi64(a: __m128i, count: __m128i) -> __m128i {
20740    unsafe { transmute(vpsraq128(a.as_i64x2(), count.as_i64x2())) }
20741}
20742
20743/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20744///
20745/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sra_epi64&expand=5408)
20746#[inline]
20747#[target_feature(enable = "avx512f,avx512vl")]
20748#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20749#[cfg_attr(test, assert_instr(vpsraq))]
20750pub fn _mm_mask_sra_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20751    unsafe {
20752        let shf = _mm_sra_epi64(a, count).as_i64x2();
20753        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
20754    }
20755}
20756
20757/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20758///
20759/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sra_epi64&expand=5409)
20760#[inline]
20761#[target_feature(enable = "avx512f,avx512vl")]
20762#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20763#[cfg_attr(test, assert_instr(vpsraq))]
20764pub fn _mm_maskz_sra_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20765    unsafe {
20766        let shf = _mm_sra_epi64(a, count).as_i64x2();
20767        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
20768    }
20769}
20770
20771/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
20772///
20773/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srai_epi32&expand=5436)
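///
/// A minimal usage sketch (illustrative values, not from the crate's tests). The
/// shift amount is a const generic; amounts above 31 behave like a shift by 31:
///
/// ```ignore
/// let a = _mm512_set1_epi32(i32::MIN);
/// let r = _mm512_srai_epi32::<31>(a);
/// // the sign bit fills each lane, so every lane holds -1
/// ```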
20774#[inline]
20775#[target_feature(enable = "avx512f")]
20776#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20777#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20778#[rustc_legacy_const_generics(1)]
20779pub fn _mm512_srai_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
20780    unsafe {
20781        static_assert_uimm_bits!(IMM8, 8);
20782        transmute(simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32)))
20783    }
20784}
20785
20786/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20787///
20788/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srai_epi32&expand=5434)
20789#[inline]
20790#[target_feature(enable = "avx512f")]
20791#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20792#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20793#[rustc_legacy_const_generics(3)]
20794pub fn _mm512_mask_srai_epi32<const IMM8: u32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
20795    unsafe {
20796        static_assert_uimm_bits!(IMM8, 8);
20797        let r = simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32));
20798        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
20799    }
20800}
20801
20802/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20803///
20804/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srai_epi32&expand=5435)
20805#[inline]
20806#[target_feature(enable = "avx512f")]
20807#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20808#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20809#[rustc_legacy_const_generics(2)]
20810pub fn _mm512_maskz_srai_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
20811    unsafe {
20812        static_assert_uimm_bits!(IMM8, 8);
20813        let r = simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32));
20814        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
20815    }
20816}
20817
20818/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20819///
20820/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srai_epi32&expand=5431)
20821#[inline]
20822#[target_feature(enable = "avx512f,avx512vl")]
20823#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20824#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20825#[rustc_legacy_const_generics(3)]
20826pub fn _mm256_mask_srai_epi32<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
20827    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
20828        let r = simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31) as i32));
20829        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
20830    }
20831}
20832
20833/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20834///
20835/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srai_epi32&expand=5432)
20836#[inline]
20837#[target_feature(enable = "avx512f,avx512vl")]
20838#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20839#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20840#[rustc_legacy_const_generics(2)]
20841pub fn _mm256_maskz_srai_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
20842    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
20843        let r = simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31) as i32));
20844        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
20845    }
20846}
20847
20848/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20849///
20850/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srai_epi32&expand=5428)
20851#[inline]
20852#[target_feature(enable = "avx512f,avx512vl")]
20853#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20854#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20855#[rustc_legacy_const_generics(3)]
20856pub fn _mm_mask_srai_epi32<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
20857    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
20858        let r = simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31) as i32));
20859        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
20860    }
20861}
20862
20863/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20864///
20865/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srai_epi32&expand=5429)
20866#[inline]
20867#[target_feature(enable = "avx512f,avx512vl")]
20868#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20869#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20870#[rustc_legacy_const_generics(2)]
20871pub fn _mm_maskz_srai_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
20872    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
20873        let r = simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31) as i32));
20874        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
20875    }
20876}
20877
20878/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
20879///
20880/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srai_epi64&expand=5445)
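///
/// A minimal usage sketch (illustrative values, not from the crate's tests). The
/// shift amount is a const generic; amounts above 63 behave like a shift by 63:
///
/// ```ignore
/// let a = _mm512_set1_epi64(-1024);
/// let r = _mm512_srai_epi64::<4>(a);
/// // sign bits are shifted in, so every lane holds -64
/// ```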
20881#[inline]
20882#[target_feature(enable = "avx512f")]
20883#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20884#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20885#[rustc_legacy_const_generics(1)]
20886pub fn _mm512_srai_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
20887    unsafe {
20888        static_assert_uimm_bits!(IMM8, 8);
20889        transmute(simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64)))
20890    }
20891}
20892
20893/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20894///
20895/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srai_epi64&expand=5443)
20896#[inline]
20897#[target_feature(enable = "avx512f")]
20898#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20899#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20900#[rustc_legacy_const_generics(3)]
20901pub fn _mm512_mask_srai_epi64<const IMM8: u32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
20902    unsafe {
20903        static_assert_uimm_bits!(IMM8, 8);
20904        let shf = simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64));
20905        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
20906    }
20907}
20908
20909/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20910///
20911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srai_epi64&expand=5444)
20912#[inline]
20913#[target_feature(enable = "avx512f")]
20914#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20915#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20916#[rustc_legacy_const_generics(2)]
20917pub fn _mm512_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
20918    unsafe {
20919        static_assert_uimm_bits!(IMM8, 8);
20920        let shf = simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64));
20921        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
20922    }
20923}
20924
20925/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
20926///
20927/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srai_epi64&expand=5442)
20928#[inline]
20929#[target_feature(enable = "avx512f,avx512vl")]
20930#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20931#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20932#[rustc_legacy_const_generics(1)]
20933pub fn _mm256_srai_epi64<const IMM8: u32>(a: __m256i) -> __m256i {
20934    unsafe {
20935        static_assert_uimm_bits!(IMM8, 8);
20936        transmute(simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64)))
20937    }
20938}
20939
20940/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20941///
20942/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srai_epi64&expand=5440)
20943#[inline]
20944#[target_feature(enable = "avx512f,avx512vl")]
20945#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20946#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20947#[rustc_legacy_const_generics(3)]
20948pub fn _mm256_mask_srai_epi64<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
20949    unsafe {
20950        static_assert_uimm_bits!(IMM8, 8);
20951        let shf = simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64));
20952        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
20953    }
20954}
20955
20956/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20957///
20958/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srai_epi64&expand=5441)
20959#[inline]
20960#[target_feature(enable = "avx512f,avx512vl")]
20961#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20962#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20963#[rustc_legacy_const_generics(2)]
20964pub fn _mm256_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
20965    unsafe {
20966        static_assert_uimm_bits!(IMM8, 8);
20967        let shf = simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64));
20968        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
20969    }
20970}
20971
20972/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
20973///
20974/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srai_epi64&expand=5439)
20975#[inline]
20976#[target_feature(enable = "avx512f,avx512vl")]
20977#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20978#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20979#[rustc_legacy_const_generics(1)]
20980pub fn _mm_srai_epi64<const IMM8: u32>(a: __m128i) -> __m128i {
20981    unsafe {
20982        static_assert_uimm_bits!(IMM8, 8);
20983        transmute(simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64)))
20984    }
20985}
20986
20987/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20988///
20989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srai_epi64&expand=5437)
20990#[inline]
20991#[target_feature(enable = "avx512f,avx512vl")]
20992#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20993#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20994#[rustc_legacy_const_generics(3)]
20995pub fn _mm_mask_srai_epi64<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
20996    unsafe {
20997        static_assert_uimm_bits!(IMM8, 8);
20998        let shf = simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64));
20999        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
21000    }
21001}
21002
21003/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21004///
21005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srai_epi64&expand=5438)
21006#[inline]
21007#[target_feature(enable = "avx512f,avx512vl")]
21008#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21009#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21010#[rustc_legacy_const_generics(2)]
21011pub fn _mm_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
21012    unsafe {
21013        static_assert_uimm_bits!(IMM8, 8);
21014        let shf = simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64));
21015        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
21016    }
21017}
21018
21019/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
21020///
21021/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srav_epi32&expand=5465)
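///
/// A minimal usage sketch (illustrative values, not from the crate's tests); each
/// lane uses its own shift amount from `count`:
///
/// ```ignore
/// let a = _mm512_set1_epi32(-256);
/// let count = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/// let r = _mm512_srav_epi32(a, count);
/// // lane i holds -256 >> i: lane 0 is -256, lane 4 is -16, lanes 8 and above are -1
/// ```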
21022#[inline]
21023#[target_feature(enable = "avx512f")]
21024#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21025#[cfg_attr(test, assert_instr(vpsravd))]
21026pub fn _mm512_srav_epi32(a: __m512i, count: __m512i) -> __m512i {
21027    unsafe { transmute(vpsravd(a.as_i32x16(), count.as_i32x16())) }
21028}
21029
21030/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21031///
21032/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srav_epi32&expand=5463)
21033#[inline]
21034#[target_feature(enable = "avx512f")]
21035#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21036#[cfg_attr(test, assert_instr(vpsravd))]
21037pub fn _mm512_mask_srav_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21038    unsafe {
21039        let shf = _mm512_srav_epi32(a, count).as_i32x16();
21040        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
21041    }
21042}
21043
21044/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21045///
21046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srav_epi32&expand=5464)
21047#[inline]
21048#[target_feature(enable = "avx512f")]
21049#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21050#[cfg_attr(test, assert_instr(vpsravd))]
21051pub fn _mm512_maskz_srav_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21052    unsafe {
21053        let shf = _mm512_srav_epi32(a, count).as_i32x16();
21054        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
21055    }
21056}
21057
21058/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21059///
21060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srav_epi32&expand=5460)
21061#[inline]
21062#[target_feature(enable = "avx512f,avx512vl")]
21063#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21064#[cfg_attr(test, assert_instr(vpsravd))]
21065pub fn _mm256_mask_srav_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21066    unsafe {
21067        let shf = _mm256_srav_epi32(a, count).as_i32x8();
21068        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
21069    }
21070}
21071
21072/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21073///
21074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srav_epi32&expand=5461)
21075#[inline]
21076#[target_feature(enable = "avx512f,avx512vl")]
21077#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21078#[cfg_attr(test, assert_instr(vpsravd))]
21079pub fn _mm256_maskz_srav_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21080    unsafe {
21081        let shf = _mm256_srav_epi32(a, count).as_i32x8();
21082        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
21083    }
21084}
21085
21086/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21087///
21088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srav_epi32&expand=5457)
21089#[inline]
21090#[target_feature(enable = "avx512f,avx512vl")]
21091#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21092#[cfg_attr(test, assert_instr(vpsravd))]
21093pub fn _mm_mask_srav_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21094    unsafe {
21095        let shf = _mm_srav_epi32(a, count).as_i32x4();
21096        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
21097    }
21098}
21099
21100/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21101///
21102/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srav_epi32&expand=5458)
21103#[inline]
21104#[target_feature(enable = "avx512f,avx512vl")]
21105#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21106#[cfg_attr(test, assert_instr(vpsravd))]
21107pub fn _mm_maskz_srav_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21108    unsafe {
21109        let shf = _mm_srav_epi32(a, count).as_i32x4();
21110        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
21111    }
21112}
21113
21114/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
21115///
21116/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srav_epi64&expand=5474)
21117#[inline]
21118#[target_feature(enable = "avx512f")]
21119#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21120#[cfg_attr(test, assert_instr(vpsravq))]
21121pub fn _mm512_srav_epi64(a: __m512i, count: __m512i) -> __m512i {
21122    unsafe { transmute(vpsravq(a.as_i64x8(), count.as_i64x8())) }
21123}
21124
21125/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21126///
21127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srav_epi64&expand=5472)
21128#[inline]
21129#[target_feature(enable = "avx512f")]
21130#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21131#[cfg_attr(test, assert_instr(vpsravq))]
21132pub fn _mm512_mask_srav_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
21133    unsafe {
21134        let shf = _mm512_srav_epi64(a, count).as_i64x8();
21135        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
21136    }
21137}
21138
21139/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21140///
21141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srav_epi64&expand=5473)
21142#[inline]
21143#[target_feature(enable = "avx512f")]
21144#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21145#[cfg_attr(test, assert_instr(vpsravq))]
21146pub fn _mm512_maskz_srav_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
21147    unsafe {
21148        let shf = _mm512_srav_epi64(a, count).as_i64x8();
21149        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
21150    }
21151}
21152
21153/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
21154///
21155/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srav_epi64&expand=5471)
21156#[inline]
21157#[target_feature(enable = "avx512f,avx512vl")]
21158#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21159#[cfg_attr(test, assert_instr(vpsravq))]
21160pub fn _mm256_srav_epi64(a: __m256i, count: __m256i) -> __m256i {
21161    unsafe { transmute(vpsravq256(a.as_i64x4(), count.as_i64x4())) }
21162}
21163
21164/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21165///
21166/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srav_epi64&expand=5469)
21167#[inline]
21168#[target_feature(enable = "avx512f,avx512vl")]
21169#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21170#[cfg_attr(test, assert_instr(vpsravq))]
21171pub fn _mm256_mask_srav_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21172    unsafe {
21173        let shf = _mm256_srav_epi64(a, count).as_i64x4();
21174        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
21175    }
21176}
21177
21178/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21179///
21180/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srav_epi64&expand=5470)
21181#[inline]
21182#[target_feature(enable = "avx512f,avx512vl")]
21183#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21184#[cfg_attr(test, assert_instr(vpsravq))]
21185pub fn _mm256_maskz_srav_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21186    unsafe {
21187        let shf = _mm256_srav_epi64(a, count).as_i64x4();
21188        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
21189    }
21190}
21191
21192/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
21193///
21194/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srav_epi64&expand=5468)
21195#[inline]
21196#[target_feature(enable = "avx512f,avx512vl")]
21197#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21198#[cfg_attr(test, assert_instr(vpsravq))]
21199pub fn _mm_srav_epi64(a: __m128i, count: __m128i) -> __m128i {
21200    unsafe { transmute(vpsravq128(a.as_i64x2(), count.as_i64x2())) }
21201}
21202
21203/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21204///
21205/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srav_epi64&expand=5466)
21206#[inline]
21207#[target_feature(enable = "avx512f,avx512vl")]
21208#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21209#[cfg_attr(test, assert_instr(vpsravq))]
21210pub fn _mm_mask_srav_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21211    unsafe {
21212        let shf = _mm_srav_epi64(a, count).as_i64x2();
21213        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
21214    }
21215}
21216
21217/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21218///
21219/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srav_epi64&expand=5467)
21220#[inline]
21221#[target_feature(enable = "avx512f,avx512vl")]
21222#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21223#[cfg_attr(test, assert_instr(vpsravq))]
21224pub fn _mm_maskz_srav_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21225    unsafe {
21226        let shf = _mm_srav_epi64(a, count).as_i64x2();
21227        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
21228    }
21229}
21230
21231/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21232///
21233/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rolv_epi32&expand=4703)
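///
/// A minimal usage sketch (illustrative values, not from the crate's tests); each
/// lane rotates by the amount in the corresponding lane of `b`:
///
/// ```ignore
/// let a = _mm512_set1_epi32(0x8000_0001u32 as i32);
/// let b = _mm512_set1_epi32(4);
/// let r = _mm512_rolv_epi32(a, b);
/// // bits wrap around: every lane holds 0x0000_0018
/// ```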
21234#[inline]
21235#[target_feature(enable = "avx512f")]
21236#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21237#[cfg_attr(test, assert_instr(vprolvd))]
21238pub fn _mm512_rolv_epi32(a: __m512i, b: __m512i) -> __m512i {
21239    unsafe { transmute(vprolvd(a.as_i32x16(), b.as_i32x16())) }
21240}
21241
21242/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21243///
21244/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rolv_epi32&expand=4701)
21245#[inline]
21246#[target_feature(enable = "avx512f")]
21247#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21248#[cfg_attr(test, assert_instr(vprolvd))]
21249pub fn _mm512_mask_rolv_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
21250    unsafe {
21251        let rol = _mm512_rolv_epi32(a, b).as_i32x16();
21252        transmute(simd_select_bitmask(k, rol, src.as_i32x16()))
21253    }
21254}
21255
21256/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21257///
21258/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rolv_epi32&expand=4702)
21259#[inline]
21260#[target_feature(enable = "avx512f")]
21261#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21262#[cfg_attr(test, assert_instr(vprolvd))]
21263pub fn _mm512_maskz_rolv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
21264    unsafe {
21265        let rol = _mm512_rolv_epi32(a, b).as_i32x16();
21266        transmute(simd_select_bitmask(k, rol, i32x16::ZERO))
21267    }
21268}
21269
21270/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21271///
21272/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rolv_epi32&expand=4700)
21273#[inline]
21274#[target_feature(enable = "avx512f,avx512vl")]
21275#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21276#[cfg_attr(test, assert_instr(vprolvd))]
21277pub fn _mm256_rolv_epi32(a: __m256i, b: __m256i) -> __m256i {
21278    unsafe { transmute(vprolvd256(a.as_i32x8(), b.as_i32x8())) }
21279}
21280
21281/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21282///
21283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rolv_epi32&expand=4698)
21284#[inline]
21285#[target_feature(enable = "avx512f,avx512vl")]
21286#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21287#[cfg_attr(test, assert_instr(vprolvd))]
21288pub fn _mm256_mask_rolv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21289    unsafe {
21290        let rol = _mm256_rolv_epi32(a, b).as_i32x8();
21291        transmute(simd_select_bitmask(k, rol, src.as_i32x8()))
21292    }
21293}
21294
21295/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21296///
21297/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rolv_epi32&expand=4699)
21298#[inline]
21299#[target_feature(enable = "avx512f,avx512vl")]
21300#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21301#[cfg_attr(test, assert_instr(vprolvd))]
21302pub fn _mm256_maskz_rolv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21303    unsafe {
21304        let rol = _mm256_rolv_epi32(a, b).as_i32x8();
21305        transmute(simd_select_bitmask(k, rol, i32x8::ZERO))
21306    }
21307}
21308
21309/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21310///
21311/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rolv_epi32&expand=4697)
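///
/// A short illustrative sketch (not from the upstream docs; assumes an x86_64
/// target, a nightly toolchain with the unstable `stdarch_x86_avx512` feature,
/// and the required CPU features verified at runtime, hence the `ignore` fence):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
///     // SAFETY: the required CPU features were verified at runtime above.
///     unsafe {
///         // Rotating 1 left by 31 wraps the bit into the sign position of each lane.
///         let r = _mm_rolv_epi32(_mm_set1_epi32(1), _mm_set1_epi32(31));
///         assert_eq!(_mm_cmpeq_epi32_mask(r, _mm_set1_epi32(i32::MIN)), 0b1111);
///     }
/// }
/// ```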
21312#[inline]
21313#[target_feature(enable = "avx512f,avx512vl")]
21314#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21315#[cfg_attr(test, assert_instr(vprolvd))]
21316pub fn _mm_rolv_epi32(a: __m128i, b: __m128i) -> __m128i {
21317    unsafe { transmute(vprolvd128(a.as_i32x4(), b.as_i32x4())) }
21318}
21319
21320/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21321///
21322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rolv_epi32&expand=4695)
21323#[inline]
21324#[target_feature(enable = "avx512f,avx512vl")]
21325#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21326#[cfg_attr(test, assert_instr(vprolvd))]
21327pub fn _mm_mask_rolv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21328    unsafe {
21329        let rol = _mm_rolv_epi32(a, b).as_i32x4();
21330        transmute(simd_select_bitmask(k, rol, src.as_i32x4()))
21331    }
21332}
21333
21334/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21335///
21336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rolv_epi32&expand=4696)
21337#[inline]
21338#[target_feature(enable = "avx512f,avx512vl")]
21339#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21340#[cfg_attr(test, assert_instr(vprolvd))]
21341pub fn _mm_maskz_rolv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21342    unsafe {
21343        let rol = _mm_rolv_epi32(a, b).as_i32x4();
21344        transmute(simd_select_bitmask(k, rol, i32x4::ZERO))
21345    }
21346}
21347
21348/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21349///
21350/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rorv_epi32&expand=4739)
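///
/// A minimal usage sketch (illustrative only, not from the upstream docs; assumes
/// an x86_64 target, a nightly toolchain with the unstable `stdarch_x86_avx512`
/// feature, and AVX-512F verified at runtime, hence the `ignore` fence):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: AVX-512F support was verified at runtime above.
///     unsafe {
///         // Rotating right wraps low bits into the high end: 1 ror 1 == 0x8000_0000.
///         let r = _mm512_rorv_epi32(_mm512_set1_epi32(1), _mm512_set1_epi32(1));
///         assert_eq!(_mm512_cmpeq_epi32_mask(r, _mm512_set1_epi32(i32::MIN)), 0xFFFF);
///     }
/// }
/// ```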
21351#[inline]
21352#[target_feature(enable = "avx512f")]
21353#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21354#[cfg_attr(test, assert_instr(vprorvd))]
21355pub fn _mm512_rorv_epi32(a: __m512i, b: __m512i) -> __m512i {
21356    unsafe { transmute(vprorvd(a.as_i32x16(), b.as_i32x16())) }
21357}
21358
21359/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21360///
21361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rorv_epi32&expand=4737)
21362#[inline]
21363#[target_feature(enable = "avx512f")]
21364#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21365#[cfg_attr(test, assert_instr(vprorvd))]
21366pub fn _mm512_mask_rorv_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
21367    unsafe {
21368        let ror = _mm512_rorv_epi32(a, b).as_i32x16();
21369        transmute(simd_select_bitmask(k, ror, src.as_i32x16()))
21370    }
21371}
21372
21373/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21374///
21375/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rorv_epi32&expand=4738)
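///
/// An illustrative sketch of the zeromask behaviour (not from the upstream docs;
/// assumes an x86_64 target, a nightly toolchain with the unstable
/// `stdarch_x86_avx512` feature, and AVX-512F verified at runtime, hence the
/// `ignore` fence):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: AVX-512F support was verified at runtime above.
///     unsafe {
///         let a = _mm512_set1_epi32(0b1000);
///         let b = _mm512_set1_epi32(3);
///         // Only the even lanes are computed (0b1000 ror 3 == 1); odd lanes are zeroed.
///         let r = _mm512_maskz_rorv_epi32(0b01010101_01010101, a, b);
///         let e = _mm512_setr_epi32(1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0);
///         assert_eq!(_mm512_cmpeq_epi32_mask(r, e), 0xFFFF);
///     }
/// }
/// ```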
21376#[inline]
21377#[target_feature(enable = "avx512f")]
21378#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21379#[cfg_attr(test, assert_instr(vprorvd))]
21380pub fn _mm512_maskz_rorv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
21381    unsafe {
21382        let ror = _mm512_rorv_epi32(a, b).as_i32x16();
21383        transmute(simd_select_bitmask(k, ror, i32x16::ZERO))
21384    }
21385}
21386
21387/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21388///
21389/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rorv_epi32&expand=4736)
21390#[inline]
21391#[target_feature(enable = "avx512f,avx512vl")]
21392#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21393#[cfg_attr(test, assert_instr(vprorvd))]
21394pub fn _mm256_rorv_epi32(a: __m256i, b: __m256i) -> __m256i {
21395    unsafe { transmute(vprorvd256(a.as_i32x8(), b.as_i32x8())) }
21396}
21397
21398/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21399///
21400/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rorv_epi32&expand=4734)
21401#[inline]
21402#[target_feature(enable = "avx512f,avx512vl")]
21403#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21404#[cfg_attr(test, assert_instr(vprorvd))]
21405pub fn _mm256_mask_rorv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21406    unsafe {
21407        let ror = _mm256_rorv_epi32(a, b).as_i32x8();
21408        transmute(simd_select_bitmask(k, ror, src.as_i32x8()))
21409    }
21410}
21411
21412/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21413///
21414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rorv_epi32&expand=4735)
21415#[inline]
21416#[target_feature(enable = "avx512f,avx512vl")]
21417#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21418#[cfg_attr(test, assert_instr(vprorvd))]
21419pub fn _mm256_maskz_rorv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21420    unsafe {
21421        let ror = _mm256_rorv_epi32(a, b).as_i32x8();
21422        transmute(simd_select_bitmask(k, ror, i32x8::ZERO))
21423    }
21424}
21425
21426/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21427///
21428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rorv_epi32&expand=4733)
21429#[inline]
21430#[target_feature(enable = "avx512f,avx512vl")]
21431#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21432#[cfg_attr(test, assert_instr(vprorvd))]
21433pub fn _mm_rorv_epi32(a: __m128i, b: __m128i) -> __m128i {
21434    unsafe { transmute(vprorvd128(a.as_i32x4(), b.as_i32x4())) }
21435}
21436
21437/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21438///
21439/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rorv_epi32&expand=4731)
21440#[inline]
21441#[target_feature(enable = "avx512f,avx512vl")]
21442#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21443#[cfg_attr(test, assert_instr(vprorvd))]
21444pub fn _mm_mask_rorv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21445    unsafe {
21446        let ror = _mm_rorv_epi32(a, b).as_i32x4();
21447        transmute(simd_select_bitmask(k, ror, src.as_i32x4()))
21448    }
21449}
21450
21451/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21452///
21453/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rorv_epi32&expand=4732)
21454#[inline]
21455#[target_feature(enable = "avx512f,avx512vl")]
21456#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21457#[cfg_attr(test, assert_instr(vprorvd))]
21458pub fn _mm_maskz_rorv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21459    unsafe {
21460        let ror = _mm_rorv_epi32(a, b).as_i32x4();
21461        transmute(simd_select_bitmask(k, ror, i32x4::ZERO))
21462    }
21463}
21464
21465/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21466///
21467/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rolv_epi64&expand=4712)
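///
/// A minimal usage sketch (illustrative only, not from the upstream docs; assumes
/// an x86_64 target, a nightly toolchain with the unstable `stdarch_x86_avx512`
/// feature, and AVX-512F verified at runtime, hence the `ignore` fence):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: AVX-512F support was verified at runtime above.
///     unsafe {
///         // Each 64-bit lane of `a` is rotated left by the count in the same lane of `b`.
///         let r = _mm512_rolv_epi64(_mm512_set1_epi64(1), _mm512_set1_epi64(63));
///         assert_eq!(_mm512_cmpeq_epi64_mask(r, _mm512_set1_epi64(i64::MIN)), 0xFF);
///     }
/// }
/// ```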
21468#[inline]
21469#[target_feature(enable = "avx512f")]
21470#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21471#[cfg_attr(test, assert_instr(vprolvq))]
21472pub fn _mm512_rolv_epi64(a: __m512i, b: __m512i) -> __m512i {
21473    unsafe { transmute(vprolvq(a.as_i64x8(), b.as_i64x8())) }
21474}
21475
21476/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21477///
21478/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rolv_epi64&expand=4710)
21479#[inline]
21480#[target_feature(enable = "avx512f")]
21481#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21482#[cfg_attr(test, assert_instr(vprolvq))]
21483pub fn _mm512_mask_rolv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
21484    unsafe {
21485        let rol = _mm512_rolv_epi64(a, b).as_i64x8();
21486        transmute(simd_select_bitmask(k, rol, src.as_i64x8()))
21487    }
21488}
21489
21490/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21491///
21492/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rolv_epi64&expand=4711)
21493#[inline]
21494#[target_feature(enable = "avx512f")]
21495#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21496#[cfg_attr(test, assert_instr(vprolvq))]
21497pub fn _mm512_maskz_rolv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
21498    unsafe {
21499        let rol = _mm512_rolv_epi64(a, b).as_i64x8();
21500        transmute(simd_select_bitmask(k, rol, i64x8::ZERO))
21501    }
21502}
21503
21504/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21505///
21506/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rolv_epi64&expand=4709)
21507#[inline]
21508#[target_feature(enable = "avx512f,avx512vl")]
21509#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21510#[cfg_attr(test, assert_instr(vprolvq))]
21511pub fn _mm256_rolv_epi64(a: __m256i, b: __m256i) -> __m256i {
21512    unsafe { transmute(vprolvq256(a.as_i64x4(), b.as_i64x4())) }
21513}
21514
21515/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21516///
21517/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rolv_epi64&expand=4707)
21518#[inline]
21519#[target_feature(enable = "avx512f,avx512vl")]
21520#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21521#[cfg_attr(test, assert_instr(vprolvq))]
21522pub fn _mm256_mask_rolv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21523    unsafe {
21524        let rol = _mm256_rolv_epi64(a, b).as_i64x4();
21525        transmute(simd_select_bitmask(k, rol, src.as_i64x4()))
21526    }
21527}
21528
21529/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21530///
21531/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rolv_epi64&expand=4708)
21532#[inline]
21533#[target_feature(enable = "avx512f,avx512vl")]
21534#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21535#[cfg_attr(test, assert_instr(vprolvq))]
21536pub fn _mm256_maskz_rolv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21537    unsafe {
21538        let rol = _mm256_rolv_epi64(a, b).as_i64x4();
21539        transmute(simd_select_bitmask(k, rol, i64x4::ZERO))
21540    }
21541}
21542
21543/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21544///
21545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rolv_epi64&expand=4706)
21546#[inline]
21547#[target_feature(enable = "avx512f,avx512vl")]
21548#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21549#[cfg_attr(test, assert_instr(vprolvq))]
21550pub fn _mm_rolv_epi64(a: __m128i, b: __m128i) -> __m128i {
21551    unsafe { transmute(vprolvq128(a.as_i64x2(), b.as_i64x2())) }
21552}
21553
21554/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21555///
21556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rolv_epi64&expand=4704)
21557#[inline]
21558#[target_feature(enable = "avx512f,avx512vl")]
21559#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21560#[cfg_attr(test, assert_instr(vprolvq))]
21561pub fn _mm_mask_rolv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21562    unsafe {
21563        let rol = _mm_rolv_epi64(a, b).as_i64x2();
21564        transmute(simd_select_bitmask(k, rol, src.as_i64x2()))
21565    }
21566}
21567
21568/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21569///
21570/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rolv_epi64&expand=4705)
21571#[inline]
21572#[target_feature(enable = "avx512f,avx512vl")]
21573#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21574#[cfg_attr(test, assert_instr(vprolvq))]
21575pub fn _mm_maskz_rolv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21576    unsafe {
21577        let rol = _mm_rolv_epi64(a, b).as_i64x2();
21578        transmute(simd_select_bitmask(k, rol, i64x2::ZERO))
21579    }
21580}
21581
21582/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21583///
21584/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rorv_epi64&expand=4748)
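///
/// A minimal usage sketch (illustrative only, not from the upstream docs; assumes
/// an x86_64 target, a nightly toolchain with the unstable `stdarch_x86_avx512`
/// feature, and AVX-512F verified at runtime, hence the `ignore` fence):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: AVX-512F support was verified at runtime above.
///     unsafe {
///         // 1 rotated right by 1 moves the bit to position 63 in every lane.
///         let r = _mm512_rorv_epi64(_mm512_set1_epi64(1), _mm512_set1_epi64(1));
///         assert_eq!(_mm512_cmpeq_epi64_mask(r, _mm512_set1_epi64(i64::MIN)), 0xFF);
///     }
/// }
/// ```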
21585#[inline]
21586#[target_feature(enable = "avx512f")]
21587#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21588#[cfg_attr(test, assert_instr(vprorvq))]
21589pub fn _mm512_rorv_epi64(a: __m512i, b: __m512i) -> __m512i {
21590    unsafe { transmute(vprorvq(a.as_i64x8(), b.as_i64x8())) }
21591}
21592
21593/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21594///
21595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rorv_epi64&expand=4746)
21596#[inline]
21597#[target_feature(enable = "avx512f")]
21598#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21599#[cfg_attr(test, assert_instr(vprorvq))]
21600pub fn _mm512_mask_rorv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
21601    unsafe {
21602        let ror = _mm512_rorv_epi64(a, b).as_i64x8();
21603        transmute(simd_select_bitmask(k, ror, src.as_i64x8()))
21604    }
21605}
21606
21607/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21608///
21609/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rorv_epi64&expand=4747)
21610#[inline]
21611#[target_feature(enable = "avx512f")]
21612#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21613#[cfg_attr(test, assert_instr(vprorvq))]
21614pub fn _mm512_maskz_rorv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
21615    unsafe {
21616        let ror = _mm512_rorv_epi64(a, b).as_i64x8();
21617        transmute(simd_select_bitmask(k, ror, i64x8::ZERO))
21618    }
21619}
21620
21621/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21622///
21623/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rorv_epi64&expand=4745)
21624#[inline]
21625#[target_feature(enable = "avx512f,avx512vl")]
21626#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21627#[cfg_attr(test, assert_instr(vprorvq))]
21628pub fn _mm256_rorv_epi64(a: __m256i, b: __m256i) -> __m256i {
21629    unsafe { transmute(vprorvq256(a.as_i64x4(), b.as_i64x4())) }
21630}
21631
21632/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21633///
21634/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rorv_epi64&expand=4743)
21635#[inline]
21636#[target_feature(enable = "avx512f,avx512vl")]
21637#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21638#[cfg_attr(test, assert_instr(vprorvq))]
21639pub fn _mm256_mask_rorv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21640    unsafe {
21641        let ror = _mm256_rorv_epi64(a, b).as_i64x4();
21642        transmute(simd_select_bitmask(k, ror, src.as_i64x4()))
21643    }
21644}
21645
21646/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21647///
21648/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rorv_epi64&expand=4744)
21649#[inline]
21650#[target_feature(enable = "avx512f,avx512vl")]
21651#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21652#[cfg_attr(test, assert_instr(vprorvq))]
21653pub fn _mm256_maskz_rorv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21654    unsafe {
21655        let ror = _mm256_rorv_epi64(a, b).as_i64x4();
21656        transmute(simd_select_bitmask(k, ror, i64x4::ZERO))
21657    }
21658}
21659
21660/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21661///
21662/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rorv_epi64&expand=4742)
21663#[inline]
21664#[target_feature(enable = "avx512f,avx512vl")]
21665#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21666#[cfg_attr(test, assert_instr(vprorvq))]
21667pub fn _mm_rorv_epi64(a: __m128i, b: __m128i) -> __m128i {
21668    unsafe { transmute(vprorvq128(a.as_i64x2(), b.as_i64x2())) }
21669}
21670
21671/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21672///
21673/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rorv_epi64&expand=4740)
21674#[inline]
21675#[target_feature(enable = "avx512f,avx512vl")]
21676#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21677#[cfg_attr(test, assert_instr(vprorvq))]
21678pub fn _mm_mask_rorv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21679    unsafe {
21680        let ror = _mm_rorv_epi64(a, b).as_i64x2();
21681        transmute(simd_select_bitmask(k, ror, src.as_i64x2()))
21682    }
21683}
21684
21685/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21686///
21687/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rorv_epi64&expand=4741)
21688#[inline]
21689#[target_feature(enable = "avx512f,avx512vl")]
21690#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21691#[cfg_attr(test, assert_instr(vprorvq))]
21692pub fn _mm_maskz_rorv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21693    unsafe {
21694        let ror = _mm_rorv_epi64(a, b).as_i64x2();
21695        transmute(simd_select_bitmask(k, ror, i64x2::ZERO))
21696    }
21697}
21698
21699/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
21700///
21701/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sllv_epi32&expand=5342)
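///
/// A minimal usage sketch (illustrative only, not from the upstream docs; assumes
/// an x86_64 target, a nightly toolchain with the unstable `stdarch_x86_avx512`
/// feature, and AVX-512F verified at runtime, hence the `ignore` fence):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: AVX-512F support was verified at runtime above.
///     unsafe {
///         let a = _mm512_set1_epi32(1);
///         // Per-lane shift counts; a count of 32 or more zeroes that lane.
///         let count = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 32);
///         let r = _mm512_sllv_epi32(a, count);
///         let e = _mm512_setr_epi32(1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 0);
///         assert_eq!(_mm512_cmpeq_epi32_mask(r, e), 0xFFFF);
///     }
/// }
/// ```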
21702#[inline]
21703#[target_feature(enable = "avx512f")]
21704#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21705#[cfg_attr(test, assert_instr(vpsllvd))]
21706pub fn _mm512_sllv_epi32(a: __m512i, count: __m512i) -> __m512i {
21707    unsafe { transmute(vpsllvd(a.as_i32x16(), count.as_i32x16())) }
21708}
21709
21710/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21711///
21712/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sllv_epi32&expand=5340)
21713#[inline]
21714#[target_feature(enable = "avx512f")]
21715#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21716#[cfg_attr(test, assert_instr(vpsllvd))]
21717pub fn _mm512_mask_sllv_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21718    unsafe {
21719        let shf = _mm512_sllv_epi32(a, count).as_i32x16();
21720        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
21721    }
21722}
21723
21724/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21725///
21726/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sllv_epi32&expand=5341)
21727#[inline]
21728#[target_feature(enable = "avx512f")]
21729#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21730#[cfg_attr(test, assert_instr(vpsllvd))]
21731pub fn _mm512_maskz_sllv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21732    unsafe {
21733        let shf = _mm512_sllv_epi32(a, count).as_i32x16();
21734        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
21735    }
21736}
21737
21738/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21739///
21740/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sllv_epi32&expand=5337)
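///
/// An illustrative sketch of the writemask behaviour (not from the upstream docs;
/// assumes an x86_64 target, a nightly toolchain with the unstable
/// `stdarch_x86_avx512` feature, and the required CPU features verified at
/// runtime, hence the `ignore` fence):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
///     // SAFETY: the required CPU features were verified at runtime above.
///     unsafe {
///         let src = _mm256_set1_epi32(-1);
///         let a = _mm256_set1_epi32(1);
///         let count = _mm256_set1_epi32(8);
///         // Lanes 0-3 are shifted (1 << 8 == 256); lanes 4-7 are copied from `src`.
///         let r = _mm256_mask_sllv_epi32(src, 0b0000_1111, a, count);
///         let e = _mm256_setr_epi32(256, 256, 256, 256, -1, -1, -1, -1);
///         assert_eq!(_mm256_cmpeq_epi32_mask(r, e), 0xFF);
///     }
/// }
/// ```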
21741#[inline]
21742#[target_feature(enable = "avx512f,avx512vl")]
21743#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21744#[cfg_attr(test, assert_instr(vpsllvd))]
21745pub fn _mm256_mask_sllv_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21746    unsafe {
21747        let shf = _mm256_sllv_epi32(a, count).as_i32x8();
21748        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
21749    }
21750}
21751
21752/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21753///
21754/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sllv_epi32&expand=5338)
21755#[inline]
21756#[target_feature(enable = "avx512f,avx512vl")]
21757#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21758#[cfg_attr(test, assert_instr(vpsllvd))]
21759pub fn _mm256_maskz_sllv_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21760    unsafe {
21761        let shf = _mm256_sllv_epi32(a, count).as_i32x8();
21762        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
21763    }
21764}
21765
21766/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21767///
21768/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sllv_epi32&expand=5334)
21769#[inline]
21770#[target_feature(enable = "avx512f,avx512vl")]
21771#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21772#[cfg_attr(test, assert_instr(vpsllvd))]
21773pub fn _mm_mask_sllv_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21774    unsafe {
21775        let shf = _mm_sllv_epi32(a, count).as_i32x4();
21776        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
21777    }
21778}
21779
21780/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21781///
21782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sllv_epi32&expand=5335)
21783#[inline]
21784#[target_feature(enable = "avx512f,avx512vl")]
21785#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21786#[cfg_attr(test, assert_instr(vpsllvd))]
21787pub fn _mm_maskz_sllv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21788    unsafe {
21789        let shf = _mm_sllv_epi32(a, count).as_i32x4();
21790        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
21791    }
21792}
21793
21794/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
21795///
21796/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srlv_epi32&expand=5554)
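///
/// A minimal usage sketch (illustrative only, not from the upstream docs; assumes
/// an x86_64 target, a nightly toolchain with the unstable `stdarch_x86_avx512`
/// feature, and AVX-512F verified at runtime, hence the `ignore` fence):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: AVX-512F support was verified at runtime above.
///     unsafe {
///         // A logical shift: zeros are shifted in from the left even for negative inputs.
///         let r = _mm512_srlv_epi32(_mm512_set1_epi32(-1), _mm512_set1_epi32(28));
///         assert_eq!(_mm512_cmpeq_epi32_mask(r, _mm512_set1_epi32(0xF)), 0xFFFF);
///     }
/// }
/// ```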
21797#[inline]
21798#[target_feature(enable = "avx512f")]
21799#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21800#[cfg_attr(test, assert_instr(vpsrlvd))]
21801pub fn _mm512_srlv_epi32(a: __m512i, count: __m512i) -> __m512i {
21802    unsafe { transmute(vpsrlvd(a.as_i32x16(), count.as_i32x16())) }
21803}
21804
21805/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21806///
21807/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srlv_epi32&expand=5552)
21808#[inline]
21809#[target_feature(enable = "avx512f")]
21810#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21811#[cfg_attr(test, assert_instr(vpsrlvd))]
21812pub fn _mm512_mask_srlv_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21813    unsafe {
21814        let shf = _mm512_srlv_epi32(a, count).as_i32x16();
21815        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
21816    }
21817}
21818
21819/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21820///
21821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srlv_epi32&expand=5553)
21822#[inline]
21823#[target_feature(enable = "avx512f")]
21824#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21825#[cfg_attr(test, assert_instr(vpsrlvd))]
21826pub fn _mm512_maskz_srlv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21827    unsafe {
21828        let shf = _mm512_srlv_epi32(a, count).as_i32x16();
21829        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
21830    }
21831}
21832
21833/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21834///
21835/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srlv_epi32&expand=5549)
21836#[inline]
21837#[target_feature(enable = "avx512f,avx512vl")]
21838#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21839#[cfg_attr(test, assert_instr(vpsrlvd))]
21840pub fn _mm256_mask_srlv_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21841    unsafe {
21842        let shf = _mm256_srlv_epi32(a, count).as_i32x8();
21843        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
21844    }
21845}
21846
21847/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21848///
21849/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srlv_epi32&expand=5550)
21850#[inline]
21851#[target_feature(enable = "avx512f,avx512vl")]
21852#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21853#[cfg_attr(test, assert_instr(vpsrlvd))]
21854pub fn _mm256_maskz_srlv_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21855    unsafe {
21856        let shf = _mm256_srlv_epi32(a, count).as_i32x8();
21857        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
21858    }
21859}
21860
21861/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21862///
21863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srlv_epi32&expand=5546)
21864#[inline]
21865#[target_feature(enable = "avx512f,avx512vl")]
21866#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21867#[cfg_attr(test, assert_instr(vpsrlvd))]
21868pub fn _mm_mask_srlv_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21869    unsafe {
21870        let shf = _mm_srlv_epi32(a, count).as_i32x4();
21871        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
21872    }
21873}
21874
21875/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21876///
21877/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srlv_epi32&expand=5547)
21878#[inline]
21879#[target_feature(enable = "avx512f,avx512vl")]
21880#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21881#[cfg_attr(test, assert_instr(vpsrlvd))]
21882pub fn _mm_maskz_srlv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21883    unsafe {
21884        let shf = _mm_srlv_epi32(a, count).as_i32x4();
21885        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
21886    }
21887}
21888
21889/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
21890///
21891/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sllv_epi64&expand=5351)
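///
/// A minimal usage sketch (illustrative only, not from the upstream docs; assumes
/// an x86_64 target, a nightly toolchain with the unstable `stdarch_x86_avx512`
/// feature, and AVX-512F verified at runtime, hence the `ignore` fence):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: AVX-512F support was verified at runtime above.
///     unsafe {
///         let a = _mm512_set1_epi64(1);
///         // Per-lane shift counts; the count of 64 in the last lane zeroes it.
///         let count = _mm512_setr_epi64(0, 8, 16, 24, 32, 40, 48, 64);
///         let r = _mm512_sllv_epi64(a, count);
///         let e = _mm512_setr_epi64(1, 1 << 8, 1 << 16, 1 << 24, 1 << 32, 1 << 40, 1 << 48, 0);
///         assert_eq!(_mm512_cmpeq_epi64_mask(r, e), 0xFF);
///     }
/// }
/// ```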
21892#[inline]
21893#[target_feature(enable = "avx512f")]
21894#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21895#[cfg_attr(test, assert_instr(vpsllvq))]
21896pub fn _mm512_sllv_epi64(a: __m512i, count: __m512i) -> __m512i {
21897    unsafe { transmute(vpsllvq(a.as_i64x8(), count.as_i64x8())) }
21898}
21899
21900/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21901///
21902/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sllv_epi64&expand=5349)
21903#[inline]
21904#[target_feature(enable = "avx512f")]
21905#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21906#[cfg_attr(test, assert_instr(vpsllvq))]
21907pub fn _mm512_mask_sllv_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
21908    unsafe {
21909        let shf = _mm512_sllv_epi64(a, count).as_i64x8();
21910        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
21911    }
21912}
21913
21914/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21915///
21916/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sllv_epi64&expand=5350)
21917#[inline]
21918#[target_feature(enable = "avx512f")]
21919#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21920#[cfg_attr(test, assert_instr(vpsllvq))]
21921pub fn _mm512_maskz_sllv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
21922    unsafe {
21923        let shf = _mm512_sllv_epi64(a, count).as_i64x8();
21924        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
21925    }
21926}
21927
21928/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21929///
21930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sllv_epi64&expand=5346)
21931#[inline]
21932#[target_feature(enable = "avx512f,avx512vl")]
21933#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21934#[cfg_attr(test, assert_instr(vpsllvq))]
21935pub fn _mm256_mask_sllv_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21936    unsafe {
21937        let shf = _mm256_sllv_epi64(a, count).as_i64x4();
21938        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
21939    }
21940}
21941
21942/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21943///
21944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sllv_epi64&expand=5347)
21945#[inline]
21946#[target_feature(enable = "avx512f,avx512vl")]
21947#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21948#[cfg_attr(test, assert_instr(vpsllvq))]
21949pub fn _mm256_maskz_sllv_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21950    unsafe {
21951        let shf = _mm256_sllv_epi64(a, count).as_i64x4();
21952        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
21953    }
21954}
21955
21956/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21957///
21958/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sllv_epi64&expand=5343)
21959#[inline]
21960#[target_feature(enable = "avx512f,avx512vl")]
21961#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21962#[cfg_attr(test, assert_instr(vpsllvq))]
21963pub fn _mm_mask_sllv_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21964    unsafe {
21965        let shf = _mm_sllv_epi64(a, count).as_i64x2();
21966        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
21967    }
21968}
21969
21970/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21971///
21972/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sllv_epi64&expand=5344)
21973#[inline]
21974#[target_feature(enable = "avx512f,avx512vl")]
21975#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21976#[cfg_attr(test, assert_instr(vpsllvq))]
21977pub fn _mm_maskz_sllv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21978    unsafe {
21979        let shf = _mm_sllv_epi64(a, count).as_i64x2();
21980        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
21981    }
21982}
21983
21984/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
21985///
21986/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srlv_epi64&expand=5563)
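///
/// A minimal usage sketch (illustrative only, not from the upstream docs; assumes
/// an x86_64 target, a nightly toolchain with the unstable `stdarch_x86_avx512`
/// feature, and AVX-512F verified at runtime, hence the `ignore` fence):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: AVX-512F support was verified at runtime above.
///     unsafe {
///         // Logical right shift: all-ones shifted right by 60 leaves only the low 4 bits.
///         let r = _mm512_srlv_epi64(_mm512_set1_epi64(-1), _mm512_set1_epi64(60));
///         assert_eq!(_mm512_cmpeq_epi64_mask(r, _mm512_set1_epi64(0xF)), 0xFF);
///     }
/// }
/// ```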
21987#[inline]
21988#[target_feature(enable = "avx512f")]
21989#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21990#[cfg_attr(test, assert_instr(vpsrlvq))]
21991pub fn _mm512_srlv_epi64(a: __m512i, count: __m512i) -> __m512i {
21992    unsafe { transmute(vpsrlvq(a.as_i64x8(), count.as_i64x8())) }
21993}
21994
21995/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21996///
21997/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srlv_epi64&expand=5561)
21998#[inline]
21999#[target_feature(enable = "avx512f")]
22000#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22001#[cfg_attr(test, assert_instr(vpsrlvq))]
22002pub fn _mm512_mask_srlv_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
22003    unsafe {
22004        let shf = _mm512_srlv_epi64(a, count).as_i64x8();
22005        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
22006    }
22007}
22008
22009/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22010///
22011/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srlv_epi64&expand=5562)
22012#[inline]
22013#[target_feature(enable = "avx512f")]
22014#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22015#[cfg_attr(test, assert_instr(vpsrlvq))]
22016pub fn _mm512_maskz_srlv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
22017    unsafe {
22018        let shf = _mm512_srlv_epi64(a, count).as_i64x8();
22019        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
22020    }
22021}
22022
22023/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22024///
22025/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srlv_epi64&expand=5558)
22026#[inline]
22027#[target_feature(enable = "avx512f,avx512vl")]
22028#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22029#[cfg_attr(test, assert_instr(vpsrlvq))]
22030pub fn _mm256_mask_srlv_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
22031    unsafe {
22032        let shf = _mm256_srlv_epi64(a, count).as_i64x4();
22033        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
22034    }
22035}
22036
22037/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22038///
22039/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srlv_epi64&expand=5559)
22040#[inline]
22041#[target_feature(enable = "avx512f,avx512vl")]
22042#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22043#[cfg_attr(test, assert_instr(vpsrlvq))]
22044pub fn _mm256_maskz_srlv_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
22045    unsafe {
22046        let shf = _mm256_srlv_epi64(a, count).as_i64x4();
22047        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
22048    }
22049}
22050
22051/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22052///
22053/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srlv_epi64&expand=5555)
22054#[inline]
22055#[target_feature(enable = "avx512f,avx512vl")]
22056#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22057#[cfg_attr(test, assert_instr(vpsrlvq))]
22058pub fn _mm_mask_srlv_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
22059    unsafe {
22060        let shf = _mm_srlv_epi64(a, count).as_i64x2();
22061        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
22062    }
22063}
22064
22065/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22066///
22067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srlv_epi64&expand=5556)
22068#[inline]
22069#[target_feature(enable = "avx512f,avx512vl")]
22070#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22071#[cfg_attr(test, assert_instr(vpsrlvq))]
22072pub fn _mm_maskz_srlv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
22073    unsafe {
22074        let shf = _mm_srlv_epi64(a, count).as_i64x2();
22075        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
22076    }
22077}
22078
22079/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
22080///
22081/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permute_ps&expand=4170)
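///
/// An illustrative sketch of how the 2-bit selector fields in `MASK` behave (not
/// from the upstream docs; assumes an x86_64 target, a nightly toolchain with the
/// unstable `stdarch_x86_avx512` feature, and AVX-512F verified at runtime, hence
/// the `ignore` fence):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: AVX-512F support was verified at runtime above.
///     unsafe {
///         let a = _mm512_setr_ps(
///             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
///         );
///         // Each pair of MASK bits picks one of the four floats within its 128-bit
///         // lane; MASK == 0 broadcasts element 0 of every lane.
///         let r = _mm512_permute_ps::<0b00_00_00_00>(a);
///         let e = _mm512_setr_ps(
///             0., 0., 0., 0., 4., 4., 4., 4., 8., 8., 8., 8., 12., 12., 12., 12.,
///         );
///         assert_eq!(_mm512_cmp_ps_mask::<_CMP_EQ_OQ>(r, e), 0xFFFF);
///     }
/// }
/// ```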
22082#[inline]
22083#[target_feature(enable = "avx512f")]
22084#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22085#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22086#[rustc_legacy_const_generics(1)]
22087pub fn _mm512_permute_ps<const MASK: i32>(a: __m512) -> __m512 {
22088    unsafe {
22089        static_assert_uimm_bits!(MASK, 8);
22090        simd_shuffle!(
22091            a,
22092            a,
22093            [
22094                MASK as u32 & 0b11,
22095                (MASK as u32 >> 2) & 0b11,
22096                ((MASK as u32 >> 4) & 0b11),
22097                ((MASK as u32 >> 6) & 0b11),
22098                (MASK as u32 & 0b11) + 4,
22099                ((MASK as u32 >> 2) & 0b11) + 4,
22100                ((MASK as u32 >> 4) & 0b11) + 4,
22101                ((MASK as u32 >> 6) & 0b11) + 4,
22102                (MASK as u32 & 0b11) + 8,
22103                ((MASK as u32 >> 2) & 0b11) + 8,
22104                ((MASK as u32 >> 4) & 0b11) + 8,
22105                ((MASK as u32 >> 6) & 0b11) + 8,
22106                (MASK as u32 & 0b11) + 12,
22107                ((MASK as u32 >> 2) & 0b11) + 12,
22108                ((MASK as u32 >> 4) & 0b11) + 12,
22109                ((MASK as u32 >> 6) & 0b11) + 12,
22110            ],
22111        )
22112    }
22113}
22114
22115/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22116///
22117/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permute_ps&expand=4168)
22118#[inline]
22119#[target_feature(enable = "avx512f")]
22120#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22121#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22122#[rustc_legacy_const_generics(3)]
22123pub fn _mm512_mask_permute_ps<const MASK: i32>(src: __m512, k: __mmask16, a: __m512) -> __m512 {
22124    unsafe {
22125        static_assert_uimm_bits!(MASK, 8);
22126        let r = _mm512_permute_ps::<MASK>(a);
22127        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
22128    }
22129}
22130
22131/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22132///
22133/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permute_ps&expand=4169)
22134#[inline]
22135#[target_feature(enable = "avx512f")]
22136#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22137#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22138#[rustc_legacy_const_generics(2)]
22139pub fn _mm512_maskz_permute_ps<const MASK: i32>(k: __mmask16, a: __m512) -> __m512 {
22140    unsafe {
22141        static_assert_uimm_bits!(MASK, 8);
22142        let r = _mm512_permute_ps::<MASK>(a);
22143        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
22144    }
22145}
22146
22147/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22148///
22149/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permute_ps&expand=4165)
22150#[inline]
22151#[target_feature(enable = "avx512f,avx512vl")]
22152#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22153#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22154#[rustc_legacy_const_generics(3)]
22155pub fn _mm256_mask_permute_ps<const MASK: i32>(src: __m256, k: __mmask8, a: __m256) -> __m256 {
22156    unsafe {
22157        let r = _mm256_permute_ps::<MASK>(a);
22158        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
22159    }
22160}
22161
22162/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22163///
22164/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permute_ps&expand=4166)
22165#[inline]
22166#[target_feature(enable = "avx512f,avx512vl")]
22167#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22168#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22169#[rustc_legacy_const_generics(2)]
22170pub fn _mm256_maskz_permute_ps<const MASK: i32>(k: __mmask8, a: __m256) -> __m256 {
22171    unsafe {
22172        let r = _mm256_permute_ps::<MASK>(a);
22173        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
22174    }
22175}
22176
22177/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22178///
22179/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permute_ps&expand=4162)
22180#[inline]
22181#[target_feature(enable = "avx512f,avx512vl")]
22182#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22183#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22184#[rustc_legacy_const_generics(3)]
22185pub fn _mm_mask_permute_ps<const MASK: i32>(src: __m128, k: __mmask8, a: __m128) -> __m128 {
22186    unsafe {
22187        let r = _mm_permute_ps::<MASK>(a);
22188        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
22189    }
22190}
22191
22192/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22193///
22194/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permute_ps&expand=4163)
22195#[inline]
22196#[target_feature(enable = "avx512f,avx512vl")]
22197#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22198#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22199#[rustc_legacy_const_generics(2)]
22200pub fn _mm_maskz_permute_ps<const MASK: i32>(k: __mmask8, a: __m128) -> __m128 {
22201    unsafe {
22202        let r = _mm_permute_ps::<MASK>(a);
22203        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
22204    }
22205}
22206
22207/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
22208///
22209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permute_pd&expand=4161)
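///
/// An illustrative sketch of the per-lane selector bits in `MASK` (not from the
/// upstream docs; assumes an x86_64 target, a nightly toolchain with the unstable
/// `stdarch_x86_avx512` feature, and AVX-512F verified at runtime, hence the
/// `ignore` fence):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: AVX-512F support was verified at runtime above.
///     unsafe {
///         let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
///         // Each MASK bit selects the low (0) or high (1) double of its 128-bit lane;
///         // 0b01010101 swaps the two doubles in every lane.
///         let r = _mm512_permute_pd::<0b01010101>(a);
///         let e = _mm512_setr_pd(1., 0., 3., 2., 5., 4., 7., 6.);
///         assert_eq!(_mm512_cmp_pd_mask::<_CMP_EQ_OQ>(r, e), 0xFF);
///     }
/// }
/// ```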
22210#[inline]
22211#[target_feature(enable = "avx512f")]
22212#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22213#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
22214#[rustc_legacy_const_generics(1)]
22215pub fn _mm512_permute_pd<const MASK: i32>(a: __m512d) -> __m512d {
22216    unsafe {
22217        static_assert_uimm_bits!(MASK, 8);
22218        simd_shuffle!(
22219            a,
22220            a,
22221            [
22222                MASK as u32 & 0b1,
22223                ((MASK as u32 >> 1) & 0b1),
22224                ((MASK as u32 >> 2) & 0b1) + 2,
22225                ((MASK as u32 >> 3) & 0b1) + 2,
22226                ((MASK as u32 >> 4) & 0b1) + 4,
22227                ((MASK as u32 >> 5) & 0b1) + 4,
22228                ((MASK as u32 >> 6) & 0b1) + 6,
22229                ((MASK as u32 >> 7) & 0b1) + 6,
22230            ],
22231        )
22232    }
22233}
22234
22235/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22236///
22237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permute_pd&expand=4159)
22238#[inline]
22239#[target_feature(enable = "avx512f")]
22240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22241#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
22242#[rustc_legacy_const_generics(3)]
22243pub fn _mm512_mask_permute_pd<const MASK: i32>(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
22244    unsafe {
22245        static_assert_uimm_bits!(MASK, 8);
22246        let r = _mm512_permute_pd::<MASK>(a);
22247        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
22248    }
22249}
22250
22251/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22252///
22253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permute_pd&expand=4160)
22254#[inline]
22255#[target_feature(enable = "avx512f")]
22256#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22257#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
22258#[rustc_legacy_const_generics(2)]
22259pub fn _mm512_maskz_permute_pd<const MASK: i32>(k: __mmask8, a: __m512d) -> __m512d {
22260    unsafe {
22261        static_assert_uimm_bits!(MASK, 8);
22262        let r = _mm512_permute_pd::<MASK>(a);
22263        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
22264    }
22265}
22266
22267/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22268///
22269/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permute_pd&expand=4156)
22270#[inline]
22271#[target_feature(enable = "avx512f,avx512vl")]
22272#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22273#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01))]
22274#[rustc_legacy_const_generics(3)]
22275pub fn _mm256_mask_permute_pd<const MASK: i32>(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
22276    unsafe {
22277        static_assert_uimm_bits!(MASK, 4);
22278        let r = _mm256_permute_pd::<MASK>(a);
22279        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
22280    }
22281}
22282
22283/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22284///
22285/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permute_pd&expand=4157)
22286#[inline]
22287#[target_feature(enable = "avx512f,avx512vl")]
22288#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22289#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01))]
22290#[rustc_legacy_const_generics(2)]
22291pub fn _mm256_maskz_permute_pd<const MASK: i32>(k: __mmask8, a: __m256d) -> __m256d {
22292    unsafe {
22293        static_assert_uimm_bits!(MASK, 4);
22294        let r = _mm256_permute_pd::<MASK>(a);
22295        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
22296    }
22297}
22298
22299/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22300///
22301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permute_pd&expand=4153)
22302#[inline]
22303#[target_feature(enable = "avx512f,avx512vl")]
22304#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22305#[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0b01))]
22306#[rustc_legacy_const_generics(3)]
22307pub fn _mm_mask_permute_pd<const IMM2: i32>(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
22308    unsafe {
22309        static_assert_uimm_bits!(IMM2, 2);
22310        let r = _mm_permute_pd::<IMM2>(a);
22311        transmute(simd_select_bitmask(k, r.as_f64x2(), src.as_f64x2()))
22312    }
22313}
22314
22315/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22316///
22317/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permute_pd&expand=4154)
22318#[inline]
22319#[target_feature(enable = "avx512f,avx512vl")]
22320#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22321#[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0b01))]
22322#[rustc_legacy_const_generics(2)]
22323pub fn _mm_maskz_permute_pd<const IMM2: i32>(k: __mmask8, a: __m128d) -> __m128d {
22324    unsafe {
22325        static_assert_uimm_bits!(IMM2, 2);
22326        let r = _mm_permute_pd::<IMM2>(a);
22327        transmute(simd_select_bitmask(k, r.as_f64x2(), f64x2::ZERO))
22328    }
22329}
22330
22331/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst.
22332///
22333/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex_epi64&expand=4208)
22334#[inline]
22335#[target_feature(enable = "avx512f")]
22336#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22337#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22338#[rustc_legacy_const_generics(1)]
22339pub fn _mm512_permutex_epi64<const MASK: i32>(a: __m512i) -> __m512i {
22340    unsafe {
22341        static_assert_uimm_bits!(MASK, 8);
22342        simd_shuffle!(
22343            a,
22344            a,
22345            [
22346                MASK as u32 & 0b11,
22347                (MASK as u32 >> 2) & 0b11,
22348                ((MASK as u32 >> 4) & 0b11),
22349                ((MASK as u32 >> 6) & 0b11),
22350                (MASK as u32 & 0b11) + 4,
22351                ((MASK as u32 >> 2) & 0b11) + 4,
22352                ((MASK as u32 >> 4) & 0b11) + 4,
22353                ((MASK as u32 >> 6) & 0b11) + 4,
22354            ],
22355        )
22356    }
22357}
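
// Illustrative sketch (not part of the implementation above): imm8 holds four 2-bit
// indices that are applied independently to each 256-bit half of the vector:
//
//     let a = _mm512_setr_epi64(10, 11, 12, 13, 14, 15, 16, 17);
//     // MASK = 0b01_00_11_10 selects positions [2, 3, 0, 1] within each 256-bit half.
//     let r = _mm512_permutex_epi64::<0b01_00_11_10>(a);
//     // r == _mm512_setr_epi64(12, 13, 10, 11, 16, 17, 14, 15)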
22358
22359/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22360///
22361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex_epi64&expand=4206)
22362#[inline]
22363#[target_feature(enable = "avx512f")]
22364#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22365#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22366#[rustc_legacy_const_generics(3)]
22367pub fn _mm512_mask_permutex_epi64<const MASK: i32>(
22368    src: __m512i,
22369    k: __mmask8,
22370    a: __m512i,
22371) -> __m512i {
22372    unsafe {
22373        static_assert_uimm_bits!(MASK, 8);
22374        let r = _mm512_permutex_epi64::<MASK>(a);
22375        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
22376    }
22377}
22378
22379/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22380///
22381/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex_epi64&expand=4207)
22382#[inline]
22383#[target_feature(enable = "avx512f")]
22384#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22385#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22386#[rustc_legacy_const_generics(2)]
22387pub fn _mm512_maskz_permutex_epi64<const MASK: i32>(k: __mmask8, a: __m512i) -> __m512i {
22388    unsafe {
22389        static_assert_uimm_bits!(MASK, 8);
22390        let r = _mm512_permutex_epi64::<MASK>(a);
22391        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
22392    }
22393}
22394
22395/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst.
22396///
22397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex_epi64&expand=4205)
22398#[inline]
22399#[target_feature(enable = "avx512f,avx512vl")]
22400#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22401#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22402#[rustc_legacy_const_generics(1)]
22403pub fn _mm256_permutex_epi64<const MASK: i32>(a: __m256i) -> __m256i {
22404    unsafe {
22405        static_assert_uimm_bits!(MASK, 8);
22406        simd_shuffle!(
22407            a,
22408            a,
22409            [
22410                MASK as u32 & 0b11,
22411                (MASK as u32 >> 2) & 0b11,
22412                ((MASK as u32 >> 4) & 0b11),
22413                ((MASK as u32 >> 6) & 0b11),
22414            ],
22415        )
22416    }
22417}
22418
22419/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22420///
22421/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex_epi64&expand=4203)
22422#[inline]
22423#[target_feature(enable = "avx512f,avx512vl")]
22424#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22425#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22426#[rustc_legacy_const_generics(3)]
22427pub fn _mm256_mask_permutex_epi64<const MASK: i32>(
22428    src: __m256i,
22429    k: __mmask8,
22430    a: __m256i,
22431) -> __m256i {
22432    unsafe {
22433        static_assert_uimm_bits!(MASK, 8);
22434        let r = _mm256_permutex_epi64::<MASK>(a);
22435        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
22436    }
22437}
22438
22439/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22440///
22441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex_epi64&expand=4204)
22442#[inline]
22443#[target_feature(enable = "avx512f,avx512vl")]
22444#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22445#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22446#[rustc_legacy_const_generics(2)]
22447pub fn _mm256_maskz_permutex_epi64<const MASK: i32>(k: __mmask8, a: __m256i) -> __m256i {
22448    unsafe {
22449        static_assert_uimm_bits!(MASK, 8);
22450        let r = _mm256_permutex_epi64::<MASK>(a);
22451        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
22452    }
22453}
22454
22455/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst.
22456///
22457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex_pd&expand=4214)
22458#[inline]
22459#[target_feature(enable = "avx512f")]
22460#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22461#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22462#[rustc_legacy_const_generics(1)]
22463pub fn _mm512_permutex_pd<const MASK: i32>(a: __m512d) -> __m512d {
22464    unsafe {
22465        static_assert_uimm_bits!(MASK, 8);
22466        simd_shuffle!(
22467            a,
22468            a,
22469            [
22470                MASK as u32 & 0b11,
22471                (MASK as u32 >> 2) & 0b11,
22472                ((MASK as u32 >> 4) & 0b11),
22473                ((MASK as u32 >> 6) & 0b11),
22474                (MASK as u32 & 0b11) + 4,
22475                ((MASK as u32 >> 2) & 0b11) + 4,
22476                ((MASK as u32 >> 4) & 0b11) + 4,
22477                ((MASK as u32 >> 6) & 0b11) + 4,
22478            ],
22479        )
22480    }
22481}
22482
22483/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22484///
22485/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex_pd&expand=4212)
22486#[inline]
22487#[target_feature(enable = "avx512f")]
22488#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22489#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22490#[rustc_legacy_const_generics(3)]
22491pub fn _mm512_mask_permutex_pd<const MASK: i32>(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
22492    unsafe {
22493        let r = _mm512_permutex_pd::<MASK>(a);
22494        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
22495    }
22496}
22497
22498/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22499///
22500/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex_pd&expand=4213)
22501#[inline]
22502#[target_feature(enable = "avx512f")]
22503#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22504#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22505#[rustc_legacy_const_generics(2)]
22506pub fn _mm512_maskz_permutex_pd<const MASK: i32>(k: __mmask8, a: __m512d) -> __m512d {
22507    unsafe {
22508        let r = _mm512_permutex_pd::<MASK>(a);
22509        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
22510    }
22511}
22512
22513/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst.
22514///
22515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex_pd&expand=4211)
22516#[inline]
22517#[target_feature(enable = "avx512f,avx512vl")]
22518#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22519#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22520#[rustc_legacy_const_generics(1)]
22521pub fn _mm256_permutex_pd<const MASK: i32>(a: __m256d) -> __m256d {
22522    unsafe {
22523        static_assert_uimm_bits!(MASK, 8);
22524        simd_shuffle!(
22525            a,
22526            a,
22527            [
22528                MASK as u32 & 0b11,
22529                (MASK as u32 >> 2) & 0b11,
22530                ((MASK as u32 >> 4) & 0b11),
22531                ((MASK as u32 >> 6) & 0b11),
22532            ],
22533        )
22534    }
22535}
22536
22537/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22538///
22539/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex_pd&expand=4209)
22540#[inline]
22541#[target_feature(enable = "avx512f,avx512vl")]
22542#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22543#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22544#[rustc_legacy_const_generics(3)]
22545pub fn _mm256_mask_permutex_pd<const MASK: i32>(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
22546    unsafe {
22547        static_assert_uimm_bits!(MASK, 8);
22548        let r = _mm256_permutex_pd::<MASK>(a);
22549        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
22550    }
22551}
22552
22553/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22554///
22555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex_pd&expand=4210)
22556#[inline]
22557#[target_feature(enable = "avx512f,avx512vl")]
22558#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22559#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22560#[rustc_legacy_const_generics(2)]
22561pub fn _mm256_maskz_permutex_pd<const MASK: i32>(k: __mmask8, a: __m256d) -> __m256d {
22562    unsafe {
22563        static_assert_uimm_bits!(MASK, 8);
22564        let r = _mm256_permutex_pd::<MASK>(a);
22565        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
22566    }
22567}
22568
22569/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the permutevar name. This intrinsic is identical to _mm512_permutexvar_epi32, and it is recommended that you use that intrinsic name.
22570///
22571/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_epi32&expand=4182)
22572#[inline]
22573#[target_feature(enable = "avx512f")]
22574#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22575#[cfg_attr(test, assert_instr(vperm))] //should be vpermd
22576pub fn _mm512_permutevar_epi32(idx: __m512i, a: __m512i) -> __m512i {
22577    unsafe { transmute(vpermd(a.as_i32x16(), idx.as_i32x16())) }
22578}
22579
22580/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the permutevar name. This intrinsic is identical to _mm512_mask_permutexvar_epi32, and it is recommended that you use that intrinsic name.
22581///
22582/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_epi32&expand=4181)
22583#[inline]
22584#[target_feature(enable = "avx512f")]
22585#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22586#[cfg_attr(test, assert_instr(vpermd))]
22587pub fn _mm512_mask_permutevar_epi32(
22588    src: __m512i,
22589    k: __mmask16,
22590    idx: __m512i,
22591    a: __m512i,
22592) -> __m512i {
22593    unsafe {
22594        let permute = _mm512_permutevar_epi32(idx, a).as_i32x16();
22595        transmute(simd_select_bitmask(k, permute, src.as_i32x16()))
22596    }
22597}
22598
22599/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst.
22600///
22601/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_ps&expand=4200)
22602#[inline]
22603#[target_feature(enable = "avx512f")]
22604#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22605#[cfg_attr(test, assert_instr(vpermilps))]
22606pub fn _mm512_permutevar_ps(a: __m512, b: __m512i) -> __m512 {
22607    unsafe { transmute(vpermilps(a.as_f32x16(), b.as_i32x16())) }
22608}
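
// Illustrative sketch (not part of the implementation above): the low two bits of each
// 32-bit element of `b` pick an element from the matching 128-bit lane of `a`:
//
//     let a = _mm512_setr_ps(0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.);
//     let b = _mm512_set1_epi32(0b01); // every element selects slot 1 of its own lane
//     let r = _mm512_permutevar_ps(a, b);
//     // r == _mm512_setr_ps(1., 1., 1., 1., 5., 5., 5., 5., 9., 9., 9., 9., 13., 13., 13., 13.)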
22609
22610/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22611///
22612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_ps&expand=4198)
22613#[inline]
22614#[target_feature(enable = "avx512f")]
22615#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22616#[cfg_attr(test, assert_instr(vpermilps))]
22617pub fn _mm512_mask_permutevar_ps(src: __m512, k: __mmask16, a: __m512, b: __m512i) -> __m512 {
22618    unsafe {
22619        let permute = _mm512_permutevar_ps(a, b).as_f32x16();
22620        transmute(simd_select_bitmask(k, permute, src.as_f32x16()))
22621    }
22622}
22623
22624/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22625///
22626/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutevar_ps&expand=4199)
22627#[inline]
22628#[target_feature(enable = "avx512f")]
22629#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22630#[cfg_attr(test, assert_instr(vpermilps))]
22631pub fn _mm512_maskz_permutevar_ps(k: __mmask16, a: __m512, b: __m512i) -> __m512 {
22632    unsafe {
22633        let permute = _mm512_permutevar_ps(a, b).as_f32x16();
22634        transmute(simd_select_bitmask(k, permute, f32x16::ZERO))
22635    }
22636}
22637
22638/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22639///
22640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutevar_ps&expand=4195)
22641#[inline]
22642#[target_feature(enable = "avx512f,avx512vl")]
22643#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22644#[cfg_attr(test, assert_instr(vpermilps))]
22645pub fn _mm256_mask_permutevar_ps(src: __m256, k: __mmask8, a: __m256, b: __m256i) -> __m256 {
22646    unsafe {
22647        let permute = _mm256_permutevar_ps(a, b).as_f32x8();
22648        transmute(simd_select_bitmask(k, permute, src.as_f32x8()))
22649    }
22650}
22651
22652/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22653///
22654/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutevar_ps&expand=4196)
22655#[inline]
22656#[target_feature(enable = "avx512f,avx512vl")]
22657#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22658#[cfg_attr(test, assert_instr(vpermilps))]
22659pub fn _mm256_maskz_permutevar_ps(k: __mmask8, a: __m256, b: __m256i) -> __m256 {
22660    unsafe {
22661        let permute = _mm256_permutevar_ps(a, b).as_f32x8();
22662        transmute(simd_select_bitmask(k, permute, f32x8::ZERO))
22663    }
22664}
22665
22666/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22667///
22668/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutevar_ps&expand=4192)
22669#[inline]
22670#[target_feature(enable = "avx512f,avx512vl")]
22671#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22672#[cfg_attr(test, assert_instr(vpermilps))]
22673pub fn _mm_mask_permutevar_ps(src: __m128, k: __mmask8, a: __m128, b: __m128i) -> __m128 {
22674    unsafe {
22675        let permute = _mm_permutevar_ps(a, b).as_f32x4();
22676        transmute(simd_select_bitmask(k, permute, src.as_f32x4()))
22677    }
22678}
22679
22680/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22681///
22682/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutevar_ps&expand=4193)
22683#[inline]
22684#[target_feature(enable = "avx512f,avx512vl")]
22685#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22686#[cfg_attr(test, assert_instr(vpermilps))]
22687pub fn _mm_maskz_permutevar_ps(k: __mmask8, a: __m128, b: __m128i) -> __m128 {
22688    unsafe {
22689        let permute = _mm_permutevar_ps(a, b).as_f32x4();
22690        transmute(simd_select_bitmask(k, permute, f32x4::ZERO))
22691    }
22692}
22693
22694/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst.
22695///
22696/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_pd&expand=4191)
22697#[inline]
22698#[target_feature(enable = "avx512f")]
22699#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22700#[cfg_attr(test, assert_instr(vpermilpd))]
22701pub fn _mm512_permutevar_pd(a: __m512d, b: __m512i) -> __m512d {
22702    unsafe { transmute(vpermilpd(a.as_f64x8(), b.as_i64x8())) }
22703}
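
// Illustrative sketch (not part of the implementation above): for the 64-bit variant the
// selector is bit 1 (not bit 0) of each control element:
//
//     let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
//     let b = _mm512_set1_epi64(0b10); // bit 1 set: pick the high double of each 128-bit lane
//     let r = _mm512_permutevar_pd(a, b);
//     // r == _mm512_setr_pd(1.0, 1.0, 3.0, 3.0, 5.0, 5.0, 7.0, 7.0)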
22704
22705/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22706///
22707/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_pd&expand=4189)
22708#[inline]
22709#[target_feature(enable = "avx512f")]
22710#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22711#[cfg_attr(test, assert_instr(vpermilpd))]
22712pub fn _mm512_mask_permutevar_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512i) -> __m512d {
22713    unsafe {
22714        let permute = _mm512_permutevar_pd(a, b).as_f64x8();
22715        transmute(simd_select_bitmask(k, permute, src.as_f64x8()))
22716    }
22717}
22718
22719/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22720///
22721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutevar_pd&expand=4190)
22722#[inline]
22723#[target_feature(enable = "avx512f")]
22724#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22725#[cfg_attr(test, assert_instr(vpermilpd))]
22726pub fn _mm512_maskz_permutevar_pd(k: __mmask8, a: __m512d, b: __m512i) -> __m512d {
22727    unsafe {
22728        let permute = _mm512_permutevar_pd(a, b).as_f64x8();
22729        transmute(simd_select_bitmask(k, permute, f64x8::ZERO))
22730    }
22731}
22732
22733/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22734///
22735/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutevar_pd&expand=4186)
22736#[inline]
22737#[target_feature(enable = "avx512f,avx512vl")]
22738#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22739#[cfg_attr(test, assert_instr(vpermilpd))]
22740pub fn _mm256_mask_permutevar_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256i) -> __m256d {
22741    unsafe {
22742        let permute = _mm256_permutevar_pd(a, b).as_f64x4();
22743        transmute(simd_select_bitmask(k, permute, src.as_f64x4()))
22744    }
22745}
22746
22747/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22748///
22749/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutevar_pd&expand=4187)
22750#[inline]
22751#[target_feature(enable = "avx512f,avx512vl")]
22752#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22753#[cfg_attr(test, assert_instr(vpermilpd))]
22754pub fn _mm256_maskz_permutevar_pd(k: __mmask8, a: __m256d, b: __m256i) -> __m256d {
22755    unsafe {
22756        let permute = _mm256_permutevar_pd(a, b).as_f64x4();
22757        transmute(simd_select_bitmask(k, permute, f64x4::ZERO))
22758    }
22759}
22760
22761/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22762///
22763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutevar_pd&expand=4183)
22764#[inline]
22765#[target_feature(enable = "avx512f,avx512vl")]
22766#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22767#[cfg_attr(test, assert_instr(vpermilpd))]
22768pub fn _mm_mask_permutevar_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128i) -> __m128d {
22769    unsafe {
22770        let permute = _mm_permutevar_pd(a, b).as_f64x2();
22771        transmute(simd_select_bitmask(k, permute, src.as_f64x2()))
22772    }
22773}
22774
22775/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22776///
22777/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutevar_pd&expand=4184)
22778#[inline]
22779#[target_feature(enable = "avx512f,avx512vl")]
22780#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22781#[cfg_attr(test, assert_instr(vpermilpd))]
22782pub fn _mm_maskz_permutevar_pd(k: __mmask8, a: __m128d, b: __m128i) -> __m128d {
22783    unsafe {
22784        let permute = _mm_permutevar_pd(a, b).as_f64x2();
22785        transmute(simd_select_bitmask(k, permute, f64x2::ZERO))
22786    }
22787}
22788
22789/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
22790///
22791/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi32&expand=4301)
22792#[inline]
22793#[target_feature(enable = "avx512f")]
22794#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22795#[cfg_attr(test, assert_instr(vperm))] //should be vpermd
22796pub fn _mm512_permutexvar_epi32(idx: __m512i, a: __m512i) -> __m512i {
22797    unsafe { transmute(vpermd(a.as_i32x16(), idx.as_i32x16())) }
22798}
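
// Illustrative sketch (not part of the implementation above): each element of `idx`
// indexes anywhere into the full 512-bit vector, so reversing the indices reverses `a`:
//
//     let idx = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
//     let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
//     let r = _mm512_permutexvar_epi32(idx, a);
//     // r == _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)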
22799
22800/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22801///
22802/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi32&expand=4299)
22803#[inline]
22804#[target_feature(enable = "avx512f")]
22805#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22806#[cfg_attr(test, assert_instr(vpermd))]
22807pub fn _mm512_mask_permutexvar_epi32(
22808    src: __m512i,
22809    k: __mmask16,
22810    idx: __m512i,
22811    a: __m512i,
22812) -> __m512i {
22813    unsafe {
22814        let permute = _mm512_permutexvar_epi32(idx, a).as_i32x16();
22815        transmute(simd_select_bitmask(k, permute, src.as_i32x16()))
22816    }
22817}
22818
22819/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22820///
22821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi32&expand=4300)
22822#[inline]
22823#[target_feature(enable = "avx512f")]
22824#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22825#[cfg_attr(test, assert_instr(vpermd))]
22826pub fn _mm512_maskz_permutexvar_epi32(k: __mmask16, idx: __m512i, a: __m512i) -> __m512i {
22827    unsafe {
22828        let permute = _mm512_permutexvar_epi32(idx, a).as_i32x16();
22829        transmute(simd_select_bitmask(k, permute, i32x16::ZERO))
22830    }
22831}
22832
22833/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
22834///
22835/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi32&expand=4298)
22836#[inline]
22837#[target_feature(enable = "avx512f,avx512vl")]
22838#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22839#[cfg_attr(test, assert_instr(vperm))] //should be vpermd
22840pub fn _mm256_permutexvar_epi32(idx: __m256i, a: __m256i) -> __m256i {
22841    _mm256_permutevar8x32_epi32(a, idx) // llvm use llvm.x86.avx2.permd
22842}
22843
22844/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22845///
22846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi32&expand=4296)
22847#[inline]
22848#[target_feature(enable = "avx512f,avx512vl")]
22849#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22850#[cfg_attr(test, assert_instr(vpermd))]
22851pub fn _mm256_mask_permutexvar_epi32(
22852    src: __m256i,
22853    k: __mmask8,
22854    idx: __m256i,
22855    a: __m256i,
22856) -> __m256i {
22857    unsafe {
22858        let permute = _mm256_permutexvar_epi32(idx, a).as_i32x8();
22859        transmute(simd_select_bitmask(k, permute, src.as_i32x8()))
22860    }
22861}
22862
22863/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22864///
22865/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi32&expand=4297)
22866#[inline]
22867#[target_feature(enable = "avx512f,avx512vl")]
22868#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22869#[cfg_attr(test, assert_instr(vpermd))]
22870pub fn _mm256_maskz_permutexvar_epi32(k: __mmask8, idx: __m256i, a: __m256i) -> __m256i {
22871    unsafe {
22872        let permute = _mm256_permutexvar_epi32(idx, a).as_i32x8();
22873        transmute(simd_select_bitmask(k, permute, i32x8::ZERO))
22874    }
22875}
22876
22877/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
22878///
22879/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi64&expand=4307)
22880#[inline]
22881#[target_feature(enable = "avx512f")]
22882#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22883#[cfg_attr(test, assert_instr(vperm))] //should be vpermq
22884pub fn _mm512_permutexvar_epi64(idx: __m512i, a: __m512i) -> __m512i {
22885    unsafe { transmute(vpermq(a.as_i64x8(), idx.as_i64x8())) }
22886}
22887
22888/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22889///
22890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi64&expand=4305)
22891#[inline]
22892#[target_feature(enable = "avx512f")]
22893#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22894#[cfg_attr(test, assert_instr(vpermq))]
22895pub fn _mm512_mask_permutexvar_epi64(
22896    src: __m512i,
22897    k: __mmask8,
22898    idx: __m512i,
22899    a: __m512i,
22900) -> __m512i {
22901    unsafe {
22902        let permute = _mm512_permutexvar_epi64(idx, a).as_i64x8();
22903        transmute(simd_select_bitmask(k, permute, src.as_i64x8()))
22904    }
22905}
22906
22907/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22908///
22909/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi64&expand=4306)
22910#[inline]
22911#[target_feature(enable = "avx512f")]
22912#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22913#[cfg_attr(test, assert_instr(vpermq))]
22914pub fn _mm512_maskz_permutexvar_epi64(k: __mmask8, idx: __m512i, a: __m512i) -> __m512i {
22915    unsafe {
22916        let permute = _mm512_permutexvar_epi64(idx, a).as_i64x8();
22917        transmute(simd_select_bitmask(k, permute, i64x8::ZERO))
22918    }
22919}
22920
22921/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
22922///
22923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi64&expand=4304)
22924#[inline]
22925#[target_feature(enable = "avx512f,avx512vl")]
22926#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22927#[cfg_attr(test, assert_instr(vperm))] //should be vpermq
22928pub fn _mm256_permutexvar_epi64(idx: __m256i, a: __m256i) -> __m256i {
22929    unsafe { transmute(vpermq256(a.as_i64x4(), idx.as_i64x4())) }
22930}
22931
22932/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22933///
22934/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi64&expand=4302)
22935#[inline]
22936#[target_feature(enable = "avx512f,avx512vl")]
22937#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22938#[cfg_attr(test, assert_instr(vpermq))]
22939pub fn _mm256_mask_permutexvar_epi64(
22940    src: __m256i,
22941    k: __mmask8,
22942    idx: __m256i,
22943    a: __m256i,
22944) -> __m256i {
22945    unsafe {
22946        let permute = _mm256_permutexvar_epi64(idx, a).as_i64x4();
22947        transmute(simd_select_bitmask(k, permute, src.as_i64x4()))
22948    }
22949}
22950
22951/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22952///
22953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi64&expand=4303)
22954#[inline]
22955#[target_feature(enable = "avx512f,avx512vl")]
22956#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22957#[cfg_attr(test, assert_instr(vpermq))]
22958pub fn _mm256_maskz_permutexvar_epi64(k: __mmask8, idx: __m256i, a: __m256i) -> __m256i {
22959    unsafe {
22960        let permute = _mm256_permutexvar_epi64(idx, a).as_i64x4();
22961        transmute(simd_select_bitmask(k, permute, i64x4::ZERO))
22962    }
22963}
22964
22965/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
22966///
22967/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_ps&expand=4200)
22968#[inline]
22969#[target_feature(enable = "avx512f")]
22970#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22971#[cfg_attr(test, assert_instr(vpermps))]
22972pub fn _mm512_permutexvar_ps(idx: __m512i, a: __m512) -> __m512 {
22973    unsafe { transmute(vpermps(a.as_f32x16(), idx.as_i32x16())) }
22974}
22975
22976/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22977///
22978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_ps&expand=4326)
22979#[inline]
22980#[target_feature(enable = "avx512f")]
22981#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22982#[cfg_attr(test, assert_instr(vpermps))]
22983pub fn _mm512_mask_permutexvar_ps(src: __m512, k: __mmask16, idx: __m512i, a: __m512) -> __m512 {
22984    unsafe {
22985        let permute = _mm512_permutexvar_ps(idx, a).as_f32x16();
22986        transmute(simd_select_bitmask(k, permute, src.as_f32x16()))
22987    }
22988}
22989
22990/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22991///
22992/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_ps&expand=4327)
22993#[inline]
22994#[target_feature(enable = "avx512f")]
22995#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22996#[cfg_attr(test, assert_instr(vpermps))]
22997pub fn _mm512_maskz_permutexvar_ps(k: __mmask16, idx: __m512i, a: __m512) -> __m512 {
22998    unsafe {
22999        let permute = _mm512_permutexvar_ps(idx, a).as_f32x16();
23000        transmute(simd_select_bitmask(k, permute, f32x16::ZERO))
23001    }
23002}
23003
23004/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
23005///
23006/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_ps&expand=4325)
23007#[inline]
23008#[target_feature(enable = "avx512f,avx512vl")]
23009#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23010#[cfg_attr(test, assert_instr(vpermps))]
23011pub fn _mm256_permutexvar_ps(idx: __m256i, a: __m256) -> __m256 {
23012    _mm256_permutevar8x32_ps(a, idx) //llvm.x86.avx2.permps
23013}
23014
23015/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23016///
23017/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_ps&expand=4323)
23018#[inline]
23019#[target_feature(enable = "avx512f,avx512vl")]
23020#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23021#[cfg_attr(test, assert_instr(vpermps))]
23022pub fn _mm256_mask_permutexvar_ps(src: __m256, k: __mmask8, idx: __m256i, a: __m256) -> __m256 {
23023    unsafe {
23024        let permute = _mm256_permutexvar_ps(idx, a).as_f32x8();
23025        transmute(simd_select_bitmask(k, permute, src.as_f32x8()))
23026    }
23027}
23028
23029/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23030///
23031/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_ps&expand=4324)
23032#[inline]
23033#[target_feature(enable = "avx512f,avx512vl")]
23034#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23035#[cfg_attr(test, assert_instr(vpermps))]
23036pub fn _mm256_maskz_permutexvar_ps(k: __mmask8, idx: __m256i, a: __m256) -> __m256 {
23037    unsafe {
23038        let permute = _mm256_permutexvar_ps(idx, a).as_f32x8();
23039        transmute(simd_select_bitmask(k, permute, f32x8::ZERO))
23040    }
23041}
23042
23043/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
23044///
23045/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_pd&expand=4322)
23046#[inline]
23047#[target_feature(enable = "avx512f")]
23048#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23049#[cfg_attr(test, assert_instr(vpermpd))]
23050pub fn _mm512_permutexvar_pd(idx: __m512i, a: __m512d) -> __m512d {
23051    unsafe { transmute(vpermpd(a.as_f64x8(), idx.as_i64x8())) }
23052}
23053
23054/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23055///
23056/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_pd&expand=4320)
23057#[inline]
23058#[target_feature(enable = "avx512f")]
23059#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23060#[cfg_attr(test, assert_instr(vpermpd))]
23061pub fn _mm512_mask_permutexvar_pd(src: __m512d, k: __mmask8, idx: __m512i, a: __m512d) -> __m512d {
23062    unsafe {
23063        let permute = _mm512_permutexvar_pd(idx, a).as_f64x8();
23064        transmute(simd_select_bitmask(k, permute, src.as_f64x8()))
23065    }
23066}
23067
23068/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23069///
23070/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_pd&expand=4321)
23071#[inline]
23072#[target_feature(enable = "avx512f")]
23073#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23074#[cfg_attr(test, assert_instr(vpermpd))]
23075pub fn _mm512_maskz_permutexvar_pd(k: __mmask8, idx: __m512i, a: __m512d) -> __m512d {
23076    unsafe {
23077        let permute = _mm512_permutexvar_pd(idx, a).as_f64x8();
23078        transmute(simd_select_bitmask(k, permute, f64x8::ZERO))
23079    }
23080}
23081
23082/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
23083///
23084/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_pd&expand=4319)
23085#[inline]
23086#[target_feature(enable = "avx512f,avx512vl")]
23087#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23088#[cfg_attr(test, assert_instr(vpermpd))]
23089pub fn _mm256_permutexvar_pd(idx: __m256i, a: __m256d) -> __m256d {
23090    unsafe { transmute(vpermpd256(a.as_f64x4(), idx.as_i64x4())) }
23091}
23092
23093/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23094///
23095/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_pd&expand=4317)
23096#[inline]
23097#[target_feature(enable = "avx512f,avx512vl")]
23098#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23099#[cfg_attr(test, assert_instr(vpermpd))]
23100pub fn _mm256_mask_permutexvar_pd(src: __m256d, k: __mmask8, idx: __m256i, a: __m256d) -> __m256d {
23101    unsafe {
23102        let permute = _mm256_permutexvar_pd(idx, a).as_f64x4();
23103        transmute(simd_select_bitmask(k, permute, src.as_f64x4()))
23104    }
23105}
23106
23107/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23108///
23109/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_pd&expand=4318)
23110#[inline]
23111#[target_feature(enable = "avx512f,avx512vl")]
23112#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23113#[cfg_attr(test, assert_instr(vpermpd))]
23114pub fn _mm256_maskz_permutexvar_pd(k: __mmask8, idx: __m256i, a: __m256d) -> __m256d {
23115    unsafe {
23116        let permute = _mm256_permutexvar_pd(idx, a).as_f64x4();
23117        transmute(simd_select_bitmask(k, permute, f64x4::ZERO))
23118    }
23119}
23120
23121/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23122///
23123/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi32&expand=4238)
23124#[inline]
23125#[target_feature(enable = "avx512f")]
23126#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23127#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23128pub fn _mm512_permutex2var_epi32(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
23129    unsafe { transmute(vpermi2d(a.as_i32x16(), idx.as_i32x16(), b.as_i32x16())) }
23130}
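
// Illustrative sketch (not part of the implementation above): only the low five index
// bits are used; values 0..=15 read from `a` and values 16..=31 read from `b`:
//
//     let a = _mm512_set1_epi32(100);
//     let b = _mm512_set1_epi32(200);
//     let idx = _mm512_setr_epi32(0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23);
//     let r = _mm512_permutex2var_epi32(a, idx, b);
//     // r alternates the two sources: _mm512_setr_epi32(100, 200, 100, 200, ...)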
23131
23132/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23133///
23134/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi32&expand=4235)
23135#[inline]
23136#[target_feature(enable = "avx512f")]
23137#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23138#[cfg_attr(test, assert_instr(vpermt2d))]
23139pub fn _mm512_mask_permutex2var_epi32(
23140    a: __m512i,
23141    k: __mmask16,
23142    idx: __m512i,
23143    b: __m512i,
23144) -> __m512i {
23145    unsafe {
23146        let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
23147        transmute(simd_select_bitmask(k, permute, a.as_i32x16()))
23148    }
23149}
23150
23151/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23152///
23153/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi32&expand=4237)
23154#[inline]
23155#[target_feature(enable = "avx512f")]
23156#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23157#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23158pub fn _mm512_maskz_permutex2var_epi32(
23159    k: __mmask16,
23160    a: __m512i,
23161    idx: __m512i,
23162    b: __m512i,
23163) -> __m512i {
23164    unsafe {
23165        let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
23166        transmute(simd_select_bitmask(k, permute, i32x16::ZERO))
23167    }
23168}
23169
23170/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23171///
23172/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi32&expand=4236)
23173#[inline]
23174#[target_feature(enable = "avx512f")]
23175#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23176#[cfg_attr(test, assert_instr(vpermi2d))]
23177pub fn _mm512_mask2_permutex2var_epi32(
23178    a: __m512i,
23179    idx: __m512i,
23180    k: __mmask16,
23181    b: __m512i,
23182) -> __m512i {
23183    unsafe {
23184        let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
23185        transmute(simd_select_bitmask(k, permute, idx.as_i32x16()))
23186    }
23187}
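
// Illustrative note (not part of the implementation above): the `_mask_` and `_mask2_`
// forms differ only in the fallback for unselected elements; with the same `k`, `a`,
// `idx` and `b`:
//
//     let m  = _mm512_mask_permutex2var_epi32(a, k, idx, b);  // unset bits copy from `a`
//     let m2 = _mm512_mask2_permutex2var_epi32(a, idx, k, b); // unset bits copy from `idx`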
23188
23189/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23190///
23191/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi32&expand=4234)
23192#[inline]
23193#[target_feature(enable = "avx512f,avx512vl")]
23194#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23195#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23196pub fn _mm256_permutex2var_epi32(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
23197    unsafe { transmute(vpermi2d256(a.as_i32x8(), idx.as_i32x8(), b.as_i32x8())) }
23198}
23199
23200/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23201///
23202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi32&expand=4231)
23203#[inline]
23204#[target_feature(enable = "avx512f,avx512vl")]
23205#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23206#[cfg_attr(test, assert_instr(vpermt2d))]
23207pub fn _mm256_mask_permutex2var_epi32(
23208    a: __m256i,
23209    k: __mmask8,
23210    idx: __m256i,
23211    b: __m256i,
23212) -> __m256i {
23213    unsafe {
23214        let permute = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
23215        transmute(simd_select_bitmask(k, permute, a.as_i32x8()))
23216    }
23217}
23218
23219/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23220///
23221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi32&expand=4233)
23222#[inline]
23223#[target_feature(enable = "avx512f,avx512vl")]
23224#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23225#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23226pub fn _mm256_maskz_permutex2var_epi32(
23227    k: __mmask8,
23228    a: __m256i,
23229    idx: __m256i,
23230    b: __m256i,
23231) -> __m256i {
23232    unsafe {
23233        let permute = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
23234        transmute(simd_select_bitmask(k, permute, i32x8::ZERO))
23235    }
23236}
23237
23238/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23239///
23240/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi32&expand=4232)
23241#[inline]
23242#[target_feature(enable = "avx512f,avx512vl")]
23243#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23244#[cfg_attr(test, assert_instr(vpermi2d))]
23245pub fn _mm256_mask2_permutex2var_epi32(
23246    a: __m256i,
23247    idx: __m256i,
23248    k: __mmask8,
23249    b: __m256i,
23250) -> __m256i {
23251    unsafe {
23252        let permute = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
23253        transmute(simd_select_bitmask(k, permute, idx.as_i32x8()))
23254    }
23255}
23256
23257/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23258///
23259/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi32&expand=4230)
23260#[inline]
23261#[target_feature(enable = "avx512f,avx512vl")]
23262#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23263#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23264pub fn _mm_permutex2var_epi32(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
23265    unsafe { transmute(vpermi2d128(a.as_i32x4(), idx.as_i32x4(), b.as_i32x4())) }
23266}
23267
23268/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23269///
23270/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi32&expand=4227)
23271#[inline]
23272#[target_feature(enable = "avx512f,avx512vl")]
23273#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23274#[cfg_attr(test, assert_instr(vpermt2d))]
23275pub fn _mm_mask_permutex2var_epi32(a: __m128i, k: __mmask8, idx: __m128i, b: __m128i) -> __m128i {
23276    unsafe {
23277        let permute = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
23278        transmute(simd_select_bitmask(k, permute, a.as_i32x4()))
23279    }
23280}
23281
23282/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23283///
23284/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi32&expand=4229)
23285#[inline]
23286#[target_feature(enable = "avx512f,avx512vl")]
23287#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23288#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23289pub fn _mm_maskz_permutex2var_epi32(k: __mmask8, a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
23290    unsafe {
23291        let permute = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
23292        transmute(simd_select_bitmask(k, permute, i32x4::ZERO))
23293    }
23294}
23295
23296/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23297///
23298/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi32&expand=4228)
23299#[inline]
23300#[target_feature(enable = "avx512f,avx512vl")]
23301#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23302#[cfg_attr(test, assert_instr(vpermi2d))]
23303pub fn _mm_mask2_permutex2var_epi32(a: __m128i, idx: __m128i, k: __mmask8, b: __m128i) -> __m128i {
23304    unsafe {
23305        let permute = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
23306        transmute(simd_select_bitmask(k, permute, idx.as_i32x4()))
23307    }
23308}
23309
23310/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23311///
23312/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi64&expand=4250)
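///
/// Illustrative sketch (not compiled as a doctest), assuming AVX-512F is
/// available at runtime: each `idx` element picks an element with its low
/// three bits and chooses between `a` (bit 3 clear) and `b` (bit 3 set).
///
/// ```ignore
/// let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
/// let b = _mm512_setr_epi64(10, 11, 12, 13, 14, 15, 16, 17);
/// // Indices 0..=7 select from `a`, 8..=15 select from `b`.
/// let idx = _mm512_setr_epi64(0, 8, 1, 9, 2, 10, 3, 11);
/// let r = _mm512_permutex2var_epi64(a, idx, b);
/// // r = [0, 10, 1, 11, 2, 12, 3, 13]
/// ```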
23313#[inline]
23314#[target_feature(enable = "avx512f")]
23315#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23316#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23317pub fn _mm512_permutex2var_epi64(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
23318    unsafe { transmute(vpermi2q(a.as_i64x8(), idx.as_i64x8(), b.as_i64x8())) }
23319}
23320
23321/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23322///
23323/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi64&expand=4247)
23324#[inline]
23325#[target_feature(enable = "avx512f")]
23326#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23327#[cfg_attr(test, assert_instr(vpermt2q))]
23328pub fn _mm512_mask_permutex2var_epi64(
23329    a: __m512i,
23330    k: __mmask8,
23331    idx: __m512i,
23332    b: __m512i,
23333) -> __m512i {
23334    unsafe {
23335        let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
23336        transmute(simd_select_bitmask(k, permute, a.as_i64x8()))
23337    }
23338}
23339
23340/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23341///
23342/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi64&expand=4249)
23343#[inline]
23344#[target_feature(enable = "avx512f")]
23345#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23346#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23347pub fn _mm512_maskz_permutex2var_epi64(
23348    k: __mmask8,
23349    a: __m512i,
23350    idx: __m512i,
23351    b: __m512i,
23352) -> __m512i {
23353    unsafe {
23354        let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
23355        transmute(simd_select_bitmask(k, permute, i64x8::ZERO))
23356    }
23357}
23358
23359/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23360///
23361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi64&expand=4248)
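///
/// Illustrative sketch (not compiled as a doctest) of how this `mask2` form
/// differs from `_mm512_mask_permutex2var_epi64`: lanes whose mask bit is
/// clear keep the corresponding `idx` value instead of the value from `a`.
///
/// ```ignore
/// let a = _mm512_set1_epi64(100);
/// let b = _mm512_set1_epi64(200);
/// let idx = _mm512_setr_epi64(8, 0, 9, 1, 10, 2, 11, 3);
/// // Only the even lanes are selected by the mask.
/// let r = _mm512_mask2_permutex2var_epi64(a, idx, 0b0101_0101, b);
/// // even lanes: shuffled (from `b` here) = 200; odd lanes: idx values 0, 1, 2, 3
/// // r = [200, 0, 200, 1, 200, 2, 200, 3]
/// ```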
23362#[inline]
23363#[target_feature(enable = "avx512f")]
23364#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23365#[cfg_attr(test, assert_instr(vpermi2q))]
23366pub fn _mm512_mask2_permutex2var_epi64(
23367    a: __m512i,
23368    idx: __m512i,
23369    k: __mmask8,
23370    b: __m512i,
23371) -> __m512i {
23372    unsafe {
23373        let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
23374        transmute(simd_select_bitmask(k, permute, idx.as_i64x8()))
23375    }
23376}
23377
23378/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23379///
23380/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi64&expand=4246)
23381#[inline]
23382#[target_feature(enable = "avx512f,avx512vl")]
23383#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23384#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23385pub fn _mm256_permutex2var_epi64(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
23386    unsafe { transmute(vpermi2q256(a.as_i64x4(), idx.as_i64x4(), b.as_i64x4())) }
23387}
23388
23389/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23390///
23391/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi64&expand=4243)
23392#[inline]
23393#[target_feature(enable = "avx512f,avx512vl")]
23394#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23395#[cfg_attr(test, assert_instr(vpermt2q))]
23396pub fn _mm256_mask_permutex2var_epi64(
23397    a: __m256i,
23398    k: __mmask8,
23399    idx: __m256i,
23400    b: __m256i,
23401) -> __m256i {
23402    unsafe {
23403        let permute = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
23404        transmute(simd_select_bitmask(k, permute, a.as_i64x4()))
23405    }
23406}
23407
23408/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23409///
23410/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi64&expand=4245)
23411#[inline]
23412#[target_feature(enable = "avx512f,avx512vl")]
23413#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23414#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23415pub fn _mm256_maskz_permutex2var_epi64(
23416    k: __mmask8,
23417    a: __m256i,
23418    idx: __m256i,
23419    b: __m256i,
23420) -> __m256i {
23421    unsafe {
23422        let permute = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
23423        transmute(simd_select_bitmask(k, permute, i64x4::ZERO))
23424    }
23425}
23426
23427/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23428///
23429/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi64&expand=4244)
23430#[inline]
23431#[target_feature(enable = "avx512f,avx512vl")]
23432#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23433#[cfg_attr(test, assert_instr(vpermi2q))]
23434pub fn _mm256_mask2_permutex2var_epi64(
23435    a: __m256i,
23436    idx: __m256i,
23437    k: __mmask8,
23438    b: __m256i,
23439) -> __m256i {
23440    unsafe {
23441        let permute = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
23442        transmute(simd_select_bitmask(k, permute, idx.as_i64x4()))
23443    }
23444}
23445
23446/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23447///
23448/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi64&expand=4242)
23449#[inline]
23450#[target_feature(enable = "avx512f,avx512vl")]
23451#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23452#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23453pub fn _mm_permutex2var_epi64(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
23454    unsafe { transmute(vpermi2q128(a.as_i64x2(), idx.as_i64x2(), b.as_i64x2())) }
23455}
23456
23457/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23458///
23459/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi64&expand=4239)
23460#[inline]
23461#[target_feature(enable = "avx512f,avx512vl")]
23462#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23463#[cfg_attr(test, assert_instr(vpermt2q))]
23464pub fn _mm_mask_permutex2var_epi64(a: __m128i, k: __mmask8, idx: __m128i, b: __m128i) -> __m128i {
23465    unsafe {
23466        let permute = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
23467        transmute(simd_select_bitmask(k, permute, a.as_i64x2()))
23468    }
23469}
23470
23471/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23472///
23473/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi64&expand=4241)
23474#[inline]
23475#[target_feature(enable = "avx512f,avx512vl")]
23476#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23477#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23478pub fn _mm_maskz_permutex2var_epi64(k: __mmask8, a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
23479    unsafe {
23480        let permute = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
23481        transmute(simd_select_bitmask(k, permute, i64x2::ZERO))
23482    }
23483}
23484
23485/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23486///
23487/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi64&expand=4240)
23488#[inline]
23489#[target_feature(enable = "avx512f,avx512vl")]
23490#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23491#[cfg_attr(test, assert_instr(vpermi2q))]
23492pub fn _mm_mask2_permutex2var_epi64(a: __m128i, idx: __m128i, k: __mmask8, b: __m128i) -> __m128i {
23493    unsafe {
23494        let permute = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
23495        transmute(simd_select_bitmask(k, permute, idx.as_i64x2()))
23496    }
23497}
23498
23499/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23500///
23501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_ps&expand=4286)
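///
/// Illustrative sketch (not compiled as a doctest), assuming AVX-512F is
/// available at runtime: each of the sixteen `idx` elements picks an element
/// with its low four bits and chooses between `a` (bit 4 clear) and `b`
/// (bit 4 set).
///
/// ```ignore
/// let a = _mm512_set1_ps(1.0);
/// let b = _mm512_set1_ps(2.0);
/// // Alternate element 0 of `a` (index 0) with element 0 of `b` (index 16).
/// let idx = _mm512_setr_epi32(0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16);
/// let r = _mm512_permutex2var_ps(a, idx, b);
/// // r = [1.0, 2.0, 1.0, 2.0, ..., 1.0, 2.0]
/// ```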
23502#[inline]
23503#[target_feature(enable = "avx512f")]
23504#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23505#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23506pub fn _mm512_permutex2var_ps(a: __m512, idx: __m512i, b: __m512) -> __m512 {
23507    unsafe { transmute(vpermi2ps(a.as_f32x16(), idx.as_i32x16(), b.as_f32x16())) }
23508}
23509
23510/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23511///
23512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_ps&expand=4283)
23513#[inline]
23514#[target_feature(enable = "avx512f")]
23515#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23516#[cfg_attr(test, assert_instr(vpermt2ps))]
23517pub fn _mm512_mask_permutex2var_ps(a: __m512, k: __mmask16, idx: __m512i, b: __m512) -> __m512 {
23518    unsafe {
23519        let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
23520        transmute(simd_select_bitmask(k, permute, a.as_f32x16()))
23521    }
23522}
23523
23524/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23525///
23526/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_ps&expand=4285)
23527#[inline]
23528#[target_feature(enable = "avx512f")]
23529#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23530#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23531pub fn _mm512_maskz_permutex2var_ps(k: __mmask16, a: __m512, idx: __m512i, b: __m512) -> __m512 {
23532    unsafe {
23533        let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
23534        transmute(simd_select_bitmask(k, permute, f32x16::ZERO))
23535    }
23536}
23537
23538/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23539///
23540/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_ps&expand=4284)
23541#[inline]
23542#[target_feature(enable = "avx512f")]
23543#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23544#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps
23545pub fn _mm512_mask2_permutex2var_ps(a: __m512, idx: __m512i, k: __mmask16, b: __m512) -> __m512 {
23546    unsafe {
23547        let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
23548        let idx = _mm512_castsi512_ps(idx).as_f32x16();
23549        transmute(simd_select_bitmask(k, permute, idx))
23550    }
23551}
23552
23553/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23554///
23555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_ps&expand=4282)
23556#[inline]
23557#[target_feature(enable = "avx512f,avx512vl")]
23558#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23559#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23560pub fn _mm256_permutex2var_ps(a: __m256, idx: __m256i, b: __m256) -> __m256 {
23561    unsafe { transmute(vpermi2ps256(a.as_f32x8(), idx.as_i32x8(), b.as_f32x8())) }
23562}
23563
23564/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23565///
23566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_ps&expand=4279)
23567#[inline]
23568#[target_feature(enable = "avx512f,avx512vl")]
23569#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23570#[cfg_attr(test, assert_instr(vpermt2ps))]
23571pub fn _mm256_mask_permutex2var_ps(a: __m256, k: __mmask8, idx: __m256i, b: __m256) -> __m256 {
23572    unsafe {
23573        let permute = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
23574        transmute(simd_select_bitmask(k, permute, a.as_f32x8()))
23575    }
23576}
23577
23578/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23579///
23580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_ps&expand=4281)
23581#[inline]
23582#[target_feature(enable = "avx512f,avx512vl")]
23583#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23584#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23585pub fn _mm256_maskz_permutex2var_ps(k: __mmask8, a: __m256, idx: __m256i, b: __m256) -> __m256 {
23586    unsafe {
23587        let permute = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
23588        transmute(simd_select_bitmask(k, permute, f32x8::ZERO))
23589    }
23590}
23591
23592/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23593///
23594/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_ps&expand=4280)
23595#[inline]
23596#[target_feature(enable = "avx512f,avx512vl")]
23597#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23598#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps
23599pub fn _mm256_mask2_permutex2var_ps(a: __m256, idx: __m256i, k: __mmask8, b: __m256) -> __m256 {
23600    unsafe {
23601        let permute = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
23602        let idx = _mm256_castsi256_ps(idx).as_f32x8();
23603        transmute(simd_select_bitmask(k, permute, idx))
23604    }
23605}
23606
23607/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23608///
23609/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_ps&expand=4278)
23610#[inline]
23611#[target_feature(enable = "avx512f,avx512vl")]
23612#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23613#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23614pub fn _mm_permutex2var_ps(a: __m128, idx: __m128i, b: __m128) -> __m128 {
23615    unsafe { transmute(vpermi2ps128(a.as_f32x4(), idx.as_i32x4(), b.as_f32x4())) }
23616}
23617
23618/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23619///
23620/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_ps&expand=4275)
23621#[inline]
23622#[target_feature(enable = "avx512f,avx512vl")]
23623#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23624#[cfg_attr(test, assert_instr(vpermt2ps))]
23625pub fn _mm_mask_permutex2var_ps(a: __m128, k: __mmask8, idx: __m128i, b: __m128) -> __m128 {
23626    unsafe {
23627        let permute = _mm_permutex2var_ps(a, idx, b).as_f32x4();
23628        transmute(simd_select_bitmask(k, permute, a.as_f32x4()))
23629    }
23630}
23631
23632/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23633///
23634/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_ps&expand=4277)
23635#[inline]
23636#[target_feature(enable = "avx512f,avx512vl")]
23637#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23638#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23639pub fn _mm_maskz_permutex2var_ps(k: __mmask8, a: __m128, idx: __m128i, b: __m128) -> __m128 {
23640    unsafe {
23641        let permute = _mm_permutex2var_ps(a, idx, b).as_f32x4();
23642        transmute(simd_select_bitmask(k, permute, f32x4::ZERO))
23643    }
23644}
23645
23646/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23647///
23648/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_ps&expand=4276)
23649#[inline]
23650#[target_feature(enable = "avx512f,avx512vl")]
23651#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23652#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps
23653pub fn _mm_mask2_permutex2var_ps(a: __m128, idx: __m128i, k: __mmask8, b: __m128) -> __m128 {
23654    unsafe {
23655        let permute = _mm_permutex2var_ps(a, idx, b).as_f32x4();
23656        let idx = _mm_castsi128_ps(idx).as_f32x4();
23657        transmute(simd_select_bitmask(k, permute, idx))
23658    }
23659}
23660
23661/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23662///
23663/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_pd&expand=4274)
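///
/// Illustrative sketch (not compiled as a doctest), assuming AVX-512F is
/// available at runtime: indices 0..=7 select from `a` and 8..=15 select
/// from `b`, so the `idx` below interleaves the even elements of the two
/// inputs.
///
/// ```ignore
/// let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
/// let b = _mm512_setr_pd(8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0);
/// let idx = _mm512_setr_epi64(0, 8, 2, 10, 4, 12, 6, 14);
/// let r = _mm512_permutex2var_pd(a, idx, b);
/// // r = [0.0, 8.0, 2.0, 10.0, 4.0, 12.0, 6.0, 14.0]
/// ```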
23664#[inline]
23665#[target_feature(enable = "avx512f")]
23666#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23667#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23668pub fn _mm512_permutex2var_pd(a: __m512d, idx: __m512i, b: __m512d) -> __m512d {
23669    unsafe { transmute(vpermi2pd(a.as_f64x8(), idx.as_i64x8(), b.as_f64x8())) }
23670}
23671
23672/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23673///
23674/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_pd&expand=4271)
23675#[inline]
23676#[target_feature(enable = "avx512f")]
23677#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23678#[cfg_attr(test, assert_instr(vpermt2pd))]
23679pub fn _mm512_mask_permutex2var_pd(a: __m512d, k: __mmask8, idx: __m512i, b: __m512d) -> __m512d {
23680    unsafe {
23681        let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
23682        transmute(simd_select_bitmask(k, permute, a.as_f64x8()))
23683    }
23684}
23685
23686/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23687///
23688/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_pd&expand=4273)
23689#[inline]
23690#[target_feature(enable = "avx512f")]
23691#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23692#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23693pub fn _mm512_maskz_permutex2var_pd(k: __mmask8, a: __m512d, idx: __m512i, b: __m512d) -> __m512d {
23694    unsafe {
23695        let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
23696        transmute(simd_select_bitmask(k, permute, f64x8::ZERO))
23697    }
23698}
23699
23700/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23701///
23702/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_pd&expand=4272)
23703#[inline]
23704#[target_feature(enable = "avx512f")]
23705#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23706#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd
23707pub fn _mm512_mask2_permutex2var_pd(a: __m512d, idx: __m512i, k: __mmask8, b: __m512d) -> __m512d {
23708    unsafe {
23709        let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
23710        let idx = _mm512_castsi512_pd(idx).as_f64x8();
23711        transmute(simd_select_bitmask(k, permute, idx))
23712    }
23713}
23714
23715/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23716///
23717/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_pd&expand=4270)
23718#[inline]
23719#[target_feature(enable = "avx512f,avx512vl")]
23720#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23721#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23722pub fn _mm256_permutex2var_pd(a: __m256d, idx: __m256i, b: __m256d) -> __m256d {
23723    unsafe { transmute(vpermi2pd256(a.as_f64x4(), idx.as_i64x4(), b.as_f64x4())) }
23724}
23725
23726/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23727///
23728/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_pd&expand=4267)
23729#[inline]
23730#[target_feature(enable = "avx512f,avx512vl")]
23731#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23732#[cfg_attr(test, assert_instr(vpermt2pd))]
23733pub fn _mm256_mask_permutex2var_pd(a: __m256d, k: __mmask8, idx: __m256i, b: __m256d) -> __m256d {
23734    unsafe {
23735        let permute = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
23736        transmute(simd_select_bitmask(k, permute, a.as_f64x4()))
23737    }
23738}
23739
23740/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23741///
23742/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_pd&expand=4269)
23743#[inline]
23744#[target_feature(enable = "avx512f,avx512vl")]
23745#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23746#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23747pub fn _mm256_maskz_permutex2var_pd(k: __mmask8, a: __m256d, idx: __m256i, b: __m256d) -> __m256d {
23748    unsafe {
23749        let permute = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
23750        transmute(simd_select_bitmask(k, permute, f64x4::ZERO))
23751    }
23752}
23753
23754/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23755///
23756/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_pd&expand=4268)
23757#[inline]
23758#[target_feature(enable = "avx512f,avx512vl")]
23759#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23760#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd
23761pub fn _mm256_mask2_permutex2var_pd(a: __m256d, idx: __m256i, k: __mmask8, b: __m256d) -> __m256d {
23762    unsafe {
23763        let permute = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
23764        let idx = _mm256_castsi256_pd(idx).as_f64x4();
23765        transmute(simd_select_bitmask(k, permute, idx))
23766    }
23767}
23768
23769/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23770///
23771/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_pd&expand=4266)
23772#[inline]
23773#[target_feature(enable = "avx512f,avx512vl")]
23774#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23775#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23776pub fn _mm_permutex2var_pd(a: __m128d, idx: __m128i, b: __m128d) -> __m128d {
23777    unsafe { transmute(vpermi2pd128(a.as_f64x2(), idx.as_i64x2(), b.as_f64x2())) }
23778}
23779
23780/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23781///
23782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_pd&expand=4263)
23783#[inline]
23784#[target_feature(enable = "avx512f,avx512vl")]
23785#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23786#[cfg_attr(test, assert_instr(vpermt2pd))]
23787pub fn _mm_mask_permutex2var_pd(a: __m128d, k: __mmask8, idx: __m128i, b: __m128d) -> __m128d {
23788    unsafe {
23789        let permute = _mm_permutex2var_pd(a, idx, b).as_f64x2();
23790        transmute(simd_select_bitmask(k, permute, a.as_f64x2()))
23791    }
23792}
23793
23794/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23795///
23796/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_pd&expand=4265)
23797#[inline]
23798#[target_feature(enable = "avx512f,avx512vl")]
23799#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23800#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23801pub fn _mm_maskz_permutex2var_pd(k: __mmask8, a: __m128d, idx: __m128i, b: __m128d) -> __m128d {
23802    unsafe {
23803        let permute = _mm_permutex2var_pd(a, idx, b).as_f64x2();
23804        transmute(simd_select_bitmask(k, permute, f64x2::ZERO))
23805    }
23806}
23807
23808/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23809///
23810/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_pd&expand=4264)
23811#[inline]
23812#[target_feature(enable = "avx512f,avx512vl")]
23813#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23814#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd
23815pub fn _mm_mask2_permutex2var_pd(a: __m128d, idx: __m128i, k: __mmask8, b: __m128d) -> __m128d {
23816    unsafe {
23817        let permute = _mm_permutex2var_pd(a, idx, b).as_f64x2();
23818        let idx = _mm_castsi128_pd(idx).as_f64x2();
23819        transmute(simd_select_bitmask(k, permute, idx))
23820    }
23821}
23822
23823/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst.
23824///
23825/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_epi32&expand=5150)
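///
/// Illustrative sketch (not compiled as a doctest), assuming AVX-512F is
/// available at runtime: the four two-bit fields of `MASK` choose, within
/// every 128-bit lane, which lane element goes into each destination slot,
/// so `0b00_01_10_11` reverses the elements of each lane.
///
/// ```ignore
/// let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/// let r = _mm512_shuffle_epi32::<0b00_01_10_11>(a);
/// // r = [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]
/// ```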
23826#[inline]
23827#[target_feature(enable = "avx512f")]
23828#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23829#[cfg_attr(test, assert_instr(vshufps, MASK = 9))] //should be vpshufd
23830#[rustc_legacy_const_generics(1)]
23831pub fn _mm512_shuffle_epi32<const MASK: _MM_PERM_ENUM>(a: __m512i) -> __m512i {
23832    unsafe {
23833        static_assert_uimm_bits!(MASK, 8);
23834        let r: i32x16 = simd_shuffle!(
23835            a.as_i32x16(),
23836            a.as_i32x16(),
23837            [
23838                MASK as u32 & 0b11,
23839                (MASK as u32 >> 2) & 0b11,
23840                (MASK as u32 >> 4) & 0b11,
23841                (MASK as u32 >> 6) & 0b11,
23842                (MASK as u32 & 0b11) + 4,
23843                ((MASK as u32 >> 2) & 0b11) + 4,
23844                ((MASK as u32 >> 4) & 0b11) + 4,
23845                ((MASK as u32 >> 6) & 0b11) + 4,
23846                (MASK as u32 & 0b11) + 8,
23847                ((MASK as u32 >> 2) & 0b11) + 8,
23848                ((MASK as u32 >> 4) & 0b11) + 8,
23849                ((MASK as u32 >> 6) & 0b11) + 8,
23850                (MASK as u32 & 0b11) + 12,
23851                ((MASK as u32 >> 2) & 0b11) + 12,
23852                ((MASK as u32 >> 4) & 0b11) + 12,
23853                ((MASK as u32 >> 6) & 0b11) + 12,
23854            ],
23855        );
23856        transmute(r)
23857    }
23858}
23859
23860/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23861///
23862/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_epi32&expand=5148)
23863#[inline]
23864#[target_feature(enable = "avx512f")]
23865#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23866#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
23867#[rustc_legacy_const_generics(3)]
23868pub fn _mm512_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
23869    src: __m512i,
23870    k: __mmask16,
23871    a: __m512i,
23872) -> __m512i {
23873    unsafe {
23874        static_assert_uimm_bits!(MASK, 8);
23875        let r = _mm512_shuffle_epi32::<MASK>(a);
23876        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
23877    }
23878}
23879
23880/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23881///
23882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_epi32&expand=5149)
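///
/// Illustrative sketch (not compiled as a doctest): with a zeromask, lanes
/// whose mask bit is clear become zero instead of being copied from a source
/// operand.
///
/// ```ignore
/// let a = _mm512_set1_epi32(7);
/// // Keep only the low eight lanes of the shuffled result.
/// let r = _mm512_maskz_shuffle_epi32::<0b00_01_10_11>(0x00FF, a);
/// // r = [7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0]
/// ```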
23883#[inline]
23884#[target_feature(enable = "avx512f")]
23885#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23886#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
23887#[rustc_legacy_const_generics(2)]
23888pub fn _mm512_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(k: __mmask16, a: __m512i) -> __m512i {
23889    unsafe {
23890        static_assert_uimm_bits!(MASK, 8);
23891        let r = _mm512_shuffle_epi32::<MASK>(a);
23892        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
23893    }
23894}
23895
23896/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23897///
23898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_epi32&expand=5145)
23899#[inline]
23900#[target_feature(enable = "avx512f,avx512vl")]
23901#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23902#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
23903#[rustc_legacy_const_generics(3)]
23904pub fn _mm256_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
23905    src: __m256i,
23906    k: __mmask8,
23907    a: __m256i,
23908) -> __m256i {
23909    unsafe {
23910        static_assert_uimm_bits!(MASK, 8);
23911        let r = _mm256_shuffle_epi32::<MASK>(a);
23912        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
23913    }
23914}
23915
23916/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23917///
23918/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_epi32&expand=5146)
23919#[inline]
23920#[target_feature(enable = "avx512f,avx512vl")]
23921#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23922#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
23923#[rustc_legacy_const_generics(2)]
23924pub fn _mm256_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(k: __mmask8, a: __m256i) -> __m256i {
23925    unsafe {
23926        static_assert_uimm_bits!(MASK, 8);
23927        let r = _mm256_shuffle_epi32::<MASK>(a);
23928        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
23929    }
23930}
23931
23932/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23933///
23934/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_epi32&expand=5142)
23935#[inline]
23936#[target_feature(enable = "avx512f,avx512vl")]
23937#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23938#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
23939#[rustc_legacy_const_generics(3)]
23940pub fn _mm_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
23941    src: __m128i,
23942    k: __mmask8,
23943    a: __m128i,
23944) -> __m128i {
23945    unsafe {
23946        static_assert_uimm_bits!(MASK, 8);
23947        let r = _mm_shuffle_epi32::<MASK>(a);
23948        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
23949    }
23950}
23951
23952/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23953///
23954/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_epi32&expand=5143)
23955#[inline]
23956#[target_feature(enable = "avx512f,avx512vl")]
23957#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23958#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
23959#[rustc_legacy_const_generics(2)]
23960pub fn _mm_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(k: __mmask8, a: __m128i) -> __m128i {
23961    unsafe {
23962        static_assert_uimm_bits!(MASK, 8);
23963        let r = _mm_shuffle_epi32::<MASK>(a);
23964        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
23965    }
23966}
23967
23968/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
23969///
23970/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_ps&expand=5203)
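///
/// Illustrative sketch (not compiled as a doctest), assuming AVX-512F is
/// available at runtime: within every 128-bit lane the two low fields of
/// `MASK` select elements of `a` and the two high fields select elements of
/// `b`, mirroring `_mm_shuffle_ps`.
///
/// ```ignore
/// let a = _mm512_set1_ps(1.0);
/// let b = _mm512_set1_ps(2.0);
/// let r = _mm512_shuffle_ps::<0b00_00_00_00>(a, b);
/// // Every lane becomes [a[0], a[0], b[0], b[0]] = [1.0, 1.0, 2.0, 2.0]
/// ```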
23971#[inline]
23972#[target_feature(enable = "avx512f")]
23973#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23974#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
23975#[rustc_legacy_const_generics(2)]
23976pub fn _mm512_shuffle_ps<const MASK: i32>(a: __m512, b: __m512) -> __m512 {
23977    unsafe {
23978        static_assert_uimm_bits!(MASK, 8);
23979        simd_shuffle!(
23980            a,
23981            b,
23982            [
23983                MASK as u32 & 0b11,
23984                (MASK as u32 >> 2) & 0b11,
23985                ((MASK as u32 >> 4) & 0b11) + 16,
23986                ((MASK as u32 >> 6) & 0b11) + 16,
23987                (MASK as u32 & 0b11) + 4,
23988                ((MASK as u32 >> 2) & 0b11) + 4,
23989                ((MASK as u32 >> 4) & 0b11) + 20,
23990                ((MASK as u32 >> 6) & 0b11) + 20,
23991                (MASK as u32 & 0b11) + 8,
23992                ((MASK as u32 >> 2) & 0b11) + 8,
23993                ((MASK as u32 >> 4) & 0b11) + 24,
23994                ((MASK as u32 >> 6) & 0b11) + 24,
23995                (MASK as u32 & 0b11) + 12,
23996                ((MASK as u32 >> 2) & 0b11) + 12,
23997                ((MASK as u32 >> 4) & 0b11) + 28,
23998                ((MASK as u32 >> 6) & 0b11) + 28,
23999            ],
24000        )
24001    }
24002}
24003
24004/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24005///
24006/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_ps&expand=5201)
24007#[inline]
24008#[target_feature(enable = "avx512f")]
24009#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24010#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24011#[rustc_legacy_const_generics(4)]
24012pub fn _mm512_mask_shuffle_ps<const MASK: i32>(
24013    src: __m512,
24014    k: __mmask16,
24015    a: __m512,
24016    b: __m512,
24017) -> __m512 {
24018    unsafe {
24019        static_assert_uimm_bits!(MASK, 8);
24020        let r = _mm512_shuffle_ps::<MASK>(a, b);
24021        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
24022    }
24023}
24024
24025/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24026///
24027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_ps&expand=5202)
24028#[inline]
24029#[target_feature(enable = "avx512f")]
24030#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24031#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24032#[rustc_legacy_const_generics(3)]
24033pub fn _mm512_maskz_shuffle_ps<const MASK: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
24034    unsafe {
24035        static_assert_uimm_bits!(MASK, 8);
24036        let r = _mm512_shuffle_ps::<MASK>(a, b);
24037        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
24038    }
24039}
24040
24041/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24042///
24043/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_ps&expand=5198)
24044#[inline]
24045#[target_feature(enable = "avx512f,avx512vl")]
24046#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24047#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24048#[rustc_legacy_const_generics(4)]
24049pub fn _mm256_mask_shuffle_ps<const MASK: i32>(
24050    src: __m256,
24051    k: __mmask8,
24052    a: __m256,
24053    b: __m256,
24054) -> __m256 {
24055    unsafe {
24056        static_assert_uimm_bits!(MASK, 8);
24057        let r = _mm256_shuffle_ps::<MASK>(a, b);
24058        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
24059    }
24060}
24061
24062/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24063///
24064/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_ps&expand=5199)
24065#[inline]
24066#[target_feature(enable = "avx512f,avx512vl")]
24067#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24068#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24069#[rustc_legacy_const_generics(3)]
24070pub fn _mm256_maskz_shuffle_ps<const MASK: i32>(k: __mmask8, a: __m256, b: __m256) -> __m256 {
24071    unsafe {
24072        static_assert_uimm_bits!(MASK, 8);
24073        let r = _mm256_shuffle_ps::<MASK>(a, b);
24074        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
24075    }
24076}
24077
24078/// Shuffle single-precision (32-bit) floating-point elements in a using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24079///
24080/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_ps&expand=5195)
24081#[inline]
24082#[target_feature(enable = "avx512f,avx512vl")]
24083#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24084#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24085#[rustc_legacy_const_generics(4)]
24086pub fn _mm_mask_shuffle_ps<const MASK: i32>(
24087    src: __m128,
24088    k: __mmask8,
24089    a: __m128,
24090    b: __m128,
24091) -> __m128 {
24092    unsafe {
24093        static_assert_uimm_bits!(MASK, 8);
24094        let r = _mm_shuffle_ps::<MASK>(a, b);
24095        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
24096    }
24097}
24098
24099/// Shuffle single-precision (32-bit) floating-point elements in a using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24100///
24101/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_ps&expand=5196)
24102#[inline]
24103#[target_feature(enable = "avx512f,avx512vl")]
24104#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24105#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24106#[rustc_legacy_const_generics(3)]
24107pub fn _mm_maskz_shuffle_ps<const MASK: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
24108    unsafe {
24109        static_assert_uimm_bits!(MASK, 8);
24110        let r = _mm_shuffle_ps::<MASK>(a, b);
24111        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
24112    }
24113}
24114
24115/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst.
24116///
24117/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_pd&expand=5192)
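///
/// Illustrative sketch (not compiled as a doctest), assuming AVX-512F is
/// available at runtime: each destination element takes one `MASK` bit, the
/// even elements selecting from `a` and the odd elements from `b` within the
/// same 128-bit lane (bit clear picks the low element, bit set the high one).
///
/// ```ignore
/// let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
/// let b = _mm512_setr_pd(10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0);
/// // All mask bits set: take the high element of every lane from both inputs.
/// let r = _mm512_shuffle_pd::<0b1111_1111>(a, b);
/// // r = [1.0, 11.0, 3.0, 13.0, 5.0, 15.0, 7.0, 17.0]
/// ```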
24118#[inline]
24119#[target_feature(enable = "avx512f")]
24120#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24121#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
24122#[rustc_legacy_const_generics(2)]
24123pub fn _mm512_shuffle_pd<const MASK: i32>(a: __m512d, b: __m512d) -> __m512d {
24124    unsafe {
24125        static_assert_uimm_bits!(MASK, 8);
24126        simd_shuffle!(
24127            a,
24128            b,
24129            [
24130                MASK as u32 & 0b1,
24131                ((MASK as u32 >> 1) & 0b1) + 8,
24132                ((MASK as u32 >> 2) & 0b1) + 2,
24133                ((MASK as u32 >> 3) & 0b1) + 10,
24134                ((MASK as u32 >> 4) & 0b1) + 4,
24135                ((MASK as u32 >> 5) & 0b1) + 12,
24136                ((MASK as u32 >> 6) & 0b1) + 6,
24137                ((MASK as u32 >> 7) & 0b1) + 14,
24138            ],
24139        )
24140    }
24141}
24142
24143/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24144///
24145/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_pd&expand=5190)
24146#[inline]
24147#[target_feature(enable = "avx512f")]
24148#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24149#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
24150#[rustc_legacy_const_generics(4)]
24151pub fn _mm512_mask_shuffle_pd<const MASK: i32>(
24152    src: __m512d,
24153    k: __mmask8,
24154    a: __m512d,
24155    b: __m512d,
24156) -> __m512d {
24157    unsafe {
24158        static_assert_uimm_bits!(MASK, 8);
24159        let r = _mm512_shuffle_pd::<MASK>(a, b);
24160        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
24161    }
24162}
24163
24164/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24165///
24166/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_pd&expand=5191)
24167#[inline]
24168#[target_feature(enable = "avx512f")]
24169#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24170#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
24171#[rustc_legacy_const_generics(3)]
24172pub fn _mm512_maskz_shuffle_pd<const MASK: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
24173    unsafe {
24174        static_assert_uimm_bits!(MASK, 8);
24175        let r = _mm512_shuffle_pd::<MASK>(a, b);
24176        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
24177    }
24178}
24179
24180/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24181///
24182/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_pd&expand=5187)
24183#[inline]
24184#[target_feature(enable = "avx512f,avx512vl")]
24185#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24186#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
24187#[rustc_legacy_const_generics(4)]
24188pub fn _mm256_mask_shuffle_pd<const MASK: i32>(
24189    src: __m256d,
24190    k: __mmask8,
24191    a: __m256d,
24192    b: __m256d,
24193) -> __m256d {
24194    unsafe {
24195        static_assert_uimm_bits!(MASK, 8);
24196        let r = _mm256_shuffle_pd::<MASK>(a, b);
24197        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
24198    }
24199}
24200
24201/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24202///
24203/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_pd&expand=5188)
24204#[inline]
24205#[target_feature(enable = "avx512f,avx512vl")]
24206#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24207#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
24208#[rustc_legacy_const_generics(3)]
24209pub fn _mm256_maskz_shuffle_pd<const MASK: i32>(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
24210    unsafe {
24211        static_assert_uimm_bits!(MASK, 8);
24212        let r = _mm256_shuffle_pd::<MASK>(a, b);
24213        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
24214    }
24215}
24216
24217/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24218///
24219/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_pd&expand=5184)
24220#[inline]
24221#[target_feature(enable = "avx512f,avx512vl")]
24222#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24223#[cfg_attr(test, assert_instr(vshufpd, MASK = 1))]
24224#[rustc_legacy_const_generics(4)]
24225pub fn _mm_mask_shuffle_pd<const MASK: i32>(
24226    src: __m128d,
24227    k: __mmask8,
24228    a: __m128d,
24229    b: __m128d,
24230) -> __m128d {
24231    unsafe {
24232        static_assert_uimm_bits!(MASK, 8);
24233        let r = _mm_shuffle_pd::<MASK>(a, b);
24234        transmute(simd_select_bitmask(k, r.as_f64x2(), src.as_f64x2()))
24235    }
24236}
24237
24238/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24239///
24240/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_pd&expand=5185)
24241#[inline]
24242#[target_feature(enable = "avx512f,avx512vl")]
24243#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24244#[cfg_attr(test, assert_instr(vshufpd, MASK = 1))]
24245#[rustc_legacy_const_generics(3)]
24246pub fn _mm_maskz_shuffle_pd<const MASK: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
24247    unsafe {
24248        static_assert_uimm_bits!(MASK, 8);
24249        let r = _mm_shuffle_pd::<MASK>(a, b);
24250        transmute(simd_select_bitmask(k, r.as_f64x2(), f64x2::ZERO))
24251    }
24252}
24253
24254/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst.
24255///
24256/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_i32x4&expand=5177)
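///
/// Illustrative sketch (not compiled as a doctest), assuming AVX-512F is
/// available at runtime: the four two-bit fields of `MASK` pick whole 128-bit
/// lanes, the two low fields from `a` and the two high fields from `b`.
///
/// ```ignore
/// let a = _mm512_setr_epi32(0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3);
/// let b = _mm512_setr_epi32(4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7);
/// // Destination lanes: [a.lane1, a.lane0, b.lane3, b.lane2]
/// let r = _mm512_shuffle_i32x4::<0b10_11_00_01>(a, b);
/// // r = [1, 1, 1, 1, 0, 0, 0, 0, 7, 7, 7, 7, 6, 6, 6, 6]
/// ```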
24257#[inline]
24258#[target_feature(enable = "avx512f")]
24259#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24260#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_01_01_01))] //should be vshufi32x4
24261#[rustc_legacy_const_generics(2)]
24262pub fn _mm512_shuffle_i32x4<const MASK: i32>(a: __m512i, b: __m512i) -> __m512i {
24263    unsafe {
24264        static_assert_uimm_bits!(MASK, 8);
24265        let a = a.as_i32x16();
24266        let b = b.as_i32x16();
24267        let r: i32x16 = simd_shuffle!(
24268            a,
24269            b,
24270            [
24271                (MASK as u32 & 0b11) * 4 + 0,
24272                (MASK as u32 & 0b11) * 4 + 1,
24273                (MASK as u32 & 0b11) * 4 + 2,
24274                (MASK as u32 & 0b11) * 4 + 3,
24275                ((MASK as u32 >> 2) & 0b11) * 4 + 0,
24276                ((MASK as u32 >> 2) & 0b11) * 4 + 1,
24277                ((MASK as u32 >> 2) & 0b11) * 4 + 2,
24278                ((MASK as u32 >> 2) & 0b11) * 4 + 3,
24279                ((MASK as u32 >> 4) & 0b11) * 4 + 0 + 16,
24280                ((MASK as u32 >> 4) & 0b11) * 4 + 1 + 16,
24281                ((MASK as u32 >> 4) & 0b11) * 4 + 2 + 16,
24282                ((MASK as u32 >> 4) & 0b11) * 4 + 3 + 16,
24283                ((MASK as u32 >> 6) & 0b11) * 4 + 0 + 16,
24284                ((MASK as u32 >> 6) & 0b11) * 4 + 1 + 16,
24285                ((MASK as u32 >> 6) & 0b11) * 4 + 2 + 16,
24286                ((MASK as u32 >> 6) & 0b11) * 4 + 3 + 16,
24287            ],
24288        );
24289        transmute(r)
24290    }
24291}
24292
24293/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24294///
24295/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_i32x4&expand=5175)
24296#[inline]
24297#[target_feature(enable = "avx512f")]
24298#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24299#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10_11_01_01))]
24300#[rustc_legacy_const_generics(4)]
24301pub fn _mm512_mask_shuffle_i32x4<const MASK: i32>(
24302    src: __m512i,
24303    k: __mmask16,
24304    a: __m512i,
24305    b: __m512i,
24306) -> __m512i {
24307    unsafe {
24308        static_assert_uimm_bits!(MASK, 8);
24309        let r = _mm512_shuffle_i32x4::<MASK>(a, b);
24310        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
24311    }
24312}
24313
24314/// Shuffle 128 bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24315///
24316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_i32x4&expand=5176)
24317#[inline]
24318#[target_feature(enable = "avx512f")]
24319#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24320#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10_11_01_01))]
24321#[rustc_legacy_const_generics(3)]
24322pub fn _mm512_maskz_shuffle_i32x4<const MASK: i32>(
24323    k: __mmask16,
24324    a: __m512i,
24325    b: __m512i,
24326) -> __m512i {
24327    unsafe {
24328        static_assert_uimm_bits!(MASK, 8);
24329        let r = _mm512_shuffle_i32x4::<MASK>(a, b);
24330        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
24331    }
24332}
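// Illustrative sketch (comment only, not compiled here) of how the two mask variants
// above differ: with a writemask, elements whose mask bit is clear are copied from
// `src`; with a zeromask they become 0. Assuming `k = 0b0000_0000_0000_1111` (only the
// four lowest elements selected) and `r` is the unmasked shuffle result:
//
//     let m = _mm512_mask_shuffle_i32x4::<0b00_00_00_00>(src, k, a, b);
//     // m[0..4] == r[0..4], m[4..16] == src[4..16]
//     let z = _mm512_maskz_shuffle_i32x4::<0b00_00_00_00>(k, a, b);
//     // z[0..4] == r[0..4], z[4..16] == 0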
24333
24334/// Shuffle 128 bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst.
24335///
24336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_i32x4&expand=5174)
24337#[inline]
24338#[target_feature(enable = "avx512f,avx512vl")]
24339#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24340#[cfg_attr(test, assert_instr(vperm, MASK = 0b11))] //should be vshufi32x4
24341#[rustc_legacy_const_generics(2)]
24342pub fn _mm256_shuffle_i32x4<const MASK: i32>(a: __m256i, b: __m256i) -> __m256i {
24343    unsafe {
24344        static_assert_uimm_bits!(MASK, 8);
24345        let a = a.as_i32x8();
24346        let b = b.as_i32x8();
24347        let r: i32x8 = simd_shuffle!(
24348            a,
24349            b,
24350            [
24351                (MASK as u32 & 0b1) * 4 + 0,
24352                (MASK as u32 & 0b1) * 4 + 1,
24353                (MASK as u32 & 0b1) * 4 + 2,
24354                (MASK as u32 & 0b1) * 4 + 3,
24355                ((MASK as u32 >> 1) & 0b1) * 4 + 0 + 8,
24356                ((MASK as u32 >> 1) & 0b1) * 4 + 1 + 8,
24357                ((MASK as u32 >> 1) & 0b1) * 4 + 2 + 8,
24358                ((MASK as u32 >> 1) & 0b1) * 4 + 3 + 8,
24359            ],
24360        );
24361        transmute(r)
24362    }
24363}
24364
24365/// Shuffle 128 bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24366///
24367/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_i32x4&expand=5172)
24368#[inline]
24369#[target_feature(enable = "avx512f,avx512vl")]
24370#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24371#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b11))]
24372#[rustc_legacy_const_generics(4)]
24373pub fn _mm256_mask_shuffle_i32x4<const MASK: i32>(
24374    src: __m256i,
24375    k: __mmask8,
24376    a: __m256i,
24377    b: __m256i,
24378) -> __m256i {
24379    unsafe {
24380        static_assert_uimm_bits!(MASK, 8);
24381        let r = _mm256_shuffle_i32x4::<MASK>(a, b);
24382        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
24383    }
24384}
24385
24386/// Shuffle 128 bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24387///
24388/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_i32x4&expand=5173)
24389#[inline]
24390#[target_feature(enable = "avx512f,avx512vl")]
24391#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24392#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b11))]
24393#[rustc_legacy_const_generics(3)]
24394pub fn _mm256_maskz_shuffle_i32x4<const MASK: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
24395    unsafe {
24396        static_assert_uimm_bits!(MASK, 8);
24397        let r = _mm256_shuffle_i32x4::<MASK>(a, b);
24398        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
24399    }
24400}
24401
24402/// Shuffle 128 bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst.
24403///
24404/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_i64x2&expand=5183)
24405#[inline]
24406#[target_feature(enable = "avx512f")]
24407#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24408#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
24409#[rustc_legacy_const_generics(2)]
24410pub fn _mm512_shuffle_i64x2<const MASK: i32>(a: __m512i, b: __m512i) -> __m512i {
24411    unsafe {
24412        static_assert_uimm_bits!(MASK, 8);
24413        let a = a.as_i64x8();
24414        let b = b.as_i64x8();
24415        let r: i64x8 = simd_shuffle!(
24416            a,
24417            b,
24418            [
24419                (MASK as u32 & 0b11) * 2 + 0,
24420                (MASK as u32 & 0b11) * 2 + 1,
24421                ((MASK as u32 >> 2) & 0b11) * 2 + 0,
24422                ((MASK as u32 >> 2) & 0b11) * 2 + 1,
24423                ((MASK as u32 >> 4) & 0b11) * 2 + 0 + 8,
24424                ((MASK as u32 >> 4) & 0b11) * 2 + 1 + 8,
24425                ((MASK as u32 >> 6) & 0b11) * 2 + 0 + 8,
24426                ((MASK as u32 >> 6) & 0b11) * 2 + 1 + 8,
24427            ],
24428        );
24429        transmute(r)
24430    }
24431}
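// Illustrative sketch (comment only, not compiled here): same lane-selection scheme as
// `_mm512_shuffle_i32x4`, but each 128-bit lane holds two i64 elements. Assuming `a`
// holds the i64 values 0..=7 and `b` holds 8..=15 with element 0 first (e.g.
// `_mm512_setr_epi64`):
//
//     let r = _mm512_shuffle_i64x2::<0b11_10_01_00>(a, b);
//     // lanes <- a lane 0, a lane 1, b lane 2, b lane 3
//     // i.e. r = [0, 1, 2, 3, 12, 13, 14, 15]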
24432
24433/// Shuffle 128 bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24434///
24435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_i64x2&expand=5181)
24436#[inline]
24437#[target_feature(enable = "avx512f")]
24438#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24439#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
24440#[rustc_legacy_const_generics(4)]
24441pub fn _mm512_mask_shuffle_i64x2<const MASK: i32>(
24442    src: __m512i,
24443    k: __mmask8,
24444    a: __m512i,
24445    b: __m512i,
24446) -> __m512i {
24447    unsafe {
24448        static_assert_uimm_bits!(MASK, 8);
24449        let r = _mm512_shuffle_i64x2::<MASK>(a, b);
24450        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
24451    }
24452}
24453
24454/// Shuffle 128 bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24455///
24456/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_i64x2&expand=5182)
24457#[inline]
24458#[target_feature(enable = "avx512f")]
24459#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24460#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
24461#[rustc_legacy_const_generics(3)]
24462pub fn _mm512_maskz_shuffle_i64x2<const MASK: i32>(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
24463    unsafe {
24464        static_assert_uimm_bits!(MASK, 8);
24465        let r = _mm512_shuffle_i64x2::<MASK>(a, b);
24466        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
24467    }
24468}
24469
24470/// Shuffle 128 bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst.
24471///
24472/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_i64x2&expand=5180)
24473#[inline]
24474#[target_feature(enable = "avx512f,avx512vl")]
24475#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24476#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshufi64x2
24477#[rustc_legacy_const_generics(2)]
24478pub fn _mm256_shuffle_i64x2<const MASK: i32>(a: __m256i, b: __m256i) -> __m256i {
24479    unsafe {
24480        static_assert_uimm_bits!(MASK, 8);
24481        let a = a.as_i64x4();
24482        let b = b.as_i64x4();
24483        let r: i64x4 = simd_shuffle!(
24484            a,
24485            b,
24486            [
24487                (MASK as u32 & 0b1) * 2 + 0,
24488                (MASK as u32 & 0b1) * 2 + 1,
24489                ((MASK as u32 >> 1) & 0b1) * 2 + 0 + 4,
24490                ((MASK as u32 >> 1) & 0b1) * 2 + 1 + 4,
24491            ],
24492        );
24493        transmute(r)
24494    }
24495}
24496
24497/// Shuffle 128 bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24498///
24499/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_i64x2&expand=5178)
24500#[inline]
24501#[target_feature(enable = "avx512f,avx512vl")]
24502#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24503#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b11))]
24504#[rustc_legacy_const_generics(4)]
24505pub fn _mm256_mask_shuffle_i64x2<const MASK: i32>(
24506    src: __m256i,
24507    k: __mmask8,
24508    a: __m256i,
24509    b: __m256i,
24510) -> __m256i {
24511    unsafe {
24512        static_assert_uimm_bits!(MASK, 8);
24513        let r = _mm256_shuffle_i64x2::<MASK>(a, b);
24514        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
24515    }
24516}
24517
24518/// Shuffle 128 bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24519///
24520/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_i64x2&expand=5179)
24521#[inline]
24522#[target_feature(enable = "avx512f,avx512vl")]
24523#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24524#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b11))]
24525#[rustc_legacy_const_generics(3)]
24526pub fn _mm256_maskz_shuffle_i64x2<const MASK: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
24527    unsafe {
24528        static_assert_uimm_bits!(MASK, 8);
24529        let r = _mm256_shuffle_i64x2::<MASK>(a, b);
24530        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
24531    }
24532}
24533
24534/// Shuffle 128 bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
24535///
24536/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_f32x4&expand=5165)
24537#[inline]
24538#[target_feature(enable = "avx512f")]
24539#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24540#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b1011))] // should be vshuff32x4, but the compiler generates vshuff64x2
24541#[rustc_legacy_const_generics(2)]
24542pub fn _mm512_shuffle_f32x4<const MASK: i32>(a: __m512, b: __m512) -> __m512 {
24543    unsafe {
24544        static_assert_uimm_bits!(MASK, 8);
24545        let a = a.as_f32x16();
24546        let b = b.as_f32x16();
24547        let r: f32x16 = simd_shuffle!(
24548            a,
24549            b,
24550            [
24551                (MASK as u32 & 0b11) * 4 + 0,
24552                (MASK as u32 & 0b11) * 4 + 1,
24553                (MASK as u32 & 0b11) * 4 + 2,
24554                (MASK as u32 & 0b11) * 4 + 3,
24555                ((MASK as u32 >> 2) & 0b11) * 4 + 0,
24556                ((MASK as u32 >> 2) & 0b11) * 4 + 1,
24557                ((MASK as u32 >> 2) & 0b11) * 4 + 2,
24558                ((MASK as u32 >> 2) & 0b11) * 4 + 3,
24559                ((MASK as u32 >> 4) & 0b11) * 4 + 0 + 16,
24560                ((MASK as u32 >> 4) & 0b11) * 4 + 1 + 16,
24561                ((MASK as u32 >> 4) & 0b11) * 4 + 2 + 16,
24562                ((MASK as u32 >> 4) & 0b11) * 4 + 3 + 16,
24563                ((MASK as u32 >> 6) & 0b11) * 4 + 0 + 16,
24564                ((MASK as u32 >> 6) & 0b11) * 4 + 1 + 16,
24565                ((MASK as u32 >> 6) & 0b11) * 4 + 2 + 16,
24566                ((MASK as u32 >> 6) & 0b11) * 4 + 3 + 16,
24567            ],
24568        );
24569        transmute(r)
24570    }
24571}
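// Illustrative sketch (comment only, not compiled here): with MASK = 0 the lowest
// 128-bit lane of each source is broadcast, so the result is `a` lane 0 repeated
// twice followed by `b` lane 0 repeated twice:
//
//     let r = _mm512_shuffle_f32x4::<0b00_00_00_00>(a, b);
//     // r = [a0, a1, a2, a3, a0, a1, a2, a3, b0, b1, b2, b3, b0, b1, b2, b3]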
24572
24573/// Shuffle 128 bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24574///
24575/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_f32x4&expand=5163)
24576#[inline]
24577#[target_feature(enable = "avx512f")]
24578#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24579#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b1011))]
24580#[rustc_legacy_const_generics(4)]
24581pub fn _mm512_mask_shuffle_f32x4<const MASK: i32>(
24582    src: __m512,
24583    k: __mmask16,
24584    a: __m512,
24585    b: __m512,
24586) -> __m512 {
24587    unsafe {
24588        static_assert_uimm_bits!(MASK, 8);
24589        let r = _mm512_shuffle_f32x4::<MASK>(a, b);
24590        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
24591    }
24592}
24593
24594/// Shuffle 128 bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24595///
24596/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_f32x4&expand=5164)
24597#[inline]
24598#[target_feature(enable = "avx512f")]
24599#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24600#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b1011))]
24601#[rustc_legacy_const_generics(3)]
24602pub fn _mm512_maskz_shuffle_f32x4<const MASK: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
24603    unsafe {
24604        static_assert_uimm_bits!(MASK, 8);
24605        let r = _mm512_shuffle_f32x4::<MASK>(a, b);
24606        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
24607    }
24608}
24609
24610/// Shuffle 128 bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
24611///
24612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_f32x4&expand=5162)
24613#[inline]
24614#[target_feature(enable = "avx512f,avx512vl")]
24615#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24616#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshuff32x4
24617#[rustc_legacy_const_generics(2)]
24618pub fn _mm256_shuffle_f32x4<const MASK: i32>(a: __m256, b: __m256) -> __m256 {
24619    unsafe {
24620        static_assert_uimm_bits!(MASK, 8);
24621        let a = a.as_f32x8();
24622        let b = b.as_f32x8();
24623        let r: f32x8 = simd_shuffle!(
24624            a,
24625            b,
24626            [
24627                (MASK as u32 & 0b1) * 4 + 0,
24628                (MASK as u32 & 0b1) * 4 + 1,
24629                (MASK as u32 & 0b1) * 4 + 2,
24630                (MASK as u32 & 0b1) * 4 + 3,
24631                ((MASK as u32 >> 1) & 0b1) * 4 + 0 + 8,
24632                ((MASK as u32 >> 1) & 0b1) * 4 + 1 + 8,
24633                ((MASK as u32 >> 1) & 0b1) * 4 + 2 + 8,
24634                ((MASK as u32 >> 1) & 0b1) * 4 + 3 + 8,
24635            ],
24636        );
24637        transmute(r)
24638    }
24639}
24640
24641/// Shuffle 128 bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24642///
24643/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_f32x4&expand=5160)
24644#[inline]
24645#[target_feature(enable = "avx512f,avx512vl")]
24646#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24647#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b11))]
24648#[rustc_legacy_const_generics(4)]
24649pub fn _mm256_mask_shuffle_f32x4<const MASK: i32>(
24650    src: __m256,
24651    k: __mmask8,
24652    a: __m256,
24653    b: __m256,
24654) -> __m256 {
24655    unsafe {
24656        static_assert_uimm_bits!(MASK, 8);
24657        let r = _mm256_shuffle_f32x4::<MASK>(a, b);
24658        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
24659    }
24660}
24661
24662/// Shuffle 128 bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24663///
24664/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_f32x4&expand=5161)
24665#[inline]
24666#[target_feature(enable = "avx512f,avx512vl")]
24667#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24668#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b11))]
24669#[rustc_legacy_const_generics(3)]
24670pub fn _mm256_maskz_shuffle_f32x4<const MASK: i32>(k: __mmask8, a: __m256, b: __m256) -> __m256 {
24671    unsafe {
24672        static_assert_uimm_bits!(MASK, 8);
24673        let r = _mm256_shuffle_f32x4::<MASK>(a, b);
24674        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
24675    }
24676}
24677
24678/// Shuffle 128 bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
24679///
24680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_f64x2&expand=5171)
24681#[inline]
24682#[target_feature(enable = "avx512f")]
24683#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24684#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
24685#[rustc_legacy_const_generics(2)]
24686pub fn _mm512_shuffle_f64x2<const MASK: i32>(a: __m512d, b: __m512d) -> __m512d {
24687    unsafe {
24688        static_assert_uimm_bits!(MASK, 8);
24689        let a = a.as_f64x8();
24690        let b = b.as_f64x8();
24691        let r: f64x8 = simd_shuffle!(
24692            a,
24693            b,
24694            [
24695                (MASK as u32 & 0b11) * 2 + 0,
24696                (MASK as u32 & 0b11) * 2 + 1,
24697                ((MASK as u32 >> 2) & 0b11) * 2 + 0,
24698                ((MASK as u32 >> 2) & 0b11) * 2 + 1,
24699                ((MASK as u32 >> 4) & 0b11) * 2 + 0 + 8,
24700                ((MASK as u32 >> 4) & 0b11) * 2 + 1 + 8,
24701                ((MASK as u32 >> 6) & 0b11) * 2 + 0 + 8,
24702                ((MASK as u32 >> 6) & 0b11) * 2 + 1 + 8,
24703            ],
24704        );
24705        transmute(r)
24706    }
24707}
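// Illustrative sketch (comment only, not compiled here): MASK = 0b00_01_10_11 reverses
// the lane order within each source half, taking `a` lane 3, `a` lane 2, `b` lane 1,
// `b` lane 0:
//
//     let r = _mm512_shuffle_f64x2::<0b00_01_10_11>(a, b);
//     // r = [a6, a7, a4, a5, b2, b3, b0, b1]   (as f64 elements)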
24708
24709/// Shuffle 128 bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24710///
24711/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_f64x2&expand=5169)
24712#[inline]
24713#[target_feature(enable = "avx512f")]
24714#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24715#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
24716#[rustc_legacy_const_generics(4)]
24717pub fn _mm512_mask_shuffle_f64x2<const MASK: i32>(
24718    src: __m512d,
24719    k: __mmask8,
24720    a: __m512d,
24721    b: __m512d,
24722) -> __m512d {
24723    unsafe {
24724        static_assert_uimm_bits!(MASK, 8);
24725        let r = _mm512_shuffle_f64x2::<MASK>(a, b);
24726        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
24727    }
24728}
24729
24730/// Shuffle 128 bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24731///
24732/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_f64x2&expand=5170)
24733#[inline]
24734#[target_feature(enable = "avx512f")]
24735#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24736#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
24737#[rustc_legacy_const_generics(3)]
24738pub fn _mm512_maskz_shuffle_f64x2<const MASK: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
24739    unsafe {
24740        static_assert_uimm_bits!(MASK, 8);
24741        let r = _mm512_shuffle_f64x2::<MASK>(a, b);
24742        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
24743    }
24744}
24745
24746/// Shuffle 128 bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
24747///
24748/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_f64x2&expand=5168)
24749#[inline]
24750#[target_feature(enable = "avx512f,avx512vl")]
24751#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24752#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshuff64x2
24753#[rustc_legacy_const_generics(2)]
24754pub fn _mm256_shuffle_f64x2<const MASK: i32>(a: __m256d, b: __m256d) -> __m256d {
24755    unsafe {
24756        static_assert_uimm_bits!(MASK, 8);
24757        let a = a.as_f64x4();
24758        let b = b.as_f64x4();
24759        let r: f64x4 = simd_shuffle!(
24760            a,
24761            b,
24762            [
24763                (MASK as u32 & 0b1) * 2 + 0,
24764                (MASK as u32 & 0b1) * 2 + 1,
24765                ((MASK as u32 >> 1) & 0b1) * 2 + 0 + 4,
24766                ((MASK as u32 >> 1) & 0b1) * 2 + 1 + 4,
24767            ],
24768        );
24769        transmute(r)
24770    }
24771}
24772
24773/// Shuffle 128 bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24774///
24775/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_f64x2&expand=5166)
24776#[inline]
24777#[target_feature(enable = "avx512f,avx512vl")]
24778#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24779#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b11))]
24780#[rustc_legacy_const_generics(4)]
24781pub fn _mm256_mask_shuffle_f64x2<const MASK: i32>(
24782    src: __m256d,
24783    k: __mmask8,
24784    a: __m256d,
24785    b: __m256d,
24786) -> __m256d {
24787    unsafe {
24788        static_assert_uimm_bits!(MASK, 8);
24789        let r = _mm256_shuffle_f64x2::<MASK>(a, b);
24790        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
24791    }
24792}
24793
24794/// Shuffle 128 bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24795///
24796/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_f64x2&expand=5167)
24797#[inline]
24798#[target_feature(enable = "avx512f,avx512vl")]
24799#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24800#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b11))]
24801#[rustc_legacy_const_generics(3)]
24802pub fn _mm256_maskz_shuffle_f64x2<const MASK: i32>(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
24803    unsafe {
24804        static_assert_uimm_bits!(MASK, 8);
24805        let r = _mm256_shuffle_f64x2::<MASK>(a, b);
24806        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
24807    }
24808}
24809
24810/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
24811///
24812/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf32x4_ps&expand=2442)
24813#[inline]
24814#[target_feature(enable = "avx512f")]
24815#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24816#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 3))]
24817#[rustc_legacy_const_generics(1)]
24818pub fn _mm512_extractf32x4_ps<const IMM8: i32>(a: __m512) -> __m128 {
24819    unsafe {
24820        static_assert_uimm_bits!(IMM8, 2);
24821        match IMM8 & 0x3 {
24822            0 => simd_shuffle!(a, _mm512_undefined_ps(), [0, 1, 2, 3]),
24823            1 => simd_shuffle!(a, _mm512_undefined_ps(), [4, 5, 6, 7]),
24824            2 => simd_shuffle!(a, _mm512_undefined_ps(), [8, 9, 10, 11]),
24825            _ => simd_shuffle!(a, _mm512_undefined_ps(), [12, 13, 14, 15]),
24826        }
24827    }
24828}
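// Illustrative sketch (comment only, not compiled here): IMM8 selects which of the
// four 128-bit lanes of `a` is returned. Assuming `a` holds 0.0..=15.0 with element 0
// first:
//
//     let lo = _mm512_extractf32x4_ps::<0>(a);  // [0.0, 1.0, 2.0, 3.0]
//     let hi = _mm512_extractf32x4_ps::<3>(a);  // [12.0, 13.0, 14.0, 15.0]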
24829
24830/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24831///
24832/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf32x4_ps&expand=2443)
24833#[inline]
24834#[target_feature(enable = "avx512f")]
24835#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24836#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 3))]
24837#[rustc_legacy_const_generics(3)]
24838pub fn _mm512_mask_extractf32x4_ps<const IMM8: i32>(src: __m128, k: __mmask8, a: __m512) -> __m128 {
24839    unsafe {
24840        static_assert_uimm_bits!(IMM8, 2);
24841        let r = _mm512_extractf32x4_ps::<IMM8>(a);
24842        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
24843    }
24844}
24845
24846/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24847///
24848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf32x4_ps&expand=2444)
24849#[inline]
24850#[target_feature(enable = "avx512f")]
24851#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24852#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 3))]
24853#[rustc_legacy_const_generics(2)]
24854pub fn _mm512_maskz_extractf32x4_ps<const IMM8: i32>(k: __mmask8, a: __m512) -> __m128 {
24855    unsafe {
24856        static_assert_uimm_bits!(IMM8, 2);
24857        let r = _mm512_extractf32x4_ps::<IMM8>(a);
24858        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
24859    }
24860}
24861
24862/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
24863///
24864/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extractf32x4_ps&expand=2439)
24865#[inline]
24866#[target_feature(enable = "avx512f,avx512vl")]
24867#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24868#[cfg_attr(
24869    test,
24870    assert_instr(vextract, IMM8 = 1) //should be vextractf32x4
24871)]
24872#[rustc_legacy_const_generics(1)]
24873pub fn _mm256_extractf32x4_ps<const IMM8: i32>(a: __m256) -> __m128 {
24874    unsafe {
24875        static_assert_uimm_bits!(IMM8, 1);
24876        match IMM8 & 0x1 {
24877            0 => simd_shuffle!(a, _mm256_undefined_ps(), [0, 1, 2, 3]),
24878            _ => simd_shuffle!(a, _mm256_undefined_ps(), [4, 5, 6, 7]),
24879        }
24880    }
24881}
24882
24883/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24884///
24885/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extractf32x4_ps&expand=2440)
24886#[inline]
24887#[target_feature(enable = "avx512f,avx512vl")]
24888#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24889#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 1))]
24890#[rustc_legacy_const_generics(3)]
24891pub fn _mm256_mask_extractf32x4_ps<const IMM8: i32>(src: __m128, k: __mmask8, a: __m256) -> __m128 {
24892    unsafe {
24893        static_assert_uimm_bits!(IMM8, 1);
24894        let r = _mm256_extractf32x4_ps::<IMM8>(a);
24895        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
24896    }
24897}
24898
24899/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24900///
24901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extractf32x4_ps&expand=2441)
24902#[inline]
24903#[target_feature(enable = "avx512f,avx512vl")]
24904#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24905#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 1))]
24906#[rustc_legacy_const_generics(2)]
24907pub fn _mm256_maskz_extractf32x4_ps<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128 {
24908    unsafe {
24909        static_assert_uimm_bits!(IMM8, 1);
24910        let r = _mm256_extractf32x4_ps::<IMM8>(a);
24911        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
24912    }
24913}
24914
24915/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the result in dst.
24916///
24917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti64x4_epi64&expand=2473)
24918#[inline]
24919#[target_feature(enable = "avx512f")]
24920#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24921#[cfg_attr(
24922    test,
24923    assert_instr(vextractf64x4, IMM1 = 1) //should be vextracti64x4
24924)]
24925#[rustc_legacy_const_generics(1)]
24926pub fn _mm512_extracti64x4_epi64<const IMM1: i32>(a: __m512i) -> __m256i {
24927    unsafe {
24928        static_assert_uimm_bits!(IMM1, 1);
24929        match IMM1 {
24930            0 => simd_shuffle!(a, _mm512_setzero_si512(), [0, 1, 2, 3]),
24931            _ => simd_shuffle!(a, _mm512_setzero_si512(), [4, 5, 6, 7]),
24932        }
24933    }
24934}
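// Illustrative sketch (comment only, not compiled here): IMM1 selects the low or high
// 256-bit half of `a`. Assuming `a` holds the i64 values 0..=7 with element 0 first
// (e.g. `_mm512_setr_epi64`):
//
//     let lo = _mm512_extracti64x4_epi64::<0>(a);  // [0, 1, 2, 3]
//     let hi = _mm512_extracti64x4_epi64::<1>(a);  // [4, 5, 6, 7]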
24935
24936/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24937///
24938/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti64x4_epi64&expand=2474)
24939#[inline]
24940#[target_feature(enable = "avx512f")]
24941#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24942#[cfg_attr(test, assert_instr(vextracti64x4, IMM1 = 1))]
24943#[rustc_legacy_const_generics(3)]
24944pub fn _mm512_mask_extracti64x4_epi64<const IMM1: i32>(
24945    src: __m256i,
24946    k: __mmask8,
24947    a: __m512i,
24948) -> __m256i {
24949    unsafe {
24950        static_assert_uimm_bits!(IMM1, 1);
24951        let r = _mm512_extracti64x4_epi64::<IMM1>(a);
24952        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
24953    }
24954}
24955
24956/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24957///
24958/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti64x4_epi64&expand=2475)
24959#[inline]
24960#[target_feature(enable = "avx512f")]
24961#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24962#[cfg_attr(test, assert_instr(vextracti64x4, IMM1 = 1))]
24963#[rustc_legacy_const_generics(2)]
24964pub fn _mm512_maskz_extracti64x4_epi64<const IMM1: i32>(k: __mmask8, a: __m512i) -> __m256i {
24965    unsafe {
24966        static_assert_uimm_bits!(IMM1, 1);
24967        let r = _mm512_extracti64x4_epi64::<IMM1>(a);
24968        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
24969    }
24970}
24971
24972/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
24973///
24974/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf64x4_pd&expand=2454)
24975#[inline]
24976#[target_feature(enable = "avx512f")]
24977#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24978#[cfg_attr(test, assert_instr(vextractf64x4, IMM8 = 1))]
24979#[rustc_legacy_const_generics(1)]
24980pub fn _mm512_extractf64x4_pd<const IMM8: i32>(a: __m512d) -> __m256d {
24981    unsafe {
24982        static_assert_uimm_bits!(IMM8, 1);
24983        match IMM8 & 0x1 {
24984            0 => simd_shuffle!(a, _mm512_undefined_pd(), [0, 1, 2, 3]),
24985            _ => simd_shuffle!(a, _mm512_undefined_pd(), [4, 5, 6, 7]),
24986        }
24987    }
24988}
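// Illustrative sketch (comment only, not compiled here): the single immediate bit
// picks the low (0) or high (1) four doubles of `a`:
//
//     let lo = _mm512_extractf64x4_pd::<0>(a);  // elements 0..=3 of a
//     let hi = _mm512_extractf64x4_pd::<1>(a);  // elements 4..=7 of a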
24989
24990/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24991///
24992/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf64x4_pd&expand=2455)
24993#[inline]
24994#[target_feature(enable = "avx512f")]
24995#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24996#[cfg_attr(test, assert_instr(vextractf64x4, IMM8 = 1))]
24997#[rustc_legacy_const_generics(3)]
24998pub fn _mm512_mask_extractf64x4_pd<const IMM8: i32>(
24999    src: __m256d,
25000    k: __mmask8,
25001    a: __m512d,
25002) -> __m256d {
25003    unsafe {
25004        static_assert_uimm_bits!(IMM8, 1);
25005        let r = _mm512_extractf64x4_pd::<IMM8>(a);
25006        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
25007    }
25008}
25009
25010/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25011///
25012/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf64x4_pd&expand=2456)
25013#[inline]
25014#[target_feature(enable = "avx512f")]
25015#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25016#[cfg_attr(test, assert_instr(vextractf64x4, IMM8 = 1))]
25017#[rustc_legacy_const_generics(2)]
25018pub fn _mm512_maskz_extractf64x4_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m256d {
25019    unsafe {
25020        static_assert_uimm_bits!(IMM8, 1);
25021        let r = _mm512_extractf64x4_pd::<IMM8>(a);
25022        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
25023    }
25024}
25025
25026/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the result in dst.
25027///
25028/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti32x4_epi32&expand=2461)
25029#[inline]
25030#[target_feature(enable = "avx512f")]
25031#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25032#[cfg_attr(
25033    test,
25034    assert_instr(vextractf32x4, IMM2 = 3) //should be vextracti32x4
25035)]
25036#[rustc_legacy_const_generics(1)]
25037pub fn _mm512_extracti32x4_epi32<const IMM2: i32>(a: __m512i) -> __m128i {
25038    unsafe {
25039        static_assert_uimm_bits!(IMM2, 2);
25040        let a = a.as_i32x16();
25041        let zero = i32x16::ZERO;
25042        let extract: i32x4 = match IMM2 {
25043            0 => simd_shuffle!(a, zero, [0, 1, 2, 3]),
25044            1 => simd_shuffle!(a, zero, [4, 5, 6, 7]),
25045            2 => simd_shuffle!(a, zero, [8, 9, 10, 11]),
25046            _ => simd_shuffle!(a, zero, [12, 13, 14, 15]),
25047        };
25048        transmute(extract)
25049    }
25050}
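// Illustrative sketch (comment only, not compiled here): IMM2 picks one of the four
// 128-bit lanes of `a`. Assuming `a` holds the i32 values 0..=15 with element 0 first:
//
//     let r = _mm512_extracti32x4_epi32::<2>(a);  // [8, 9, 10, 11]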
25051
25052/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25053///
25054/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti32x4_epi32&expand=2462)
25055#[inline]
25056#[target_feature(enable = "avx512f")]
25057#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25058#[cfg_attr(test, assert_instr(vextracti32x4, IMM2 = 3))]
25059#[rustc_legacy_const_generics(3)]
25060pub fn _mm512_mask_extracti32x4_epi32<const IMM2: i32>(
25061    src: __m128i,
25062    k: __mmask8,
25063    a: __m512i,
25064) -> __m128i {
25065    unsafe {
25066        static_assert_uimm_bits!(IMM2, 2);
25067        let r = _mm512_extracti32x4_epi32::<IMM2>(a);
25068        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
25069    }
25070}
25071
25072/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25073///
25074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti32x4_epi32&expand=2463)
25075#[inline]
25076#[target_feature(enable = "avx512f")]
25077#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25078#[cfg_attr(test, assert_instr(vextracti32x4, IMM2 = 3))]
25079#[rustc_legacy_const_generics(2)]
25080pub fn _mm512_maskz_extracti32x4_epi32<const IMM2: i32>(k: __mmask8, a: __m512i) -> __m128i {
25081    unsafe {
25082        static_assert_uimm_bits!(IMM2, 2);
25083        let r = _mm512_extracti32x4_epi32::<IMM2>(a);
25084        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
25085    }
25086}
25087
25088/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the result in dst.
25089///
25090/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extracti32x4_epi32&expand=2458)
25091#[inline]
25092#[target_feature(enable = "avx512f,avx512vl")]
25093#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25094#[cfg_attr(
25095    test,
25096    assert_instr(vextract, IMM1 = 1) //should be vextracti32x4
25097)]
25098#[rustc_legacy_const_generics(1)]
25099pub fn _mm256_extracti32x4_epi32<const IMM1: i32>(a: __m256i) -> __m128i {
25100    unsafe {
25101        static_assert_uimm_bits!(IMM1, 1);
25102        let a = a.as_i32x8();
25103        let zero = i32x8::ZERO;
25104        let extract: i32x4 = match IMM1 {
25105            0 => simd_shuffle!(a, zero, [0, 1, 2, 3]),
25106            _ => simd_shuffle!(a, zero, [4, 5, 6, 7]),
25107        };
25108        transmute(extract)
25109    }
25110}
25111
25112/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25113///
25114/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extracti32x4_epi32&expand=2459)
25115#[inline]
25116#[target_feature(enable = "avx512f,avx512vl")]
25117#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25118#[cfg_attr(test, assert_instr(vextracti32x4, IMM1 = 1))]
25119#[rustc_legacy_const_generics(3)]
25120pub fn _mm256_mask_extracti32x4_epi32<const IMM1: i32>(
25121    src: __m128i,
25122    k: __mmask8,
25123    a: __m256i,
25124) -> __m128i {
25125    unsafe {
25126        static_assert_uimm_bits!(IMM1, 1);
25127        let r = _mm256_extracti32x4_epi32::<IMM1>(a);
25128        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
25129    }
25130}
25131
25132/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25133///
25134/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extracti32x4_epi32&expand=2460)
25135#[inline]
25136#[target_feature(enable = "avx512f,avx512vl")]
25137#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25138#[cfg_attr(test, assert_instr(vextracti32x4, IMM1 = 1))]
25139#[rustc_legacy_const_generics(2)]
25140pub fn _mm256_maskz_extracti32x4_epi32<const IMM1: i32>(k: __mmask8, a: __m256i) -> __m128i {
25141    unsafe {
25142        static_assert_uimm_bits!(IMM1, 1);
25143        let r = _mm256_extracti32x4_epi32::<IMM1>(a);
25144        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
25145    }
25146}
25147
25148/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst.
25149///
25150/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_moveldup_ps&expand=3862)
25151#[inline]
25152#[target_feature(enable = "avx512f")]
25153#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25154#[cfg_attr(test, assert_instr(vmovsldup))]
25155pub fn _mm512_moveldup_ps(a: __m512) -> __m512 {
25156    unsafe {
25157        let r: f32x16 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
25158        transmute(r)
25159    }
25160}
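// Illustrative sketch (comment only, not compiled here): each even-indexed element is
// duplicated into the odd slot above it:
//
//     let r = _mm512_moveldup_ps(a);
//     // r[2 * i] == r[2 * i + 1] == a[2 * i] for i in 0..8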
25161
25162/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25163///
25164/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_moveldup_ps&expand=3860)
25165#[inline]
25166#[target_feature(enable = "avx512f")]
25167#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25168#[cfg_attr(test, assert_instr(vmovsldup))]
25169pub fn _mm512_mask_moveldup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
25170    unsafe {
25171        let mov: f32x16 =
25172            simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
25173        transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
25174    }
25175}
25176
25177/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25178///
25179/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_moveldup_ps&expand=3861)
25180#[inline]
25181#[target_feature(enable = "avx512f")]
25182#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25183#[cfg_attr(test, assert_instr(vmovsldup))]
25184pub fn _mm512_maskz_moveldup_ps(k: __mmask16, a: __m512) -> __m512 {
25185    unsafe {
25186        let mov: f32x16 =
25187            simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
25188        transmute(simd_select_bitmask(k, mov, f32x16::ZERO))
25189    }
25190}
25191
25192/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25193///
25194/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_moveldup_ps&expand=3857)
25195#[inline]
25196#[target_feature(enable = "avx512f,avx512vl")]
25197#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25198#[cfg_attr(test, assert_instr(vmovsldup))]
25199pub fn _mm256_mask_moveldup_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
25200    unsafe {
25201        let mov = _mm256_moveldup_ps(a);
25202        transmute(simd_select_bitmask(k, mov.as_f32x8(), src.as_f32x8()))
25203    }
25204}
25205
25206/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25207///
25208/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_moveldup_ps&expand=3858)
25209#[inline]
25210#[target_feature(enable = "avx512f,avx512vl")]
25211#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25212#[cfg_attr(test, assert_instr(vmovsldup))]
25213pub fn _mm256_maskz_moveldup_ps(k: __mmask8, a: __m256) -> __m256 {
25214    unsafe {
25215        let mov = _mm256_moveldup_ps(a);
25216        transmute(simd_select_bitmask(k, mov.as_f32x8(), f32x8::ZERO))
25217    }
25218}
25219
25220/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25221///
25222/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_moveldup_ps&expand=3854)
25223#[inline]
25224#[target_feature(enable = "avx512f,avx512vl")]
25225#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25226#[cfg_attr(test, assert_instr(vmovsldup))]
25227pub fn _mm_mask_moveldup_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
25228    unsafe {
25229        let mov = _mm_moveldup_ps(a);
25230        transmute(simd_select_bitmask(k, mov.as_f32x4(), src.as_f32x4()))
25231    }
25232}
25233
25234/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25235///
25236/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_moveldup_ps&expand=3855)
25237#[inline]
25238#[target_feature(enable = "avx512f,avx512vl")]
25239#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25240#[cfg_attr(test, assert_instr(vmovsldup))]
25241pub fn _mm_maskz_moveldup_ps(k: __mmask8, a: __m128) -> __m128 {
25242    unsafe {
25243        let mov = _mm_moveldup_ps(a);
25244        transmute(simd_select_bitmask(k, mov.as_f32x4(), f32x4::ZERO))
25245    }
25246}
25247
25248/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst.
25249///
25250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movehdup_ps&expand=3852)
25251#[inline]
25252#[target_feature(enable = "avx512f")]
25253#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25254#[cfg_attr(test, assert_instr(vmovshdup))]
25255pub fn _mm512_movehdup_ps(a: __m512) -> __m512 {
25256    unsafe {
25257        let r: f32x16 = simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
25258        transmute(r)
25259    }
25260}
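// Illustrative sketch (comment only, not compiled here): each odd-indexed element is
// duplicated into the even slot below it:
//
//     let r = _mm512_movehdup_ps(a);
//     // r[2 * i] == r[2 * i + 1] == a[2 * i + 1] for i in 0..8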
25261
25262/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25263///
25264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_movehdup_ps&expand=3850)
25265#[inline]
25266#[target_feature(enable = "avx512f")]
25267#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25268#[cfg_attr(test, assert_instr(vmovshdup))]
25269pub fn _mm512_mask_movehdup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
25270    unsafe {
25271        let mov: f32x16 =
25272            simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
25273        transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
25274    }
25275}
25276
25277/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25278///
25279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_movehdup_ps&expand=3851)
25280#[inline]
25281#[target_feature(enable = "avx512f")]
25282#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25283#[cfg_attr(test, assert_instr(vmovshdup))]
25284pub fn _mm512_maskz_movehdup_ps(k: __mmask16, a: __m512) -> __m512 {
25285    unsafe {
25286        let mov: f32x16 =
25287            simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
25288        transmute(simd_select_bitmask(k, mov, f32x16::ZERO))
25289    }
25290}
25291
25292/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25293///
25294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_movehdup_ps&expand=3847)
25295#[inline]
25296#[target_feature(enable = "avx512f,avx512vl")]
25297#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25298#[cfg_attr(test, assert_instr(vmovshdup))]
25299pub fn _mm256_mask_movehdup_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
25300    unsafe {
25301        let mov = _mm256_movehdup_ps(a);
25302        transmute(simd_select_bitmask(k, mov.as_f32x8(), src.as_f32x8()))
25303    }
25304}
25305
25306/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25307///
25308/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_movehdup_ps&expand=3848)
25309#[inline]
25310#[target_feature(enable = "avx512f,avx512vl")]
25311#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25312#[cfg_attr(test, assert_instr(vmovshdup))]
25313pub fn _mm256_maskz_movehdup_ps(k: __mmask8, a: __m256) -> __m256 {
25314    unsafe {
25315        let mov = _mm256_movehdup_ps(a);
25316        transmute(simd_select_bitmask(k, mov.as_f32x8(), f32x8::ZERO))
25317    }
25318}
25319
25320/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25321///
25322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_movehdup_ps&expand=3844)
25323#[inline]
25324#[target_feature(enable = "avx512f,avx512vl")]
25325#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25326#[cfg_attr(test, assert_instr(vmovshdup))]
25327pub fn _mm_mask_movehdup_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
25328    unsafe {
25329        let mov = _mm_movehdup_ps(a);
25330        transmute(simd_select_bitmask(k, mov.as_f32x4(), src.as_f32x4()))
25331    }
25332}
25333
25334/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25335///
25336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_movehdup_ps&expand=3845)
25337#[inline]
25338#[target_feature(enable = "avx512f,avx512vl")]
25339#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25340#[cfg_attr(test, assert_instr(vmovshdup))]
25341pub fn _mm_maskz_movehdup_ps(k: __mmask8, a: __m128) -> __m128 {
25342    unsafe {
25343        let mov = _mm_movehdup_ps(a);
25344        transmute(simd_select_bitmask(k, mov.as_f32x4(), f32x4::ZERO))
25345    }
25346}
25347
25348/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst.
25349///
25350/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movedup_pd&expand=3843)
25351#[inline]
25352#[target_feature(enable = "avx512f")]
25353#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25354#[cfg_attr(test, assert_instr(vmovddup))]
25355pub fn _mm512_movedup_pd(a: __m512d) -> __m512d {
25356    unsafe {
25357        let r: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
25358        transmute(r)
25359    }
25360}
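// Illustrative sketch (comment only, not compiled here): the even-indexed doubles of
// `a` are each duplicated into the adjacent odd slot:
//
//     let r = _mm512_movedup_pd(a);
//     // r = [a0, a0, a2, a2, a4, a4, a6, a6]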
25361
25362/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25363///
25364/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_movedup_pd&expand=3841)
25365#[inline]
25366#[target_feature(enable = "avx512f")]
25367#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25368#[cfg_attr(test, assert_instr(vmovddup))]
25369pub fn _mm512_mask_movedup_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
25370    unsafe {
25371        let mov: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
25372        transmute(simd_select_bitmask(k, mov, src.as_f64x8()))
25373    }
25374}
25375
25376/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25377///
25378/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_movedup_pd&expand=3842)
25379#[inline]
25380#[target_feature(enable = "avx512f")]
25381#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25382#[cfg_attr(test, assert_instr(vmovddup))]
25383pub fn _mm512_maskz_movedup_pd(k: __mmask8, a: __m512d) -> __m512d {
25384    unsafe {
25385        let mov: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
25386        transmute(simd_select_bitmask(k, mov, f64x8::ZERO))
25387    }
25388}
25389
25390/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25391///
25392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_movedup_pd&expand=3838)
25393#[inline]
25394#[target_feature(enable = "avx512f,avx512vl")]
25395#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25396#[cfg_attr(test, assert_instr(vmovddup))]
25397pub fn _mm256_mask_movedup_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
25398    unsafe {
25399        let mov = _mm256_movedup_pd(a);
25400        transmute(simd_select_bitmask(k, mov.as_f64x4(), src.as_f64x4()))
25401    }
25402}
25403
25404/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25405///
25406/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_movedup_pd&expand=3839)
25407#[inline]
25408#[target_feature(enable = "avx512f,avx512vl")]
25409#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25410#[cfg_attr(test, assert_instr(vmovddup))]
25411pub fn _mm256_maskz_movedup_pd(k: __mmask8, a: __m256d) -> __m256d {
25412    unsafe {
25413        let mov = _mm256_movedup_pd(a);
25414        transmute(simd_select_bitmask(k, mov.as_f64x4(), f64x4::ZERO))
25415    }
25416}
25417
25418/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25419///
25420/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_movedup_pd&expand=3835)
25421#[inline]
25422#[target_feature(enable = "avx512f,avx512vl")]
25423#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25424#[cfg_attr(test, assert_instr(vmovddup))]
25425pub fn _mm_mask_movedup_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
25426    unsafe {
25427        let mov = _mm_movedup_pd(a);
25428        transmute(simd_select_bitmask(k, mov.as_f64x2(), src.as_f64x2()))
25429    }
25430}
25431
25432/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25433///
25434/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_movedup_pd&expand=3836)
25435#[inline]
25436#[target_feature(enable = "avx512f,avx512vl")]
25437#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25438#[cfg_attr(test, assert_instr(vmovddup))]
25439pub fn _mm_maskz_movedup_pd(k: __mmask8, a: __m128d) -> __m128d {
25440    unsafe {
25441        let mov = _mm_movedup_pd(a);
25442        transmute(simd_select_bitmask(k, mov.as_f64x2(), f64x2::ZERO))
25443    }
25444}
25445
25446/// Copy a to dst, then insert 128 bits (composed of 4 packed 32-bit integers) from b into dst at the location specified by imm8.
25447///
25448/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti32x4&expand=3174)
25449#[inline]
25450#[target_feature(enable = "avx512f")]
25451#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25452#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))] //should be vinserti32x4
25453#[rustc_legacy_const_generics(2)]
25454pub fn _mm512_inserti32x4<const IMM8: i32>(a: __m512i, b: __m128i) -> __m512i {
25455    unsafe {
25456        static_assert_uimm_bits!(IMM8, 2);
25457        let a = a.as_i32x16();
25458        let b = _mm512_castsi128_si512(b).as_i32x16();
25459        let ret: i32x16 = match IMM8 & 0b11 {
25460            0 => {
25461                simd_shuffle!(
25462                    a,
25463                    b,
25464                    [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
25465                )
25466            }
25467            1 => {
25468                simd_shuffle!(
25469                    a,
25470                    b,
25471                    [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
25472                )
25473            }
25474            2 => {
25475                simd_shuffle!(
25476                    a,
25477                    b,
25478                    [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
25479                )
25480            }
25481            _ => {
25482                simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19])
25483            }
25484        };
25485        transmute(ret)
25486    }
25487}
25488
25489/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25490///
25491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti32x4&expand=3175)
25492#[inline]
25493#[target_feature(enable = "avx512f")]
25494#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25495#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 2))]
25496#[rustc_legacy_const_generics(4)]
25497pub fn _mm512_mask_inserti32x4<const IMM8: i32>(
25498    src: __m512i,
25499    k: __mmask16,
25500    a: __m512i,
25501    b: __m128i,
25502) -> __m512i {
25503    unsafe {
25504        static_assert_uimm_bits!(IMM8, 2);
25505        let r = _mm512_inserti32x4::<IMM8>(a, b);
25506        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
25507    }
25508}
25509
25510/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25511///
25512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti32x4&expand=3176)
25513#[inline]
25514#[target_feature(enable = "avx512f")]
25515#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25516#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 2))]
25517#[rustc_legacy_const_generics(3)]
25518pub fn _mm512_maskz_inserti32x4<const IMM8: i32>(k: __mmask16, a: __m512i, b: __m128i) -> __m512i {
25519    unsafe {
25520        static_assert_uimm_bits!(IMM8, 2);
25521        let r = _mm512_inserti32x4::<IMM8>(a, b);
25522        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
25523    }
25524}
25525
25526/// Copy a to dst, then insert 128 bits (composed of 4 packed 32-bit integers) from b into dst at the location specified by imm8.
25527///
25528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_inserti32x4&expand=3171)
25529#[inline]
25530#[target_feature(enable = "avx512f,avx512vl")]
25531#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25532#[cfg_attr(
25533    test,
25534    assert_instr(vinsert, IMM8 = 1) //should be vinserti32x4
25535)]
25536#[rustc_legacy_const_generics(2)]
25537pub fn _mm256_inserti32x4<const IMM8: i32>(a: __m256i, b: __m128i) -> __m256i {
25538    unsafe {
25539        static_assert_uimm_bits!(IMM8, 1);
25540        let a = a.as_i32x8();
25541        let b = _mm256_castsi128_si256(b).as_i32x8();
25542        let ret: i32x8 = match IMM8 & 0b1 {
25543            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
25544            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
25545        };
25546        transmute(ret)
25547    }
25548}
25549
25550/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25551///
25552/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_inserti32x4&expand=3172)
25553#[inline]
25554#[target_feature(enable = "avx512f,avx512vl")]
25555#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25556#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 1))]
25557#[rustc_legacy_const_generics(4)]
25558pub fn _mm256_mask_inserti32x4<const IMM8: i32>(
25559    src: __m256i,
25560    k: __mmask8,
25561    a: __m256i,
25562    b: __m128i,
25563) -> __m256i {
25564    unsafe {
25565        static_assert_uimm_bits!(IMM8, 1);
25566        let r = _mm256_inserti32x4::<IMM8>(a, b);
25567        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
25568    }
25569}
25570
25571/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25572///
25573/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_inserti32x4&expand=3173)
25574#[inline]
25575#[target_feature(enable = "avx512f,avx512vl")]
25576#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25577#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 1))]
25578#[rustc_legacy_const_generics(3)]
25579pub fn _mm256_maskz_inserti32x4<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m128i) -> __m256i {
25580    unsafe {
25581        static_assert_uimm_bits!(IMM8, 1);
25582        let r = _mm256_inserti32x4::<IMM8>(a, b);
25583        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
25584    }
25585}
25586
25587/// Copy a to dst, then insert 256 bits (composed of 4 packed 64-bit integers) from b into dst at the location specified by imm8.
25588///
25589/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti64x4&expand=3186)
25590#[inline]
25591#[target_feature(enable = "avx512f")]
25592#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25593#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))] //should be vinserti64x4
25594#[rustc_legacy_const_generics(2)]
25595pub fn _mm512_inserti64x4<const IMM8: i32>(a: __m512i, b: __m256i) -> __m512i {
25596    unsafe {
25597        static_assert_uimm_bits!(IMM8, 1);
25598        let b = _mm512_castsi256_si512(b);
25599        match IMM8 & 0b1 {
25600            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
25601            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
25602        }
25603    }
25604}
25605
25606/// Copy a to tmp, then insert 256 bits (composed of 4 packed 64-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25607///
25608/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti64x4&expand=3187)
25609#[inline]
25610#[target_feature(enable = "avx512f")]
25611#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25612#[cfg_attr(test, assert_instr(vinserti64x4, IMM8 = 1))]
25613#[rustc_legacy_const_generics(4)]
25614pub fn _mm512_mask_inserti64x4<const IMM8: i32>(
25615    src: __m512i,
25616    k: __mmask8,
25617    a: __m512i,
25618    b: __m256i,
25619) -> __m512i {
25620    unsafe {
25621        static_assert_uimm_bits!(IMM8, 1);
25622        let r = _mm512_inserti64x4::<IMM8>(a, b);
25623        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
25624    }
25625}
25626
25627/// Copy a to tmp, then insert 256 bits (composed of 4 packed 64-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25628///
25629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti64x4&expand=3188)
25630#[inline]
25631#[target_feature(enable = "avx512f")]
25632#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25633#[cfg_attr(test, assert_instr(vinserti64x4, IMM8 = 1))]
25634#[rustc_legacy_const_generics(3)]
25635pub fn _mm512_maskz_inserti64x4<const IMM8: i32>(k: __mmask8, a: __m512i, b: __m256i) -> __m512i {
25636    unsafe {
25637        static_assert_uimm_bits!(IMM8, 1);
25638        let r = _mm512_inserti64x4::<IMM8>(a, b);
25639        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
25640    }
25641}
25642
25643/// Copy a to dst, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into dst at the location specified by imm8.
25644///
25645/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf32x4&expand=3155)
25646#[inline]
25647#[target_feature(enable = "avx512f")]
25648#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25649#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
25650#[rustc_legacy_const_generics(2)]
25651pub fn _mm512_insertf32x4<const IMM8: i32>(a: __m512, b: __m128) -> __m512 {
25652    unsafe {
25653        static_assert_uimm_bits!(IMM8, 2);
25654        let b = _mm512_castps128_ps512(b);
25655        match IMM8 & 0b11 {
25656            0 => {
25657                simd_shuffle!(
25658                    a,
25659                    b,
25660                    [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
25661                )
25662            }
25663            1 => {
25664                simd_shuffle!(
25665                    a,
25666                    b,
25667                    [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
25668                )
25669            }
25670            2 => {
25671                simd_shuffle!(
25672                    a,
25673                    b,
25674                    [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
25675                )
25676            }
25677            _ => {
25678                simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19])
25679            }
25680        }
25681    }
25682}
25683
25684/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25685///
25686/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf32x4&expand=3156)
25687#[inline]
25688#[target_feature(enable = "avx512f")]
25689#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25690#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
25691#[rustc_legacy_const_generics(4)]
25692pub fn _mm512_mask_insertf32x4<const IMM8: i32>(
25693    src: __m512,
25694    k: __mmask16,
25695    a: __m512,
25696    b: __m128,
25697) -> __m512 {
25698    unsafe {
25699        static_assert_uimm_bits!(IMM8, 2);
25700        let r = _mm512_insertf32x4::<IMM8>(a, b);
25701        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
25702    }
25703}
25704
25705/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25706///
25707/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf32x4&expand=3157)
25708#[inline]
25709#[target_feature(enable = "avx512f")]
25710#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25711#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
25712#[rustc_legacy_const_generics(3)]
25713pub fn _mm512_maskz_insertf32x4<const IMM8: i32>(k: __mmask16, a: __m512, b: __m128) -> __m512 {
25714    unsafe {
25715        static_assert_uimm_bits!(IMM8, 2);
25716        let r = _mm512_insertf32x4::<IMM8>(a, b);
25717        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
25718    }
25719}
25720
25721/// Copy a to dst, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into dst at the location specified by imm8.
25722///
25723/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insertf32x4&expand=3152)
25724#[inline]
25725#[target_feature(enable = "avx512f,avx512vl")]
25726#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25727#[cfg_attr(
25728    test,
25729    assert_instr(vinsert, IMM8 = 1) //should be vinsertf32x4
25730)]
25731#[rustc_legacy_const_generics(2)]
25732pub fn _mm256_insertf32x4<const IMM8: i32>(a: __m256, b: __m128) -> __m256 {
25733    unsafe {
25734        static_assert_uimm_bits!(IMM8, 1);
25735        let b = _mm256_castps128_ps256(b);
25736        match IMM8 & 0b1 {
25737            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
25738            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
25739        }
25740    }
25741}
25742
25743/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25744///
25745/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_insertf32x4&expand=3153)
25746#[inline]
25747#[target_feature(enable = "avx512f,avx512vl")]
25748#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25749#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 1))]
25750#[rustc_legacy_const_generics(4)]
25751pub fn _mm256_mask_insertf32x4<const IMM8: i32>(
25752    src: __m256,
25753    k: __mmask8,
25754    a: __m256,
25755    b: __m128,
25756) -> __m256 {
25757    unsafe {
25758        static_assert_uimm_bits!(IMM8, 1);
25759        let r = _mm256_insertf32x4::<IMM8>(a, b);
25760        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
25761    }
25762}
25763
25764/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25765///
25766/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_insertf32x4&expand=3154)
25767#[inline]
25768#[target_feature(enable = "avx512f,avx512vl")]
25769#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25770#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 1))]
25771#[rustc_legacy_const_generics(3)]
25772pub fn _mm256_maskz_insertf32x4<const IMM8: i32>(k: __mmask8, a: __m256, b: __m128) -> __m256 {
25773    unsafe {
25774        static_assert_uimm_bits!(IMM8, 1);
25775        let r = _mm256_insertf32x4::<IMM8>(a, b);
25776        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
25777    }
25778}
25779
25780/// Copy a to dst, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into dst at the location specified by imm8.
25781///
25782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf64x4&expand=3167)
25783#[inline]
25784#[target_feature(enable = "avx512f")]
25785#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25786#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
25787#[rustc_legacy_const_generics(2)]
25788pub fn _mm512_insertf64x4<const IMM8: i32>(a: __m512d, b: __m256d) -> __m512d {
25789    unsafe {
25790        static_assert_uimm_bits!(IMM8, 1);
25791        let b = _mm512_castpd256_pd512(b);
25792        match IMM8 & 0b1 {
25793            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
25794            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
25795        }
25796    }
25797}
25798
25799/// Copy a to tmp, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25800///
25801/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf64x4&expand=3168)
25802#[inline]
25803#[target_feature(enable = "avx512f")]
25804#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25805#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
25806#[rustc_legacy_const_generics(4)]
25807pub fn _mm512_mask_insertf64x4<const IMM8: i32>(
25808    src: __m512d,
25809    k: __mmask8,
25810    a: __m512d,
25811    b: __m256d,
25812) -> __m512d {
25813    unsafe {
25814        static_assert_uimm_bits!(IMM8, 1);
25815        let r = _mm512_insertf64x4::<IMM8>(a, b);
25816        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
25817    }
25818}
25819
25820/// Copy a to tmp, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25821///
25822/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf64x4&expand=3169)
25823#[inline]
25824#[target_feature(enable = "avx512f")]
25825#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25826#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
25827#[rustc_legacy_const_generics(3)]
25828pub fn _mm512_maskz_insertf64x4<const IMM8: i32>(k: __mmask8, a: __m512d, b: __m256d) -> __m512d {
25829    unsafe {
25830        static_assert_uimm_bits!(IMM8, 1);
25831        let r = _mm512_insertf64x4::<IMM8>(a, b);
25832        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
25833    }
25834}
25835
25836/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
25837///
25838/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi32&expand=6021)
25839#[inline]
25840#[target_feature(enable = "avx512f")]
25841#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25842#[cfg_attr(test, assert_instr(vunpckhps))] //should be vpunpckhdq
25843pub fn _mm512_unpackhi_epi32(a: __m512i, b: __m512i) -> __m512i {
25844    unsafe {
25845        let a = a.as_i32x16();
25846        let b = b.as_i32x16();
25847        #[rustfmt::skip]
25848        let r: i32x16 = simd_shuffle!(
25849            a, b,
25850            [ 2, 18, 3, 19,
25851              2 + 4, 18 + 4, 3 + 4, 19 + 4,
25852              2 + 8, 18 + 8, 3 + 8, 19 + 8,
25853              2 + 12, 18 + 12, 3 + 12, 19 + 12],
25854        );
25855        transmute(r)
25856    }
25857}
25858
25859/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25860///
25861/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi32&expand=6019)
25862#[inline]
25863#[target_feature(enable = "avx512f")]
25864#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25865#[cfg_attr(test, assert_instr(vpunpckhdq))]
25866pub fn _mm512_mask_unpackhi_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
25867    unsafe {
25868        let unpackhi = _mm512_unpackhi_epi32(a, b).as_i32x16();
25869        transmute(simd_select_bitmask(k, unpackhi, src.as_i32x16()))
25870    }
25871}
25872
25873/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25874///
25875/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi32&expand=6020)
25876#[inline]
25877#[target_feature(enable = "avx512f")]
25878#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25879#[cfg_attr(test, assert_instr(vpunpckhdq))]
25880pub fn _mm512_maskz_unpackhi_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
25881    unsafe {
25882        let unpackhi = _mm512_unpackhi_epi32(a, b).as_i32x16();
25883        transmute(simd_select_bitmask(k, unpackhi, i32x16::ZERO))
25884    }
25885}
25886
25887/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25888///
25889/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi32&expand=6016)
25890#[inline]
25891#[target_feature(enable = "avx512f,avx512vl")]
25892#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25893#[cfg_attr(test, assert_instr(vpunpckhdq))]
25894pub fn _mm256_mask_unpackhi_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
25895    unsafe {
25896        let unpackhi = _mm256_unpackhi_epi32(a, b).as_i32x8();
25897        transmute(simd_select_bitmask(k, unpackhi, src.as_i32x8()))
25898    }
25899}
25900
25901/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25902///
25903/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi32&expand=6017)
25904#[inline]
25905#[target_feature(enable = "avx512f,avx512vl")]
25906#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25907#[cfg_attr(test, assert_instr(vpunpckhdq))]
25908pub fn _mm256_maskz_unpackhi_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
25909    unsafe {
25910        let unpackhi = _mm256_unpackhi_epi32(a, b).as_i32x8();
25911        transmute(simd_select_bitmask(k, unpackhi, i32x8::ZERO))
25912    }
25913}
25914
25915/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25916///
25917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi32&expand=6013)
25918#[inline]
25919#[target_feature(enable = "avx512f,avx512vl")]
25920#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25921#[cfg_attr(test, assert_instr(vpunpckhdq))]
25922pub fn _mm_mask_unpackhi_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
25923    unsafe {
25924        let unpackhi = _mm_unpackhi_epi32(a, b).as_i32x4();
25925        transmute(simd_select_bitmask(k, unpackhi, src.as_i32x4()))
25926    }
25927}
25928
25929/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25930///
25931/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi32&expand=6014)
25932#[inline]
25933#[target_feature(enable = "avx512f,avx512vl")]
25934#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25935#[cfg_attr(test, assert_instr(vpunpckhdq))]
25936pub fn _mm_maskz_unpackhi_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
25937    unsafe {
25938        let unpackhi = _mm_unpackhi_epi32(a, b).as_i32x4();
25939        transmute(simd_select_bitmask(k, unpackhi, i32x4::ZERO))
25940    }
25941}
25942
25943/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
25944///
25945/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi64&expand=6030)
25946#[inline]
25947#[target_feature(enable = "avx512f")]
25948#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25949#[cfg_attr(test, assert_instr(vunpckhpd))] //should be vpunpckhqdq
25950pub fn _mm512_unpackhi_epi64(a: __m512i, b: __m512i) -> __m512i {
25951    unsafe { simd_shuffle!(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6]) }
25952}
25953
25954/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25955///
25956/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi64&expand=6028)
25957#[inline]
25958#[target_feature(enable = "avx512f")]
25959#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25960#[cfg_attr(test, assert_instr(vpunpckhqdq))]
25961pub fn _mm512_mask_unpackhi_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
25962    unsafe {
25963        let unpackhi = _mm512_unpackhi_epi64(a, b).as_i64x8();
25964        transmute(simd_select_bitmask(k, unpackhi, src.as_i64x8()))
25965    }
25966}
25967
25968/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25969///
25970/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi64&expand=6029)
25971#[inline]
25972#[target_feature(enable = "avx512f")]
25973#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25974#[cfg_attr(test, assert_instr(vpunpckhqdq))]
25975pub fn _mm512_maskz_unpackhi_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
25976    unsafe {
25977        let unpackhi = _mm512_unpackhi_epi64(a, b).as_i64x8();
25978        transmute(simd_select_bitmask(k, unpackhi, i64x8::ZERO))
25979    }
25980}
25981
25982/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25983///
25984/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi64&expand=6025)
25985#[inline]
25986#[target_feature(enable = "avx512f,avx512vl")]
25987#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25988#[cfg_attr(test, assert_instr(vpunpckhqdq))]
25989pub fn _mm256_mask_unpackhi_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
25990    unsafe {
25991        let unpackhi = _mm256_unpackhi_epi64(a, b).as_i64x4();
25992        transmute(simd_select_bitmask(k, unpackhi, src.as_i64x4()))
25993    }
25994}
25995
25996/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25997///
25998/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi64&expand=6026)
25999#[inline]
26000#[target_feature(enable = "avx512f,avx512vl")]
26001#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26002#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26003pub fn _mm256_maskz_unpackhi_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26004    unsafe {
26005        let unpackhi = _mm256_unpackhi_epi64(a, b).as_i64x4();
26006        transmute(simd_select_bitmask(k, unpackhi, i64x4::ZERO))
26007    }
26008}
26009
26010/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26011///
26012/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi64&expand=6022)
26013#[inline]
26014#[target_feature(enable = "avx512f,avx512vl")]
26015#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26016#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26017pub fn _mm_mask_unpackhi_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26018    unsafe {
26019        let unpackhi = _mm_unpackhi_epi64(a, b).as_i64x2();
26020        transmute(simd_select_bitmask(k, unpackhi, src.as_i64x2()))
26021    }
26022}
26023
26024/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26025///
26026/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi64&expand=6023)
26027#[inline]
26028#[target_feature(enable = "avx512f,avx512vl")]
26029#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26030#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26031pub fn _mm_maskz_unpackhi_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26032    unsafe {
26033        let unpackhi = _mm_unpackhi_epi64(a, b).as_i64x2();
26034        transmute(simd_select_bitmask(k, unpackhi, i64x2::ZERO))
26035    }
26036}
26037
26038/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst.
26039///
26040/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_ps&expand=6060)
26041#[inline]
26042#[target_feature(enable = "avx512f")]
26043#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26044#[cfg_attr(test, assert_instr(vunpckhps))]
26045pub fn _mm512_unpackhi_ps(a: __m512, b: __m512) -> __m512 {
26046    unsafe {
26047        #[rustfmt::skip]
26048        simd_shuffle!(
26049            a, b,
26050            [ 2, 18, 3, 19,
26051              2 + 4, 18 + 4, 3 + 4, 19 + 4,
26052              2 + 8, 18 + 8, 3 + 8, 19 + 8,
26053              2 + 12, 18 + 12, 3 + 12, 19 + 12],
26054        )
26055    }
26056}
26057
26058/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26059///
26060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_ps&expand=6058)
26061#[inline]
26062#[target_feature(enable = "avx512f")]
26063#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26064#[cfg_attr(test, assert_instr(vunpckhps))]
26065pub fn _mm512_mask_unpackhi_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
26066    unsafe {
26067        let unpackhi = _mm512_unpackhi_ps(a, b).as_f32x16();
26068        transmute(simd_select_bitmask(k, unpackhi, src.as_f32x16()))
26069    }
26070}
26071
26072/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26073///
26074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_ps&expand=6059)
26075#[inline]
26076#[target_feature(enable = "avx512f")]
26077#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26078#[cfg_attr(test, assert_instr(vunpckhps))]
26079pub fn _mm512_maskz_unpackhi_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
26080    unsafe {
26081        let unpackhi = _mm512_unpackhi_ps(a, b).as_f32x16();
26082        transmute(simd_select_bitmask(k, unpackhi, f32x16::ZERO))
26083    }
26084}
26085
26086/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26087///
26088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_ps&expand=6055)
26089#[inline]
26090#[target_feature(enable = "avx512f,avx512vl")]
26091#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26092#[cfg_attr(test, assert_instr(vunpckhps))]
26093pub fn _mm256_mask_unpackhi_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
26094    unsafe {
26095        let unpackhi = _mm256_unpackhi_ps(a, b).as_f32x8();
26096        transmute(simd_select_bitmask(k, unpackhi, src.as_f32x8()))
26097    }
26098}
26099
26100/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26101///
26102/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_ps&expand=6056)
26103#[inline]
26104#[target_feature(enable = "avx512f,avx512vl")]
26105#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26106#[cfg_attr(test, assert_instr(vunpckhps))]
26107pub fn _mm256_maskz_unpackhi_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
26108    unsafe {
26109        let unpackhi = _mm256_unpackhi_ps(a, b).as_f32x8();
26110        transmute(simd_select_bitmask(k, unpackhi, f32x8::ZERO))
26111    }
26112}
26113
26114/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26115///
26116/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_ps&expand=6052)
26117#[inline]
26118#[target_feature(enable = "avx512f,avx512vl")]
26119#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26120#[cfg_attr(test, assert_instr(vunpckhps))]
26121pub fn _mm_mask_unpackhi_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
26122    unsafe {
26123        let unpackhi = _mm_unpackhi_ps(a, b).as_f32x4();
26124        transmute(simd_select_bitmask(k, unpackhi, src.as_f32x4()))
26125    }
26126}
26127
26128/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26129///
26130/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_ps&expand=6053)
26131#[inline]
26132#[target_feature(enable = "avx512f,avx512vl")]
26133#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26134#[cfg_attr(test, assert_instr(vunpckhps))]
26135pub fn _mm_maskz_unpackhi_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
26136    unsafe {
26137        let unpackhi = _mm_unpackhi_ps(a, b).as_f32x4();
26138        transmute(simd_select_bitmask(k, unpackhi, f32x4::ZERO))
26139    }
26140}
26141
26142/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst.
26143///
26144/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_pd&expand=6048)
26145#[inline]
26146#[target_feature(enable = "avx512f")]
26147#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26148#[cfg_attr(test, assert_instr(vunpckhpd))]
26149pub fn _mm512_unpackhi_pd(a: __m512d, b: __m512d) -> __m512d {
26150    unsafe { simd_shuffle!(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6]) }
26151}
26152
26153/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26154///
26155/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_pd&expand=6046)
26156#[inline]
26157#[target_feature(enable = "avx512f")]
26158#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26159#[cfg_attr(test, assert_instr(vunpckhpd))]
26160pub fn _mm512_mask_unpackhi_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
26161    unsafe {
26162        let unpackhi = _mm512_unpackhi_pd(a, b).as_f64x8();
26163        transmute(simd_select_bitmask(k, unpackhi, src.as_f64x8()))
26164    }
26165}
26166
26167/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26168///
26169/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_pd&expand=6047)
26170#[inline]
26171#[target_feature(enable = "avx512f")]
26172#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26173#[cfg_attr(test, assert_instr(vunpckhpd))]
26174pub fn _mm512_maskz_unpackhi_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
26175    unsafe {
26176        let unpackhi = _mm512_unpackhi_pd(a, b).as_f64x8();
26177        transmute(simd_select_bitmask(k, unpackhi, f64x8::ZERO))
26178    }
26179}
26180
26181/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26182///
26183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_pd&expand=6043)
26184#[inline]
26185#[target_feature(enable = "avx512f,avx512vl")]
26186#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26187#[cfg_attr(test, assert_instr(vunpckhpd))]
26188pub fn _mm256_mask_unpackhi_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
26189    unsafe {
26190        let unpackhi = _mm256_unpackhi_pd(a, b).as_f64x4();
26191        transmute(simd_select_bitmask(k, unpackhi, src.as_f64x4()))
26192    }
26193}
26194
26195/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26196///
26197/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_pd&expand=6044)
26198#[inline]
26199#[target_feature(enable = "avx512f,avx512vl")]
26200#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26201#[cfg_attr(test, assert_instr(vunpckhpd))]
26202pub fn _mm256_maskz_unpackhi_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
26203    unsafe {
26204        let unpackhi = _mm256_unpackhi_pd(a, b).as_f64x4();
26205        transmute(simd_select_bitmask(k, unpackhi, f64x4::ZERO))
26206    }
26207}
26208
26209/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26210///
26211/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_pd&expand=6040)
26212#[inline]
26213#[target_feature(enable = "avx512f,avx512vl")]
26214#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26215#[cfg_attr(test, assert_instr(vunpckhpd))]
26216pub fn _mm_mask_unpackhi_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
26217    unsafe {
26218        let unpackhi = _mm_unpackhi_pd(a, b).as_f64x2();
26219        transmute(simd_select_bitmask(k, unpackhi, src.as_f64x2()))
26220    }
26221}
26222
26223/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26224///
26225/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_pd&expand=6041)
26226#[inline]
26227#[target_feature(enable = "avx512f,avx512vl")]
26228#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26229#[cfg_attr(test, assert_instr(vunpckhpd))]
26230pub fn _mm_maskz_unpackhi_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
26231    unsafe {
26232        let unpackhi = _mm_unpackhi_pd(a, b).as_f64x2();
26233        transmute(simd_select_bitmask(k, unpackhi, f64x2::ZERO))
26234    }
26235}
26236
26237/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
26238///
26239/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi32&expand=6078)
26240#[inline]
26241#[target_feature(enable = "avx512f")]
26242#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26243#[cfg_attr(test, assert_instr(vunpcklps))] //should be vpunpckldq
26244pub fn _mm512_unpacklo_epi32(a: __m512i, b: __m512i) -> __m512i {
26245    unsafe {
26246        let a = a.as_i32x16();
26247        let b = b.as_i32x16();
26248        #[rustfmt::skip]
26249        let r: i32x16 = simd_shuffle!(
26250            a, b,
26251            [ 0, 16, 1, 17,
26252              0 + 4, 16 + 4, 1 + 4, 17 + 4,
26253              0 + 8, 16 + 8, 1 + 8, 17 + 8,
26254              0 + 12, 16 + 12, 1 + 12, 17 + 12],
26255        );
26256        transmute(r)
26257    }
26258}
26259
26260/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26261///
26262/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi32&expand=6076)
26263#[inline]
26264#[target_feature(enable = "avx512f")]
26265#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26266#[cfg_attr(test, assert_instr(vpunpckldq))]
26267pub fn _mm512_mask_unpacklo_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
26268    unsafe {
26269        let unpacklo = _mm512_unpacklo_epi32(a, b).as_i32x16();
26270        transmute(simd_select_bitmask(k, unpacklo, src.as_i32x16()))
26271    }
26272}
26273
26274/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26275///
26276/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi32&expand=6077)
26277#[inline]
26278#[target_feature(enable = "avx512f")]
26279#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26280#[cfg_attr(test, assert_instr(vpunpckldq))]
26281pub fn _mm512_maskz_unpacklo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
26282    unsafe {
26283        let unpacklo = _mm512_unpacklo_epi32(a, b).as_i32x16();
26284        transmute(simd_select_bitmask(k, unpacklo, i32x16::ZERO))
26285    }
26286}
26287
26288/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26289///
26290/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi32&expand=6073)
26291#[inline]
26292#[target_feature(enable = "avx512f,avx512vl")]
26293#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26294#[cfg_attr(test, assert_instr(vpunpckldq))]
26295pub fn _mm256_mask_unpacklo_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26296    unsafe {
26297        let unpacklo = _mm256_unpacklo_epi32(a, b).as_i32x8();
26298        transmute(simd_select_bitmask(k, unpacklo, src.as_i32x8()))
26299    }
26300}
26301
26302/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26303///
26304/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi32&expand=6074)
26305#[inline]
26306#[target_feature(enable = "avx512f,avx512vl")]
26307#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26308#[cfg_attr(test, assert_instr(vpunpckldq))]
26309pub fn _mm256_maskz_unpacklo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26310    unsafe {
26311        let unpacklo = _mm256_unpacklo_epi32(a, b).as_i32x8();
26312        transmute(simd_select_bitmask(k, unpacklo, i32x8::ZERO))
26313    }
26314}
26315
26316/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26317///
26318/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi32&expand=6070)
26319#[inline]
26320#[target_feature(enable = "avx512f,avx512vl")]
26321#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26322#[cfg_attr(test, assert_instr(vpunpckldq))]
26323pub fn _mm_mask_unpacklo_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26324    unsafe {
26325        let unpacklo = _mm_unpacklo_epi32(a, b).as_i32x4();
26326        transmute(simd_select_bitmask(k, unpacklo, src.as_i32x4()))
26327    }
26328}
26329
26330/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26331///
26332/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi32&expand=6071)
26333#[inline]
26334#[target_feature(enable = "avx512f,avx512vl")]
26335#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26336#[cfg_attr(test, assert_instr(vpunpckldq))]
26337pub fn _mm_maskz_unpacklo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26338    unsafe {
26339        let unpacklo = _mm_unpacklo_epi32(a, b).as_i32x4();
26340        transmute(simd_select_bitmask(k, unpacklo, i32x4::ZERO))
26341    }
26342}
26343
26344/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
26345///
26346/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi64&expand=6087)
26347#[inline]
26348#[target_feature(enable = "avx512f")]
26349#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26350#[cfg_attr(test, assert_instr(vunpcklpd))] //should be vpunpcklqdq
26351pub fn _mm512_unpacklo_epi64(a: __m512i, b: __m512i) -> __m512i {
26352    unsafe { simd_shuffle!(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6]) }
26353}
26354
26355/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26356///
26357/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi64&expand=6085)
26358#[inline]
26359#[target_feature(enable = "avx512f")]
26360#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26361#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26362pub fn _mm512_mask_unpacklo_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
26363    unsafe {
26364        let unpacklo = _mm512_unpacklo_epi64(a, b).as_i64x8();
26365        transmute(simd_select_bitmask(k, unpacklo, src.as_i64x8()))
26366    }
26367}
26368
26369/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26370///
26371/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi64&expand=6086)
26372#[inline]
26373#[target_feature(enable = "avx512f")]
26374#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26375#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26376pub fn _mm512_maskz_unpacklo_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
26377    unsafe {
26378        let unpacklo = _mm512_unpacklo_epi64(a, b).as_i64x8();
26379        transmute(simd_select_bitmask(k, unpacklo, i64x8::ZERO))
26380    }
26381}
26382
26383/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26384///
26385/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi64&expand=6082)
26386#[inline]
26387#[target_feature(enable = "avx512f,avx512vl")]
26388#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26389#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26390pub fn _mm256_mask_unpacklo_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26391    unsafe {
26392        let unpacklo = _mm256_unpacklo_epi64(a, b).as_i64x4();
26393        transmute(simd_select_bitmask(k, unpacklo, src.as_i64x4()))
26394    }
26395}
26396
26397/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26398///
26399/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi64&expand=6083)
26400#[inline]
26401#[target_feature(enable = "avx512f,avx512vl")]
26402#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26403#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26404pub fn _mm256_maskz_unpacklo_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26405    unsafe {
26406        let unpacklo = _mm256_unpacklo_epi64(a, b).as_i64x4();
26407        transmute(simd_select_bitmask(k, unpacklo, i64x4::ZERO))
26408    }
26409}
26410
26411/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26412///
26413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi64&expand=6079)
26414#[inline]
26415#[target_feature(enable = "avx512f,avx512vl")]
26416#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26417#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26418pub fn _mm_mask_unpacklo_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26419    unsafe {
26420        let unpacklo = _mm_unpacklo_epi64(a, b).as_i64x2();
26421        transmute(simd_select_bitmask(k, unpacklo, src.as_i64x2()))
26422    }
26423}
26424
26425/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26426///
26427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi64&expand=6080)
26428#[inline]
26429#[target_feature(enable = "avx512f,avx512vl")]
26430#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26431#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26432pub fn _mm_maskz_unpacklo_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26433    unsafe {
26434        let unpacklo = _mm_unpacklo_epi64(a, b).as_i64x2();
26435        transmute(simd_select_bitmask(k, unpacklo, i64x2::ZERO))
26436    }
26437}
26438
26439/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst.
26440///
26441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_ps&expand=6117)
26442#[inline]
26443#[target_feature(enable = "avx512f")]
26444#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26445#[cfg_attr(test, assert_instr(vunpcklps))]
26446pub fn _mm512_unpacklo_ps(a: __m512, b: __m512) -> __m512 {
26447    unsafe {
26448        #[rustfmt::skip]
26449        simd_shuffle!(a, b,
26450                       [ 0, 16, 1, 17,
26451                         0 + 4, 16 + 4, 1 + 4, 17 + 4,
26452                         0 + 8, 16 + 8, 1 + 8, 17 + 8,
26453                         0 + 12, 16 + 12, 1 + 12, 17 + 12],
26454        )
26455    }
26456}
26457
26458/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26459///
26460/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_ps&expand=6115)
26461#[inline]
26462#[target_feature(enable = "avx512f")]
26463#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26464#[cfg_attr(test, assert_instr(vunpcklps))]
26465pub fn _mm512_mask_unpacklo_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
26466    unsafe {
26467        let unpacklo = _mm512_unpacklo_ps(a, b).as_f32x16();
26468        transmute(simd_select_bitmask(k, unpacklo, src.as_f32x16()))
26469    }
26470}
26471
26472/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26473///
26474/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_ps&expand=6116)
26475#[inline]
26476#[target_feature(enable = "avx512f")]
26477#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26478#[cfg_attr(test, assert_instr(vunpcklps))]
26479pub fn _mm512_maskz_unpacklo_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
26480    unsafe {
26481        let unpacklo = _mm512_unpacklo_ps(a, b).as_f32x16();
26482        transmute(simd_select_bitmask(k, unpacklo, f32x16::ZERO))
26483    }
26484}
26485
26486/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26487///
26488/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_ps&expand=6112)
26489#[inline]
26490#[target_feature(enable = "avx512f,avx512vl")]
26491#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26492#[cfg_attr(test, assert_instr(vunpcklps))]
26493pub fn _mm256_mask_unpacklo_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
26494    unsafe {
26495        let unpacklo = _mm256_unpacklo_ps(a, b).as_f32x8();
26496        transmute(simd_select_bitmask(k, unpacklo, src.as_f32x8()))
26497    }
26498}
26499
26500/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26501///
26502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_ps&expand=6113)
26503#[inline]
26504#[target_feature(enable = "avx512f,avx512vl")]
26505#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26506#[cfg_attr(test, assert_instr(vunpcklps))]
26507pub fn _mm256_maskz_unpacklo_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
26508    unsafe {
26509        let unpacklo = _mm256_unpacklo_ps(a, b).as_f32x8();
26510        transmute(simd_select_bitmask(k, unpacklo, f32x8::ZERO))
26511    }
26512}
26513
26514/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26515///
26516/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_ps&expand=6109)
26517#[inline]
26518#[target_feature(enable = "avx512f,avx512vl")]
26519#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26520#[cfg_attr(test, assert_instr(vunpcklps))]
26521pub fn _mm_mask_unpacklo_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
26522    unsafe {
26523        let unpacklo = _mm_unpacklo_ps(a, b).as_f32x4();
26524        transmute(simd_select_bitmask(k, unpacklo, src.as_f32x4()))
26525    }
26526}
26527
26528/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26529///
26530/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_ps&expand=6110)
26531#[inline]
26532#[target_feature(enable = "avx512f,avx512vl")]
26533#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26534#[cfg_attr(test, assert_instr(vunpcklps))]
26535pub fn _mm_maskz_unpacklo_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
26536    unsafe {
26537        let unpacklo = _mm_unpacklo_ps(a, b).as_f32x4();
26538        transmute(simd_select_bitmask(k, unpacklo, f32x4::ZERO))
26539    }
26540}
26541
26542/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst.
26543///
26544/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_pd&expand=6105)
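///
/// # Examples
///
/// An illustrative sketch, not from Intel's documentation; the values are
/// hypothetical and `avx512f` support is assumed.
///
/// ```ignore
/// let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
/// let b = _mm512_setr_pd(8., 9., 10., 11., 12., 13., 14., 15.);
/// // The low element of each 128-bit lane of `a` is paired with the
/// // corresponding low element of `b`.
/// let r = _mm512_unpacklo_pd(a, b);
/// // r == [0, 8, 2, 10, 4, 12, 6, 14]
/// ```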
26545#[inline]
26546#[target_feature(enable = "avx512f")]
26547#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26548#[cfg_attr(test, assert_instr(vunpcklpd))]
26549pub fn _mm512_unpacklo_pd(a: __m512d, b: __m512d) -> __m512d {
26550    unsafe { simd_shuffle!(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6]) }
26551}
26552
26553/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26554///
26555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_pd&expand=6103)
26556#[inline]
26557#[target_feature(enable = "avx512f")]
26558#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26559#[cfg_attr(test, assert_instr(vunpcklpd))]
26560pub fn _mm512_mask_unpacklo_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
26561    unsafe {
26562        let unpacklo = _mm512_unpacklo_pd(a, b).as_f64x8();
26563        transmute(simd_select_bitmask(k, unpacklo, src.as_f64x8()))
26564    }
26565}
26566
26567/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26568///
26569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_pd&expand=6104)
26570#[inline]
26571#[target_feature(enable = "avx512f")]
26572#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26573#[cfg_attr(test, assert_instr(vunpcklpd))]
26574pub fn _mm512_maskz_unpacklo_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
26575    unsafe {
26576        let unpacklo = _mm512_unpacklo_pd(a, b).as_f64x8();
26577        transmute(simd_select_bitmask(k, unpacklo, f64x8::ZERO))
26578    }
26579}
26580
26581/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26582///
26583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_pd&expand=6100)
26584#[inline]
26585#[target_feature(enable = "avx512f,avx512vl")]
26586#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26587#[cfg_attr(test, assert_instr(vunpcklpd))]
26588pub fn _mm256_mask_unpacklo_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
26589    unsafe {
26590        let unpacklo = _mm256_unpacklo_pd(a, b).as_f64x4();
26591        transmute(simd_select_bitmask(k, unpacklo, src.as_f64x4()))
26592    }
26593}
26594
26595/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26596///
26597/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_pd&expand=6101)
26598#[inline]
26599#[target_feature(enable = "avx512f,avx512vl")]
26600#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26601#[cfg_attr(test, assert_instr(vunpcklpd))]
26602pub fn _mm256_maskz_unpacklo_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
26603    unsafe {
26604        let unpacklo = _mm256_unpacklo_pd(a, b).as_f64x4();
26605        transmute(simd_select_bitmask(k, unpacklo, f64x4::ZERO))
26606    }
26607}
26608
26609/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26610///
26611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_pd&expand=6097)
26612#[inline]
26613#[target_feature(enable = "avx512f,avx512vl")]
26614#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26615#[cfg_attr(test, assert_instr(vunpcklpd))]
26616pub fn _mm_mask_unpacklo_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
26617    unsafe {
26618        let unpacklo = _mm_unpacklo_pd(a, b).as_f64x2();
26619        transmute(simd_select_bitmask(k, unpacklo, src.as_f64x2()))
26620    }
26621}
26622
26623/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26624///
26625/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_pd&expand=6098)
26626#[inline]
26627#[target_feature(enable = "avx512f,avx512vl")]
26628#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26629#[cfg_attr(test, assert_instr(vunpcklpd))]
26630pub fn _mm_maskz_unpacklo_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
26631    unsafe {
26632        let unpacklo = _mm_unpacklo_pd(a, b).as_f64x2();
26633        transmute(simd_select_bitmask(k, unpacklo, f64x2::ZERO))
26634    }
26635}
26636
26637/// Cast vector of type __m128 to type __m512; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26638///
26639/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps128_ps512&expand=621)
26640#[inline]
26641#[target_feature(enable = "avx512f")]
26642#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26643pub fn _mm512_castps128_ps512(a: __m128) -> __m512 {
26644    unsafe {
26645        simd_shuffle!(
26646            a,
26647            _mm_undefined_ps(),
26648            [0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
26649        )
26650    }
26651}
26652
26653/// Cast vector of type __m256 to type __m512; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26654///
26655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps256_ps512&expand=623)
26656#[inline]
26657#[target_feature(enable = "avx512f")]
26658#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26659pub fn _mm512_castps256_ps512(a: __m256) -> __m512 {
26660    unsafe {
26661        simd_shuffle!(
26662            a,
26663            _mm256_undefined_ps(),
26664            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
26665        )
26666    }
26667}
26668
26669/// Cast vector of type __m128 to type __m512; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26670///
26671/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextps128_ps512&expand=6196)
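///
/// # Examples
///
/// An illustrative sketch, not from Intel's documentation: unlike
/// `_mm512_castps128_ps512`, the upper elements are guaranteed to be zero.
///
/// ```ignore
/// let a = _mm_setr_ps(1., 2., 3., 4.);
/// let r = _mm512_zextps128_ps512(a);
/// // r == [1, 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
/// ```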
26672#[inline]
26673#[target_feature(enable = "avx512f")]
26674#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26675pub fn _mm512_zextps128_ps512(a: __m128) -> __m512 {
26676    unsafe {
26677        simd_shuffle!(
26678            a,
26679            _mm_set1_ps(0.),
26680            [0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
26681        )
26682    }
26683}
26684
26685/// Cast vector of type __m256 to type __m512; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26686///
26687/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextps256_ps512&expand=6197)
26688#[inline]
26689#[target_feature(enable = "avx512f")]
26690#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26691pub fn _mm512_zextps256_ps512(a: __m256) -> __m512 {
26692    unsafe {
26693        simd_shuffle!(
26694            a,
26695            _mm256_set1_ps(0.),
26696            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
26697        )
26698    }
26699}
26700
26701/// Cast vector of type __m512 to type __m128. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26702///
26703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps512_ps128&expand=624)
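///
/// # Examples
///
/// An illustrative sketch, not from Intel's documentation; the cast simply
/// keeps the lowest 128 bits.
///
/// ```ignore
/// let a = _mm512_setr_ps(0., 1., 2., 3., 4., 5., 6., 7.,
///                        8., 9., 10., 11., 12., 13., 14., 15.);
/// let r = _mm512_castps512_ps128(a);
/// // r == [0, 1, 2, 3]
/// ```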
26704#[inline]
26705#[target_feature(enable = "avx512f")]
26706#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26707pub fn _mm512_castps512_ps128(a: __m512) -> __m128 {
26708    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
26709}
26710
26711/// Cast vector of type __m512 to type __m256. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26712///
26713/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps512_ps256&expand=625)
26714#[inline]
26715#[target_feature(enable = "avx512f")]
26716#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26717pub fn _mm512_castps512_ps256(a: __m512) -> __m256 {
26718    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) }
26719}
26720
26721/// Cast vector of type __m512 to type __m512d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26722///
26723/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps_pd&expand=616)
26724#[inline]
26725#[target_feature(enable = "avx512f")]
26726#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26727pub fn _mm512_castps_pd(a: __m512) -> __m512d {
26728    unsafe { transmute(a) }
26729}
26730
26731/// Cast vector of type __m512 to type __m512i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26732///
26733/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps_si512&expand=619)
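///
/// # Examples
///
/// An illustrative sketch, not from Intel's documentation: the cast
/// reinterprets the bits without any conversion, so a round trip through
/// `__m512i` is lossless.
///
/// ```ignore
/// let a = _mm512_set1_ps(1.5);
/// let bits = _mm512_castps_si512(a);
/// let back = _mm512_castsi512_ps(bits);
/// // `back` holds exactly the same bit pattern (and values) as `a`.
/// ```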
26734#[inline]
26735#[target_feature(enable = "avx512f")]
26736#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26737pub fn _mm512_castps_si512(a: __m512) -> __m512i {
26738    unsafe { transmute(a) }
26739}
26740
26741/// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26742///
26743/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd128_pd512&expand=609)
26744#[inline]
26745#[target_feature(enable = "avx512f")]
26746#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26747pub fn _mm512_castpd128_pd512(a: __m128d) -> __m512d {
26748    unsafe { simd_shuffle!(a, _mm_undefined_pd(), [0, 1, 2, 2, 2, 2, 2, 2]) }
26749}
26750
26751/// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26752///
26753/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd256_pd512&expand=611)
26754#[inline]
26755#[target_feature(enable = "avx512f")]
26756#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26757pub fn _mm512_castpd256_pd512(a: __m256d) -> __m512d {
26758    unsafe { simd_shuffle!(a, _mm256_undefined_pd(), [0, 1, 2, 3, 4, 4, 4, 4]) }
26759}
26760
26761/// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26762///
26763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextpd128_pd512&expand=6193)
26764#[inline]
26765#[target_feature(enable = "avx512f")]
26766#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26767pub fn _mm512_zextpd128_pd512(a: __m128d) -> __m512d {
26768    unsafe { simd_shuffle!(a, _mm_set1_pd(0.), [0, 1, 2, 2, 2, 2, 2, 2]) }
26769}
26770
26771/// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26772///
26773/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextpd256_pd512&expand=6194)
26774#[inline]
26775#[target_feature(enable = "avx512f")]
26776#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26777pub fn _mm512_zextpd256_pd512(a: __m256d) -> __m512d {
26778    unsafe { simd_shuffle!(a, _mm256_set1_pd(0.), [0, 1, 2, 3, 4, 4, 4, 4]) }
26779}
26780
26781/// Cast vector of type __m512d to type __m128d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26782///
26783/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd512_pd128&expand=612)
26784#[inline]
26785#[target_feature(enable = "avx512f")]
26786#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26787pub fn _mm512_castpd512_pd128(a: __m512d) -> __m128d {
26788    unsafe { simd_shuffle!(a, a, [0, 1]) }
26789}
26790
26791/// Cast vector of type __m512d to type __m256d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26792///
26793/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd512_pd256&expand=613)
26794#[inline]
26795#[target_feature(enable = "avx512f")]
26796#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26797pub fn _mm512_castpd512_pd256(a: __m512d) -> __m256d {
26798    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
26799}
26800
26801/// Cast vector of type __m512d to type __m512. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26802///
26803/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd_ps&expand=604)
26804#[inline]
26805#[target_feature(enable = "avx512f")]
26806#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26807pub fn _mm512_castpd_ps(a: __m512d) -> __m512 {
26808    unsafe { transmute(a) }
26809}
26810
26811/// Cast vector of type __m512d to type __m512i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26812///
26813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd_si512&expand=607)
26814#[inline]
26815#[target_feature(enable = "avx512f")]
26816#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26817pub fn _mm512_castpd_si512(a: __m512d) -> __m512i {
26818    unsafe { transmute(a) }
26819}
26820
26821/// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26822///
26823/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi128_si512&expand=629)
26824#[inline]
26825#[target_feature(enable = "avx512f")]
26826#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26827pub fn _mm512_castsi128_si512(a: __m128i) -> __m512i {
26828    unsafe { simd_shuffle!(a, _mm_undefined_si128(), [0, 1, 2, 2, 2, 2, 2, 2]) }
26829}
26830
26831/// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26832///
26833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi256_si512&expand=633)
26834#[inline]
26835#[target_feature(enable = "avx512f")]
26836#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26837pub fn _mm512_castsi256_si512(a: __m256i) -> __m512i {
26838    unsafe { simd_shuffle!(a, _mm256_undefined_si256(), [0, 1, 2, 3, 4, 4, 4, 4]) }
26839}
26840
26841/// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26842///
26843/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextsi128_si512&expand=6199)
26844#[inline]
26845#[target_feature(enable = "avx512f")]
26846#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26847pub fn _mm512_zextsi128_si512(a: __m128i) -> __m512i {
26848    unsafe { simd_shuffle!(a, _mm_setzero_si128(), [0, 1, 2, 2, 2, 2, 2, 2]) }
26849}
26850
26851/// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26852///
26853/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextsi256_si512&expand=6200)
26854#[inline]
26855#[target_feature(enable = "avx512f")]
26856#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26857pub fn _mm512_zextsi256_si512(a: __m256i) -> __m512i {
26858    unsafe { simd_shuffle!(a, _mm256_setzero_si256(), [0, 1, 2, 3, 4, 4, 4, 4]) }
26859}
26860
26861/// Cast vector of type __m512i to type __m128i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26862///
26863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_si128&expand=636)
26864#[inline]
26865#[target_feature(enable = "avx512f")]
26866#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26867pub fn _mm512_castsi512_si128(a: __m512i) -> __m128i {
26868    unsafe { simd_shuffle!(a, a, [0, 1]) }
26869}
26870
26871/// Cast vector of type __m512i to type __m256i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26872///
26873/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_si256&expand=637)
26874#[inline]
26875#[target_feature(enable = "avx512f")]
26876#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26877pub fn _mm512_castsi512_si256(a: __m512i) -> __m256i {
26878    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
26879}
26880
26881/// Cast vector of type __m512i to type __m512. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26882///
26883/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_ps&expand=635)
26884#[inline]
26885#[target_feature(enable = "avx512f")]
26886#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26887pub fn _mm512_castsi512_ps(a: __m512i) -> __m512 {
26888    unsafe { transmute(a) }
26889}
26890
26891/// Cast vector of type __m512i to type __m512d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26892///
26893/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_pd&expand=634)
26894#[inline]
26895#[target_feature(enable = "avx512f")]
26896#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26897pub fn _mm512_castsi512_pd(a: __m512i) -> __m512d {
26898    unsafe { transmute(a) }
26899}
26900
26901/// Copy the lower 32-bit integer in a to dst.
26902///
26903/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsi512_si32&expand=1882)
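///
/// # Examples
///
/// An illustrative sketch, not from Intel's documentation; the values are
/// hypothetical.
///
/// ```ignore
/// let a = _mm512_setr_epi32(7, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16);
/// let r = _mm512_cvtsi512_si32(a);
/// // r == 7 (only the lowest 32-bit element is returned)
/// ```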
26904#[inline]
26905#[target_feature(enable = "avx512f")]
26906#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26907#[cfg_attr(test, assert_instr(vmovd))]
26908pub fn _mm512_cvtsi512_si32(a: __m512i) -> i32 {
26909    unsafe { simd_extract!(a.as_i32x16(), 0) }
26910}
26911
26912/// Copy the lower single-precision (32-bit) floating-point element of a to dst.
26913///
26914/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtss_f32)
26915#[inline]
26916#[target_feature(enable = "avx512f")]
26917#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26918pub fn _mm512_cvtss_f32(a: __m512) -> f32 {
26919    unsafe { simd_extract!(a, 0) }
26920}
26921
26922/// Copy the lower double-precision (64-bit) floating-point element of a to dst.
26923///
26924/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsd_f64)
26925#[inline]
26926#[target_feature(enable = "avx512f")]
26927#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26928pub fn _mm512_cvtsd_f64(a: __m512d) -> f64 {
26929    unsafe { simd_extract!(a, 0) }
26930}
26931
26932/// Broadcast the low packed 32-bit integer from a to all elements of dst.
26933///
26934/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastd_epi32&expand=545)
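///
/// # Examples
///
/// An illustrative sketch, not from Intel's documentation; the values are
/// hypothetical and `avx512f` support is assumed.
///
/// ```ignore
/// let a = _mm_setr_epi32(9, 1, 2, 3);
/// let r = _mm512_broadcastd_epi32(a);
/// // every one of the 16 elements of `r` is 9
/// ```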
26935#[inline]
26936#[target_feature(enable = "avx512f")]
26937#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26938#[cfg_attr(test, assert_instr(vbroadcast))] //should be vpbroadcastd
26939pub fn _mm512_broadcastd_epi32(a: __m128i) -> __m512i {
26940    unsafe {
26941        let a = _mm512_castsi128_si512(a).as_i32x16();
26942        let ret: i32x16 = simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
26943        transmute(ret)
26944    }
26945}
26946
26947/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26948///
26949/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastd_epi32&expand=546)
26950#[inline]
26951#[target_feature(enable = "avx512f")]
26952#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26953#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
26954pub fn _mm512_mask_broadcastd_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
26955    unsafe {
26956        let broadcast = _mm512_broadcastd_epi32(a).as_i32x16();
26957        transmute(simd_select_bitmask(k, broadcast, src.as_i32x16()))
26958    }
26959}
26960
26961/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26962///
26963/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastd_epi32&expand=547)
26964#[inline]
26965#[target_feature(enable = "avx512f")]
26966#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26967#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
26968pub fn _mm512_maskz_broadcastd_epi32(k: __mmask16, a: __m128i) -> __m512i {
26969    unsafe {
26970        let broadcast = _mm512_broadcastd_epi32(a).as_i32x16();
26971        transmute(simd_select_bitmask(k, broadcast, i32x16::ZERO))
26972    }
26973}
26974
26975/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26976///
26977/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastd_epi32&expand=543)
26978#[inline]
26979#[target_feature(enable = "avx512f,avx512vl")]
26980#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26981#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
26982pub fn _mm256_mask_broadcastd_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
26983    unsafe {
26984        let broadcast = _mm256_broadcastd_epi32(a).as_i32x8();
26985        transmute(simd_select_bitmask(k, broadcast, src.as_i32x8()))
26986    }
26987}
26988
26989/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26990///
26991/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastd_epi32&expand=544)
26992#[inline]
26993#[target_feature(enable = "avx512f,avx512vl")]
26994#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26995#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
26996pub fn _mm256_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m256i {
26997    unsafe {
26998        let broadcast = _mm256_broadcastd_epi32(a).as_i32x8();
26999        transmute(simd_select_bitmask(k, broadcast, i32x8::ZERO))
27000    }
27001}
27002
27003/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27004///
27005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastd_epi32&expand=540)
27006#[inline]
27007#[target_feature(enable = "avx512f,avx512vl")]
27008#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27009#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
27010pub fn _mm_mask_broadcastd_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
27011    unsafe {
27012        let broadcast = _mm_broadcastd_epi32(a).as_i32x4();
27013        transmute(simd_select_bitmask(k, broadcast, src.as_i32x4()))
27014    }
27015}
27016
27017/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27018///
27019/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastd_epi32&expand=541)
27020#[inline]
27021#[target_feature(enable = "avx512f,avx512vl")]
27022#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27023#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
27024pub fn _mm_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m128i {
27025    unsafe {
27026        let broadcast = _mm_broadcastd_epi32(a).as_i32x4();
27027        transmute(simd_select_bitmask(k, broadcast, i32x4::ZERO))
27028    }
27029}
27030
27031/// Broadcast the low packed 64-bit integer from a to all elements of dst.
27032///
27033/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastq_epi64&expand=560)
27034#[inline]
27035#[target_feature(enable = "avx512f")]
27036#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27037#[cfg_attr(test, assert_instr(vbroadcast))] //should be vpbroadcastq
27038pub fn _mm512_broadcastq_epi64(a: __m128i) -> __m512i {
27039    unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0]) }
27040}
27041
27042/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27043///
27044/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastq_epi64&expand=561)
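///
/// # Examples
///
/// An illustrative sketch, not from Intel's documentation, showing the
/// writemask behaviour with hypothetical values.
///
/// ```ignore
/// let src = _mm512_set1_epi64(-1);
/// let a = _mm_set_epi64x(0, 7); // low element is 7
/// // Mask 0b0000_1111: the low four elements take the broadcast value, the
/// // high four are copied from `src`.
/// let r = _mm512_mask_broadcastq_epi64(src, 0b0000_1111, a);
/// // r == [7, 7, 7, 7, -1, -1, -1, -1]
/// ```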
27045#[inline]
27046#[target_feature(enable = "avx512f")]
27047#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27048#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27049pub fn _mm512_mask_broadcastq_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
27050    unsafe {
27051        let broadcast = _mm512_broadcastq_epi64(a).as_i64x8();
27052        transmute(simd_select_bitmask(k, broadcast, src.as_i64x8()))
27053    }
27054}
27055
27056/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27057///
27058/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastq_epi64&expand=562)
27059#[inline]
27060#[target_feature(enable = "avx512f")]
27061#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27062#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27063pub fn _mm512_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m512i {
27064    unsafe {
27065        let broadcast = _mm512_broadcastq_epi64(a).as_i64x8();
27066        transmute(simd_select_bitmask(k, broadcast, i64x8::ZERO))
27067    }
27068}
27069
27070/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27071///
27072/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastq_epi64&expand=558)
27073#[inline]
27074#[target_feature(enable = "avx512f,avx512vl")]
27075#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27076#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27077pub fn _mm256_mask_broadcastq_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
27078    unsafe {
27079        let broadcast = _mm256_broadcastq_epi64(a).as_i64x4();
27080        transmute(simd_select_bitmask(k, broadcast, src.as_i64x4()))
27081    }
27082}
27083
27084/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27085///
27086/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastq_epi64&expand=559)
27087#[inline]
27088#[target_feature(enable = "avx512f,avx512vl")]
27089#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27090#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27091pub fn _mm256_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m256i {
27092    unsafe {
27093        let broadcast = _mm256_broadcastq_epi64(a).as_i64x4();
27094        transmute(simd_select_bitmask(k, broadcast, i64x4::ZERO))
27095    }
27096}
27097
27098/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27099///
27100/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastq_epi64&expand=555)
27101#[inline]
27102#[target_feature(enable = "avx512f,avx512vl")]
27103#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27104#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27105pub fn _mm_mask_broadcastq_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
27106    unsafe {
27107        let broadcast = _mm_broadcastq_epi64(a).as_i64x2();
27108        transmute(simd_select_bitmask(k, broadcast, src.as_i64x2()))
27109    }
27110}
27111
27112/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27113///
27114/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastq_epi64&expand=556)
27115#[inline]
27116#[target_feature(enable = "avx512f,avx512vl")]
27117#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27118#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27119pub fn _mm_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m128i {
27120    unsafe {
27121        let broadcast = _mm_broadcastq_epi64(a).as_i64x2();
27122        transmute(simd_select_bitmask(k, broadcast, i64x2::ZERO))
27123    }
27124}
27125
27126/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst.
27127///
27128/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastss_ps&expand=578)
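///
/// # Examples
///
/// An illustrative sketch, not from Intel's documentation; the values are
/// hypothetical.
///
/// ```ignore
/// let a = _mm_setr_ps(3.5, 0., 0., 0.);
/// let r = _mm512_broadcastss_ps(a);
/// // all 16 elements of `r` are 3.5
/// ```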
27129#[inline]
27130#[target_feature(enable = "avx512f")]
27131#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27132#[cfg_attr(test, assert_instr(vbroadcastss))]
27133pub fn _mm512_broadcastss_ps(a: __m128) -> __m512 {
27134    unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) }
27135}
27136
27137/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27138///
27139/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastss_ps&expand=579)
27140#[inline]
27141#[target_feature(enable = "avx512f")]
27142#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27143#[cfg_attr(test, assert_instr(vbroadcastss))]
27144pub fn _mm512_mask_broadcastss_ps(src: __m512, k: __mmask16, a: __m128) -> __m512 {
27145    unsafe {
27146        let broadcast = _mm512_broadcastss_ps(a).as_f32x16();
27147        transmute(simd_select_bitmask(k, broadcast, src.as_f32x16()))
27148    }
27149}
27150
27151/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27152///
27153/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastss_ps&expand=580)
27154#[inline]
27155#[target_feature(enable = "avx512f")]
27156#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27157#[cfg_attr(test, assert_instr(vbroadcastss))]
27158pub fn _mm512_maskz_broadcastss_ps(k: __mmask16, a: __m128) -> __m512 {
27159    unsafe {
27160        let broadcast = _mm512_broadcastss_ps(a).as_f32x16();
27161        transmute(simd_select_bitmask(k, broadcast, f32x16::ZERO))
27162    }
27163}
27164
27165/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27166///
27167/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastss_ps&expand=576)
27168#[inline]
27169#[target_feature(enable = "avx512f,avx512vl")]
27170#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27171#[cfg_attr(test, assert_instr(vbroadcastss))]
27172pub fn _mm256_mask_broadcastss_ps(src: __m256, k: __mmask8, a: __m128) -> __m256 {
27173    unsafe {
27174        let broadcast = _mm256_broadcastss_ps(a).as_f32x8();
27175        transmute(simd_select_bitmask(k, broadcast, src.as_f32x8()))
27176    }
27177}
27178
27179/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27180///
27181/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastss_ps&expand=577)
27182#[inline]
27183#[target_feature(enable = "avx512f,avx512vl")]
27184#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27185#[cfg_attr(test, assert_instr(vbroadcastss))]
27186pub fn _mm256_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m256 {
27187    unsafe {
27188        let broadcast = _mm256_broadcastss_ps(a).as_f32x8();
27189        transmute(simd_select_bitmask(k, broadcast, f32x8::ZERO))
27190    }
27191}
27192
27193/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27194///
27195/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastss_ps&expand=573)
27196#[inline]
27197#[target_feature(enable = "avx512f,avx512vl")]
27198#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27199#[cfg_attr(test, assert_instr(vbroadcastss))]
27200pub fn _mm_mask_broadcastss_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
27201    unsafe {
27202        let broadcast = _mm_broadcastss_ps(a).as_f32x4();
27203        transmute(simd_select_bitmask(k, broadcast, src.as_f32x4()))
27204    }
27205}
27206
27207/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27208///
27209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastss_ps&expand=574)
27210#[inline]
27211#[target_feature(enable = "avx512f,avx512vl")]
27212#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27213#[cfg_attr(test, assert_instr(vbroadcastss))]
27214pub fn _mm_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m128 {
27215    unsafe {
27216        let broadcast = _mm_broadcastss_ps(a).as_f32x4();
27217        transmute(simd_select_bitmask(k, broadcast, f32x4::ZERO))
27218    }
27219}
27220
27221/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst.
27222///
27223/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastsd_pd&expand=567)
27224#[inline]
27225#[target_feature(enable = "avx512f")]
27226#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27227#[cfg_attr(test, assert_instr(vbroadcastsd))]
27228pub fn _mm512_broadcastsd_pd(a: __m128d) -> __m512d {
27229    unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0]) }
27230}
27231
27232/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27233///
27234/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastsd_pd&expand=568)
27235#[inline]
27236#[target_feature(enable = "avx512f")]
27237#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27238#[cfg_attr(test, assert_instr(vbroadcastsd))]
27239pub fn _mm512_mask_broadcastsd_pd(src: __m512d, k: __mmask8, a: __m128d) -> __m512d {
27240    unsafe {
27241        let broadcast = _mm512_broadcastsd_pd(a).as_f64x8();
27242        transmute(simd_select_bitmask(k, broadcast, src.as_f64x8()))
27243    }
27244}
27245
27246/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27247///
27248/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastsd_pd&expand=569)
27249#[inline]
27250#[target_feature(enable = "avx512f")]
27251#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27252#[cfg_attr(test, assert_instr(vbroadcastsd))]
27253pub fn _mm512_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m512d {
27254    unsafe {
27255        let broadcast = _mm512_broadcastsd_pd(a).as_f64x8();
27256        transmute(simd_select_bitmask(k, broadcast, f64x8::ZERO))
27257    }
27258}
27259
27260/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27261///
27262/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastsd_pd&expand=565)
27263#[inline]
27264#[target_feature(enable = "avx512f,avx512vl")]
27265#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27266#[cfg_attr(test, assert_instr(vbroadcastsd))]
27267pub fn _mm256_mask_broadcastsd_pd(src: __m256d, k: __mmask8, a: __m128d) -> __m256d {
27268    unsafe {
27269        let broadcast = _mm256_broadcastsd_pd(a).as_f64x4();
27270        transmute(simd_select_bitmask(k, broadcast, src.as_f64x4()))
27271    }
27272}
27273
27274/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27275///
27276/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastsd_pd&expand=566)
27277#[inline]
27278#[target_feature(enable = "avx512f,avx512vl")]
27279#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27280#[cfg_attr(test, assert_instr(vbroadcastsd))]
27281pub fn _mm256_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m256d {
27282    unsafe {
27283        let broadcast = _mm256_broadcastsd_pd(a).as_f64x4();
27284        transmute(simd_select_bitmask(k, broadcast, f64x4::ZERO))
27285    }
27286}
27287
27288/// Broadcast the 4 packed 32-bit integers from a to all elements of dst.
27289///
27290/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i32x4&expand=510)
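///
/// # Examples
///
/// An illustrative sketch, not from Intel's documentation: the whole 128-bit
/// source is repeated into each 128-bit lane of the result.
///
/// ```ignore
/// let a = _mm_setr_epi32(1, 2, 3, 4);
/// let r = _mm512_broadcast_i32x4(a);
/// // r == [1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4]
/// ```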
27291#[inline]
27292#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
27293#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27294pub fn _mm512_broadcast_i32x4(a: __m128i) -> __m512i {
27295    unsafe {
27296        let a = a.as_i32x4();
27297        let ret: i32x16 = simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]);
27298        transmute(ret)
27299    }
27300}
27301
27302/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27303///
27304/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i32x4&expand=511)
27305#[inline]
27306#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
27307#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27308pub fn _mm512_mask_broadcast_i32x4(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
27309    unsafe {
27310        let broadcast = _mm512_broadcast_i32x4(a).as_i32x16();
27311        transmute(simd_select_bitmask(k, broadcast, src.as_i32x16()))
27312    }
27313}
27314
27315/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27316///
27317/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i32x4&expand=512)
27318#[inline]
27319#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
27320#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27321pub fn _mm512_maskz_broadcast_i32x4(k: __mmask16, a: __m128i) -> __m512i {
27322    unsafe {
27323        let broadcast = _mm512_broadcast_i32x4(a).as_i32x16();
27324        transmute(simd_select_bitmask(k, broadcast, i32x16::ZERO))
27325    }
27326}
27327
27328/// Broadcast the 4 packed 32-bit integers from a to all elements of dst.
27329///
27330/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_i32x4&expand=507)
27331#[inline]
27332#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
27333#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27334pub fn _mm256_broadcast_i32x4(a: __m128i) -> __m256i {
27335    unsafe {
27336        let a = a.as_i32x4();
27337        let ret: i32x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]);
27338        transmute(ret)
27339    }
27340}
27341
27342/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27343///
27344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_i32x4&expand=508)
27345#[inline]
27346#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
27347#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27348pub fn _mm256_mask_broadcast_i32x4(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
27349    unsafe {
27350        let broadcast = _mm256_broadcast_i32x4(a).as_i32x8();
27351        transmute(simd_select_bitmask(k, broadcast, src.as_i32x8()))
27352    }
27353}
27354
27355/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27356///
27357/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_i32x4&expand=509)
27358#[inline]
27359#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
27360#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27361pub fn _mm256_maskz_broadcast_i32x4(k: __mmask8, a: __m128i) -> __m256i {
27362    unsafe {
27363        let broadcast = _mm256_broadcast_i32x4(a).as_i32x8();
27364        transmute(simd_select_bitmask(k, broadcast, i32x8::ZERO))
27365    }
27366}
27367
27368/// Broadcast the 4 packed 64-bit integers from a to all elements of dst.
27369///
27370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i64x4&expand=522)
27371#[inline]
27372#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
27373#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27374pub fn _mm512_broadcast_i64x4(a: __m256i) -> __m512i {
27375    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) }
27376}
27377
27378/// Broadcast the 4 packed 64-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27379///
27380/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i64x4&expand=523)
27381#[inline]
27382#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
27383#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27384pub fn _mm512_mask_broadcast_i64x4(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
27385    unsafe {
27386        let broadcast = _mm512_broadcast_i64x4(a).as_i64x8();
27387        transmute(simd_select_bitmask(k, broadcast, src.as_i64x8()))
27388    }
27389}
27390
27391/// Broadcast the 4 packed 64-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27392///
27393/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i64x4&expand=524)
27394#[inline]
27395#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
27396#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27397pub fn _mm512_maskz_broadcast_i64x4(k: __mmask8, a: __m256i) -> __m512i {
27398    unsafe {
27399        let broadcast = _mm512_broadcast_i64x4(a).as_i64x8();
27400        transmute(simd_select_bitmask(k, broadcast, i64x8::ZERO))
27401    }
27402}
27403
27404/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst.
27405///
27406/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f32x4&expand=483)
27407#[inline]
27408#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
27409#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27410pub fn _mm512_broadcast_f32x4(a: __m128) -> __m512 {
27411    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]) }
27412}
27413
27414/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27415///
27416/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f32x4&expand=484)
27417#[inline]
27418#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
27419#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27420pub fn _mm512_mask_broadcast_f32x4(src: __m512, k: __mmask16, a: __m128) -> __m512 {
27421    unsafe {
27422        let broadcast = _mm512_broadcast_f32x4(a).as_f32x16();
27423        transmute(simd_select_bitmask(k, broadcast, src.as_f32x16()))
27424    }
27425}
27426
27427/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27428///
27429/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f32x4&expand=485)
27430#[inline]
27431#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
27432#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27433pub fn _mm512_maskz_broadcast_f32x4(k: __mmask16, a: __m128) -> __m512 {
27434    unsafe {
27435        let broadcast = _mm512_broadcast_f32x4(a).as_f32x16();
27436        transmute(simd_select_bitmask(k, broadcast, f32x16::ZERO))
27437    }
27438}
27439
27440/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst.
27441///
27442/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_f32x4&expand=480)
27443#[inline]
27444#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshuf
27445#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27446pub fn _mm256_broadcast_f32x4(a: __m128) -> __m256 {
27447    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) }
27448}
27449
27450/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27451///
27452/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_f32x4&expand=481)
27453#[inline]
27454#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshuf
27455#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27456pub fn _mm256_mask_broadcast_f32x4(src: __m256, k: __mmask8, a: __m128) -> __m256 {
27457    unsafe {
27458        let broadcast = _mm256_broadcast_f32x4(a).as_f32x8();
27459        transmute(simd_select_bitmask(k, broadcast, src.as_f32x8()))
27460    }
27461}
27462
27463/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27464///
27465/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_f32x4&expand=482)
27466#[inline]
27467#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshuf
27468#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27469pub fn _mm256_maskz_broadcast_f32x4(k: __mmask8, a: __m128) -> __m256 {
27470    unsafe {
27471        let broadcast = _mm256_broadcast_f32x4(a).as_f32x8();
27472        transmute(simd_select_bitmask(k, broadcast, f32x8::ZERO))
27473    }
27474}
27475
27476/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst.
27477///
27478/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f64x4&expand=495)
27479#[inline]
27480#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm
27481#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27482pub fn _mm512_broadcast_f64x4(a: __m256d) -> __m512d {
27483    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) }
27484}
27485
27486/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27487///
27488/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f64x4&expand=496)
27489#[inline]
27490#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm
27491#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27492pub fn _mm512_mask_broadcast_f64x4(src: __m512d, k: __mmask8, a: __m256d) -> __m512d {
27493    unsafe {
27494        let broadcast = _mm512_broadcast_f64x4(a).as_f64x8();
27495        transmute(simd_select_bitmask(k, broadcast, src.as_f64x8()))
27496    }
27497}
27498
27499/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27500///
27501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f64x4&expand=497)
27502#[inline]
27503#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm
27504#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27505pub fn _mm512_maskz_broadcast_f64x4(k: __mmask8, a: __m256d) -> __m512d {
27506    unsafe {
27507        let broadcast = _mm512_broadcast_f64x4(a).as_f64x8();
27508        transmute(simd_select_bitmask(k, broadcast, f64x8::ZERO))
27509    }
27510}
27511
27512/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
27513///
27514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi32&expand=435)
27515#[inline]
27516#[target_feature(enable = "avx512f")]
27517#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27518#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
27519pub fn _mm512_mask_blend_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
27520    unsafe { transmute(simd_select_bitmask(k, b.as_i32x16(), a.as_i32x16())) }
27521}
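
// Minimal usage sketch (hypothetical helper, not an intrinsic): the blend takes lane j
// from `b` when mask bit j is set and from `a` otherwise, so this call yields
// [b0, a1, b2, a3, ..., b14, a15].
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn blend_epi32_example(a: __m512i, b: __m512i) -> __m512i {
    _mm512_mask_blend_epi32(0b0101_0101_0101_0101, a, b)
}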
27522
27523/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
27524///
27525/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi32&expand=434)
27526#[inline]
27527#[target_feature(enable = "avx512f,avx512vl")]
27528#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27529#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
27530pub fn _mm256_mask_blend_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
27531    unsafe { transmute(simd_select_bitmask(k, b.as_i32x8(), a.as_i32x8())) }
27532}
27533
27534/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
27535///
27536/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi32&expand=432)
27537#[inline]
27538#[target_feature(enable = "avx512f,avx512vl")]
27539#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27540#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
27541pub fn _mm_mask_blend_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
27542    unsafe { transmute(simd_select_bitmask(k, b.as_i32x4(), a.as_i32x4())) }
27543}
27544
27545/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
27546///
27547/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi64&expand=438)
27548#[inline]
27549#[target_feature(enable = "avx512f")]
27550#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27551#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
27552pub fn _mm512_mask_blend_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
27553    unsafe { transmute(simd_select_bitmask(k, b.as_i64x8(), a.as_i64x8())) }
27554}
27555
27556/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
27557///
27558/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi64&expand=437)
27559#[inline]
27560#[target_feature(enable = "avx512f,avx512vl")]
27561#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27562#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
27563pub fn _mm256_mask_blend_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
27564    unsafe { transmute(simd_select_bitmask(k, b.as_i64x4(), a.as_i64x4())) }
27565}
27566
27567/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
27568///
27569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi64&expand=436)
27570#[inline]
27571#[target_feature(enable = "avx512f,avx512vl")]
27572#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27573#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
27574pub fn _mm_mask_blend_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
27575    unsafe { transmute(simd_select_bitmask(k, b.as_i64x2(), a.as_i64x2())) }
27576}
27577
27578/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27579///
27580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_ps&expand=451)
27581#[inline]
27582#[target_feature(enable = "avx512f")]
27583#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27584#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
27585pub fn _mm512_mask_blend_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
27586    unsafe { transmute(simd_select_bitmask(k, b.as_f32x16(), a.as_f32x16())) }
27587}
27588
27589/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27590///
27591/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_ps&expand=450)
27592#[inline]
27593#[target_feature(enable = "avx512f,avx512vl")]
27594#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27595#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
27596pub fn _mm256_mask_blend_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
27597    unsafe { transmute(simd_select_bitmask(k, b.as_f32x8(), a.as_f32x8())) }
27598}
27599
27600/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27601///
27602/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_ps&expand=448)
27603#[inline]
27604#[target_feature(enable = "avx512f,avx512vl")]
27605#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27606#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
27607pub fn _mm_mask_blend_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
27608    unsafe { transmute(simd_select_bitmask(k, b.as_f32x4(), a.as_f32x4())) }
27609}
27610
27611/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27612///
27613/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_pd&expand=446)
27614#[inline]
27615#[target_feature(enable = "avx512f")]
27616#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27617#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
27618pub fn _mm512_mask_blend_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
27619    unsafe { transmute(simd_select_bitmask(k, b.as_f64x8(), a.as_f64x8())) }
27620}
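
// Minimal usage sketch (hypothetical helper, not an intrinsic): only the low 8 bits of
// the mask matter for eight double-precision lanes; here the low half of the result
// comes from `b` (bits 0..3 set) and the high half from `a`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn blend_pd_example(a: __m512d, b: __m512d) -> __m512d {
    _mm512_mask_blend_pd(0b0000_1111, a, b)
}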
27621
27622/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27623///
27624/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_pd&expand=445)
27625#[inline]
27626#[target_feature(enable = "avx512f,avx512vl")]
27627#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27628#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
27629pub fn _mm256_mask_blend_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
27630    unsafe { transmute(simd_select_bitmask(k, b.as_f64x4(), a.as_f64x4())) }
27631}
27632
27633/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27634///
27635/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_pd&expand=443)
27636#[inline]
27637#[target_feature(enable = "avx512f,avx512vl")]
27638#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27639#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
27640pub fn _mm_mask_blend_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
27641    unsafe { transmute(simd_select_bitmask(k, b.as_f64x2(), a.as_f64x2())) }
27642}
27643
27644/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst.
27645///
27646/// <div class="warning">Only the lowest <strong>4 bits</strong> of <code>imm8</code> are used (shift by at most 60 bytes)!</div>
27647///
27648/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_alignr_epi32&expand=245)
27649#[inline]
27650#[target_feature(enable = "avx512f")]
27651#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27652#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27653#[rustc_legacy_const_generics(2)]
27654pub fn _mm512_alignr_epi32<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
27655    unsafe {
27656        static_assert_uimm_bits!(IMM8, 8);
27657        let a = a.as_i32x16();
27658        let b = b.as_i32x16();
27659        let imm8: i32 = IMM8 % 16;
27660        let r: i32x16 = match imm8 {
27661            0 => simd_shuffle!(
27662                a,
27663                b,
27664                [
27665                    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
27666                ],
27667            ),
27668            1 => simd_shuffle!(
27669                a,
27670                b,
27671                [
27672                    17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0,
27673                ],
27674            ),
27675            2 => simd_shuffle!(
27676                a,
27677                b,
27678                [18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1],
27679            ),
27680            3 => simd_shuffle!(
27681                a,
27682                b,
27683                [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2],
27684            ),
27685            4 => simd_shuffle!(
27686                a,
27687                b,
27688                [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3],
27689            ),
27690            5 => simd_shuffle!(
27691                a,
27692                b,
27693                [21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4],
27694            ),
27695            6 => simd_shuffle!(
27696                a,
27697                b,
27698                [22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5],
27699            ),
27700            7 => simd_shuffle!(
27701                a,
27702                b,
27703                [23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6],
27704            ),
27705            8 => simd_shuffle!(
27706                a,
27707                b,
27708                [24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7],
27709            ),
27710            9 => simd_shuffle!(
27711                a,
27712                b,
27713                [25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8],
27714            ),
27715            10 => simd_shuffle!(a, b, [26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
27716            11 => simd_shuffle!(a, b, [27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
27717            12 => simd_shuffle!(a, b, [28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
27718            13 => simd_shuffle!(a, b, [29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]),
27719            14 => simd_shuffle!(a, b, [30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]),
27720            15 => simd_shuffle!(a, b, [31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
27721            _ => unreachable_unchecked(),
27722        };
27723        transmute(r)
27724    }
27725}
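
// Minimal usage sketch (hypothetical helper, not an intrinsic): conceptually `a:b`
// forms a 32-lane value with `b` in the low half; shifting right by one 32-bit element
// gives [b1, b2, ..., b15, a0]. The immediate is reduced modulo 16, so IMM8 = 17
// behaves like IMM8 = 1.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn alignr_epi32_example(a: __m512i, b: __m512i) -> __m512i {
    _mm512_alignr_epi32::<1>(a, b)
}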
27726
27727/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27728///
27729/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_alignr_epi32&expand=246)
27730#[inline]
27731#[target_feature(enable = "avx512f")]
27732#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27733#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27734#[rustc_legacy_const_generics(4)]
27735pub fn _mm512_mask_alignr_epi32<const IMM8: i32>(
27736    src: __m512i,
27737    k: __mmask16,
27738    a: __m512i,
27739    b: __m512i,
27740) -> __m512i {
27741    unsafe {
27742        static_assert_uimm_bits!(IMM8, 8);
27743        let r = _mm512_alignr_epi32::<IMM8>(a, b);
27744        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
27745    }
27746}
27747
27748/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27749///
27750/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_alignr_epi32&expand=247)
27751#[inline]
27752#[target_feature(enable = "avx512f")]
27753#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27754#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27755#[rustc_legacy_const_generics(3)]
27756pub fn _mm512_maskz_alignr_epi32<const IMM8: i32>(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
27757    unsafe {
27758        static_assert_uimm_bits!(IMM8, 8);
27759        let r = _mm512_alignr_epi32::<IMM8>(a, b);
27760        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
27761    }
27762}
27763
27764/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst.
27765///
27766/// <div class="warning">Only the lowest <strong>3 bits</strong> of <code>imm8</code> are used (shift by at most 28 bytes)!</div>
27767///
27768/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_alignr_epi32&expand=242)
27769#[inline]
27770#[target_feature(enable = "avx512f,avx512vl")]
27771#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27772#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27773#[rustc_legacy_const_generics(2)]
27774pub fn _mm256_alignr_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
27775    unsafe {
27776        static_assert_uimm_bits!(IMM8, 8);
27777        let a = a.as_i32x8();
27778        let b = b.as_i32x8();
27779        let imm8: i32 = IMM8 % 8;
27780        let r: i32x8 = match imm8 {
27781            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
27782            1 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
27783            2 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
27784            3 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
27785            4 => simd_shuffle!(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
27786            5 => simd_shuffle!(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
27787            6 => simd_shuffle!(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
27788            7 => simd_shuffle!(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
27789            _ => unreachable_unchecked(),
27790        };
27791        transmute(r)
27792    }
27793}
27794
27795/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27796///
27797/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_alignr_epi32&expand=243)
27798#[inline]
27799#[target_feature(enable = "avx512f,avx512vl")]
27800#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27801#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27802#[rustc_legacy_const_generics(4)]
27803pub fn _mm256_mask_alignr_epi32<const IMM8: i32>(
27804    src: __m256i,
27805    k: __mmask8,
27806    a: __m256i,
27807    b: __m256i,
27808) -> __m256i {
27809    unsafe {
27810        static_assert_uimm_bits!(IMM8, 8);
27811        let r = _mm256_alignr_epi32::<IMM8>(a, b);
27812        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
27813    }
27814}
27815
27816/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27817///
27818/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_alignr_epi32&expand=244)
27819#[inline]
27820#[target_feature(enable = "avx512f,avx512vl")]
27821#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27822#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27823#[rustc_legacy_const_generics(3)]
27824pub fn _mm256_maskz_alignr_epi32<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
27825    unsafe {
27826        static_assert_uimm_bits!(IMM8, 8);
27827        let r = _mm256_alignr_epi32::<IMM8>(a, b);
27828        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
27829    }
27830}
27831
27832/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst.
27833///
27834/// <div class="warning">Only the lowest <strong>2 bits</strong> of <code>imm8</code> are used (shift by at most 12 bytes)!</div>
27835///
27836/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_epi32&expand=239)
27837#[inline]
27838#[target_feature(enable = "avx512f,avx512vl")]
27839#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27840#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] //should be valignd
27841#[rustc_legacy_const_generics(2)]
27842pub fn _mm_alignr_epi32<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
27843    unsafe {
27844        static_assert_uimm_bits!(IMM8, 8);
27845        let a = a.as_i32x4();
27846        let b = b.as_i32x4();
27847        let imm8: i32 = IMM8 % 4;
27848        let r: i32x4 = match imm8 {
27849            0 => simd_shuffle!(a, b, [4, 5, 6, 7]),
27850            1 => simd_shuffle!(a, b, [5, 6, 7, 0]),
27851            2 => simd_shuffle!(a, b, [6, 7, 0, 1]),
27852            3 => simd_shuffle!(a, b, [7, 0, 1, 2]),
27853            _ => unreachable_unchecked(),
27854        };
27855        transmute(r)
27856    }
27857}
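
// Minimal usage sketch (hypothetical helper, not an intrinsic): for the 128-bit
// variant the immediate is reduced modulo 4; a shift of one element yields
// [b1, b2, b3, a0].
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn alignr_epi32_128_example(a: __m128i, b: __m128i) -> __m128i {
    _mm_alignr_epi32::<1>(a, b)
}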
27858
27859/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27860///
27861/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_alignr_epi32&expand=240)
27862#[inline]
27863#[target_feature(enable = "avx512f,avx512vl")]
27864#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27865#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27866#[rustc_legacy_const_generics(4)]
27867pub fn _mm_mask_alignr_epi32<const IMM8: i32>(
27868    src: __m128i,
27869    k: __mmask8,
27870    a: __m128i,
27871    b: __m128i,
27872) -> __m128i {
27873    unsafe {
27874        static_assert_uimm_bits!(IMM8, 8);
27875        let r = _mm_alignr_epi32::<IMM8>(a, b);
27876        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
27877    }
27878}
27879
27880/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27881///
27882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_alignr_epi32&expand=241)
27883#[inline]
27884#[target_feature(enable = "avx512f,avx512vl")]
27885#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27886#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27887#[rustc_legacy_const_generics(3)]
27888pub fn _mm_maskz_alignr_epi32<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
27889    unsafe {
27890        static_assert_uimm_bits!(IMM8, 8);
27891        let r = _mm_alignr_epi32::<IMM8>(a, b);
27892        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
27893    }
27894}
27895
27896/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst.
27897///
27898/// <div class="warning">Only the lowest <strong>3 bits</strong> of <code>imm8</code> are used (shift by at most 56 bytes)!</div>
27899///
27900/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_alignr_epi64&expand=254)
27901#[inline]
27902#[target_feature(enable = "avx512f")]
27903#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27904#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
27905#[rustc_legacy_const_generics(2)]
27906pub fn _mm512_alignr_epi64<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
27907    unsafe {
27908        static_assert_uimm_bits!(IMM8, 8);
27909        let imm8: i32 = IMM8 % 8;
27910        let r: i64x8 = match imm8 {
27911            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
27912            1 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
27913            2 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
27914            3 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
27915            4 => simd_shuffle!(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
27916            5 => simd_shuffle!(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
27917            6 => simd_shuffle!(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
27918            7 => simd_shuffle!(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
27919            _ => unreachable_unchecked(),
27920        };
27921        transmute(r)
27922    }
27923}
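
// Minimal usage sketch (hypothetical helper, not an intrinsic): with eight 64-bit
// lanes per operand, shifting the concatenation `a:b` right by two elements yields
// [b2, b3, ..., b7, a0, a1].
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn alignr_epi64_example(a: __m512i, b: __m512i) -> __m512i {
    _mm512_alignr_epi64::<2>(a, b)
}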
27924
27925/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27926///
27927/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_alignr_epi64&expand=255)
27928#[inline]
27929#[target_feature(enable = "avx512f")]
27930#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27931#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
27932#[rustc_legacy_const_generics(4)]
27933pub fn _mm512_mask_alignr_epi64<const IMM8: i32>(
27934    src: __m512i,
27935    k: __mmask8,
27936    a: __m512i,
27937    b: __m512i,
27938) -> __m512i {
27939    unsafe {
27940        static_assert_uimm_bits!(IMM8, 8);
27941        let r = _mm512_alignr_epi64::<IMM8>(a, b);
27942        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
27943    }
27944}
27945
27946/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27947///
27948/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_alignr_epi64&expand=256)
27949#[inline]
27950#[target_feature(enable = "avx512f")]
27951#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27952#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
27953#[rustc_legacy_const_generics(3)]
27954pub fn _mm512_maskz_alignr_epi64<const IMM8: i32>(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
27955    unsafe {
27956        static_assert_uimm_bits!(IMM8, 8);
27957        let r = _mm512_alignr_epi64::<IMM8>(a, b);
27958        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
27959    }
27960}
27961
27962/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst.
27963///
27964/// <div class="warning">Only the lowest <strong>2 bits</strong> of <code>imm8</code> are used (shift by at most 24 bytes)!</div>
27965///
27966/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_alignr_epi64&expand=251)
27967#[inline]
27968#[target_feature(enable = "avx512f,avx512vl")]
27969#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27970#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
27971#[rustc_legacy_const_generics(2)]
27972pub fn _mm256_alignr_epi64<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
27973    unsafe {
27974        static_assert_uimm_bits!(IMM8, 8);
27975        let imm8: i32 = IMM8 % 4;
27976        let r: i64x4 = match imm8 {
27977            0 => simd_shuffle!(a, b, [4, 5, 6, 7]),
27978            1 => simd_shuffle!(a, b, [5, 6, 7, 0]),
27979            2 => simd_shuffle!(a, b, [6, 7, 0, 1]),
27980            3 => simd_shuffle!(a, b, [7, 0, 1, 2]),
27981            _ => unreachable_unchecked(),
27982        };
27983        transmute(r)
27984    }
27985}
27986
27987/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27988///
27989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_alignr_epi64&expand=252)
27990#[inline]
27991#[target_feature(enable = "avx512f,avx512vl")]
27992#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27993#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
27994#[rustc_legacy_const_generics(4)]
27995pub fn _mm256_mask_alignr_epi64<const IMM8: i32>(
27996    src: __m256i,
27997    k: __mmask8,
27998    a: __m256i,
27999    b: __m256i,
28000) -> __m256i {
28001    unsafe {
28002        static_assert_uimm_bits!(IMM8, 8);
28003        let r = _mm256_alignr_epi64::<IMM8>(a, b);
28004        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
28005    }
28006}
28007
28008/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28009///
28010/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_alignr_epi64&expand=253)
28011#[inline]
28012#[target_feature(enable = "avx512f,avx512vl")]
28013#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28014#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
28015#[rustc_legacy_const_generics(3)]
28016pub fn _mm256_maskz_alignr_epi64<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28017    unsafe {
28018        static_assert_uimm_bits!(IMM8, 8);
28019        let r = _mm256_alignr_epi64::<IMM8>(a, b);
28020        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
28021    }
28022}
28023
28024/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst.
28025///
28026/// <div class="warning">Only the lowest <strong>bit</strong> of <code>imm8</code> is used (shift by at most 8 bytes)!</div>
28027///
28028/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_epi64&expand=248)
28029#[inline]
28030#[target_feature(enable = "avx512f,avx512vl")]
28031#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28032#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] //should be valignq
28033#[rustc_legacy_const_generics(2)]
28034pub fn _mm_alignr_epi64<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
28035    unsafe {
28036        static_assert_uimm_bits!(IMM8, 8);
28037        let imm8: i32 = IMM8 % 2;
28038        let r: i64x2 = match imm8 {
28039            0 => simd_shuffle!(a, b, [2, 3]),
28040            1 => simd_shuffle!(a, b, [3, 0]),
28041            _ => unreachable_unchecked(),
28042        };
28043        transmute(r)
28044    }
28045}
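
// Minimal usage sketch (hypothetical helper, not an intrinsic): only the lowest bit of
// the immediate is meaningful here; 0 returns `b` unchanged, 1 yields [b1, a0].
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn alignr_epi64_128_example(a: __m128i, b: __m128i) -> __m128i {
    _mm_alignr_epi64::<1>(a, b)
}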
28046
28047/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28048///
28049/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_alignr_epi64&expand=249)
28050#[inline]
28051#[target_feature(enable = "avx512f,avx512vl")]
28052#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28053#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
28054#[rustc_legacy_const_generics(4)]
28055pub fn _mm_mask_alignr_epi64<const IMM8: i32>(
28056    src: __m128i,
28057    k: __mmask8,
28058    a: __m128i,
28059    b: __m128i,
28060) -> __m128i {
28061    unsafe {
28062        static_assert_uimm_bits!(IMM8, 8);
28063        let r = _mm_alignr_epi64::<IMM8>(a, b);
28064        transmute(simd_select_bitmask(k, r.as_i64x2(), src.as_i64x2()))
28065    }
28066}
28067
28068/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28069///
28070/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_alignr_epi64&expand=250)
28071#[inline]
28072#[target_feature(enable = "avx512f,avx512vl")]
28073#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28074#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
28075#[rustc_legacy_const_generics(3)]
28076pub fn _mm_maskz_alignr_epi64<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28077    unsafe {
28078        static_assert_uimm_bits!(IMM8, 8);
28079        let r = _mm_alignr_epi64::<IMM8>(a, b);
28080        transmute(simd_select_bitmask(k, r.as_i64x2(), i64x2::ZERO))
28081    }
28082}
28083
28084/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst.
28085///
28086/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_epi32&expand=272)
28087#[inline]
28088#[target_feature(enable = "avx512f")]
28089#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28090#[cfg_attr(test, assert_instr(vpandq))] //should be vpandd, but generates vpandq
28091pub fn _mm512_and_epi32(a: __m512i, b: __m512i) -> __m512i {
28092    unsafe { transmute(simd_and(a.as_i32x16(), b.as_i32x16())) }
28093}
28094
28095/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28096///
28097/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_epi32&expand=273)
28098#[inline]
28099#[target_feature(enable = "avx512f")]
28100#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28101#[cfg_attr(test, assert_instr(vpandd))]
28102pub fn _mm512_mask_and_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28103    unsafe {
28104        let and = _mm512_and_epi32(a, b).as_i32x16();
28105        transmute(simd_select_bitmask(k, and, src.as_i32x16()))
28106    }
28107}
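
// Minimal usage sketch (hypothetical helper, not an intrinsic): lanes whose mask bit
// is set hold `a & b`; the remaining lanes are copied from `src` unchanged.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn mask_and_epi32_example(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
    _mm512_mask_and_epi32(src, 0b0000_0000_1111_1111, a, b)
}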
28108
28109/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28110///
28111/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_epi32&expand=274)
28112#[inline]
28113#[target_feature(enable = "avx512f")]
28114#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28115#[cfg_attr(test, assert_instr(vpandd))]
28116pub fn _mm512_maskz_and_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28117    unsafe {
28118        let and = _mm512_and_epi32(a, b).as_i32x16();
28119        transmute(simd_select_bitmask(k, and, i32x16::ZERO))
28120    }
28121}
28122
28123/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28124///
28125/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_epi32&expand=270)
28126#[inline]
28127#[target_feature(enable = "avx512f,avx512vl")]
28128#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28129#[cfg_attr(test, assert_instr(vpandd))]
28130pub fn _mm256_mask_and_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28131    unsafe {
28132        let and = simd_and(a.as_i32x8(), b.as_i32x8());
28133        transmute(simd_select_bitmask(k, and, src.as_i32x8()))
28134    }
28135}
28136
28137/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28138///
28139/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_epi32&expand=271)
28140#[inline]
28141#[target_feature(enable = "avx512f,avx512vl")]
28142#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28143#[cfg_attr(test, assert_instr(vpandd))]
28144pub fn _mm256_maskz_and_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28145    unsafe {
28146        let and = simd_and(a.as_i32x8(), b.as_i32x8());
28147        transmute(simd_select_bitmask(k, and, i32x8::ZERO))
28148    }
28149}
28150
28151/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28152///
28153/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_epi32&expand=268)
28154#[inline]
28155#[target_feature(enable = "avx512f,avx512vl")]
28156#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28157#[cfg_attr(test, assert_instr(vpandd))]
28158pub fn _mm_mask_and_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28159    unsafe {
28160        let and = simd_and(a.as_i32x4(), b.as_i32x4());
28161        transmute(simd_select_bitmask(k, and, src.as_i32x4()))
28162    }
28163}
28164
28165/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28166///
28167/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_epi32&expand=269)
28168#[inline]
28169#[target_feature(enable = "avx512f,avx512vl")]
28170#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28171#[cfg_attr(test, assert_instr(vpandd))]
28172pub fn _mm_maskz_and_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28173    unsafe {
28174        let and = simd_and(a.as_i32x4(), b.as_i32x4());
28175        transmute(simd_select_bitmask(k, and, i32x4::ZERO))
28176    }
28177}
28178
28179/// Compute the bitwise AND of 512 bits (composed of packed 64-bit integers) in a and b, and store the results in dst.
28180///
28181/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_epi64&expand=279)
28182#[inline]
28183#[target_feature(enable = "avx512f")]
28184#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28185#[cfg_attr(test, assert_instr(vpandq))]
28186pub fn _mm512_and_epi64(a: __m512i, b: __m512i) -> __m512i {
28187    unsafe { transmute(simd_and(a.as_i64x8(), b.as_i64x8())) }
28188}
28189
28190/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28191///
28192/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_epi64&expand=280)
28193#[inline]
28194#[target_feature(enable = "avx512f")]
28195#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28196#[cfg_attr(test, assert_instr(vpandq))]
28197pub fn _mm512_mask_and_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28198    unsafe {
28199        let and = _mm512_and_epi64(a, b).as_i64x8();
28200        transmute(simd_select_bitmask(k, and, src.as_i64x8()))
28201    }
28202}
28203
28204/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28205///
28206/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_epi64&expand=281)
28207#[inline]
28208#[target_feature(enable = "avx512f")]
28209#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28210#[cfg_attr(test, assert_instr(vpandq))]
28211pub fn _mm512_maskz_and_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28212    unsafe {
28213        let and = _mm512_and_epi64(a, b).as_i64x8();
28214        transmute(simd_select_bitmask(k, and, i64x8::ZERO))
28215    }
28216}
28217
28218/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28219///
28220/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_epi64&expand=277)
28221#[inline]
28222#[target_feature(enable = "avx512f,avx512vl")]
28223#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28224#[cfg_attr(test, assert_instr(vpandq))]
28225pub fn _mm256_mask_and_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28226    unsafe {
28227        let and = simd_and(a.as_i64x4(), b.as_i64x4());
28228        transmute(simd_select_bitmask(k, and, src.as_i64x4()))
28229    }
28230}
28231
28232/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28233///
28234/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_epi64&expand=278)
28235#[inline]
28236#[target_feature(enable = "avx512f,avx512vl")]
28237#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28238#[cfg_attr(test, assert_instr(vpandq))]
28239pub fn _mm256_maskz_and_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28240    unsafe {
28241        let and = simd_and(a.as_i64x4(), b.as_i64x4());
28242        transmute(simd_select_bitmask(k, and, i64x4::ZERO))
28243    }
28244}
28245
28246/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28247///
28248/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_epi64&expand=275)
28249#[inline]
28250#[target_feature(enable = "avx512f,avx512vl")]
28251#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28252#[cfg_attr(test, assert_instr(vpandq))]
28253pub fn _mm_mask_and_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28254    unsafe {
28255        let and = simd_and(a.as_i64x2(), b.as_i64x2());
28256        transmute(simd_select_bitmask(k, and, src.as_i64x2()))
28257    }
28258}
28259
28260/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28261///
28262/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_epi64&expand=276)
28263#[inline]
28264#[target_feature(enable = "avx512f,avx512vl")]
28265#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28266#[cfg_attr(test, assert_instr(vpandq))]
28267pub fn _mm_maskz_and_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28268    unsafe {
28269        let and = simd_and(a.as_i64x2(), b.as_i64x2());
28270        transmute(simd_select_bitmask(k, and, i64x2::ZERO))
28271    }
28272}
28273
28274/// Compute the bitwise AND of 512 bits (representing integer data) in a and b, and store the result in dst.
28275///
28276/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_si512&expand=302)
28277#[inline]
28278#[target_feature(enable = "avx512f")]
28279#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28280#[cfg_attr(test, assert_instr(vpandq))]
28281pub fn _mm512_and_si512(a: __m512i, b: __m512i) -> __m512i {
28282    unsafe { transmute(simd_and(a.as_i32x16(), b.as_i32x16())) }
28283}
28284
28285/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
28286///
28287/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_epi32&expand=4042)
28288#[inline]
28289#[target_feature(enable = "avx512f")]
28290#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28291#[cfg_attr(test, assert_instr(vporq))]
28292pub fn _mm512_or_epi32(a: __m512i, b: __m512i) -> __m512i {
28293    unsafe { transmute(simd_or(a.as_i32x16(), b.as_i32x16())) }
28294}
28295
28296/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28297///
28298/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_epi32&expand=4040)
28299#[inline]
28300#[target_feature(enable = "avx512f")]
28301#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28302#[cfg_attr(test, assert_instr(vpord))]
28303pub fn _mm512_mask_or_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28304    unsafe {
28305        let or = _mm512_or_epi32(a, b).as_i32x16();
28306        transmute(simd_select_bitmask(k, or, src.as_i32x16()))
28307    }
28308}
28309
28310/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28311///
28312/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_epi32&expand=4041)
28313#[inline]
28314#[target_feature(enable = "avx512f")]
28315#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28316#[cfg_attr(test, assert_instr(vpord))]
28317pub fn _mm512_maskz_or_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28318    unsafe {
28319        let or = _mm512_or_epi32(a, b).as_i32x16();
28320        transmute(simd_select_bitmask(k, or, i32x16::ZERO))
28321    }
28322}
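
// Minimal usage sketch (hypothetical helper, not an intrinsic): lanes whose mask bit
// is set hold `a | b`; the remaining lanes are zeroed rather than copied from a source.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn maskz_or_epi32_example(a: __m512i, b: __m512i) -> __m512i {
    _mm512_maskz_or_epi32(0b1111_1111_0000_0000, a, b)
}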
28323
28324/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
28325///
28326/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_epi32&expand=4039)
28327#[inline]
28328#[target_feature(enable = "avx512f,avx512vl")]
28329#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28330#[cfg_attr(test, assert_instr(vor))] //should be vpord
28331pub fn _mm256_or_epi32(a: __m256i, b: __m256i) -> __m256i {
28332    unsafe { transmute(simd_or(a.as_i32x8(), b.as_i32x8())) }
28333}
28334
28335/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28336///
28337/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_epi32&expand=4037)
28338#[inline]
28339#[target_feature(enable = "avx512f,avx512vl")]
28340#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28341#[cfg_attr(test, assert_instr(vpord))]
28342pub fn _mm256_mask_or_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28343    unsafe {
28344        let or = _mm256_or_epi32(a, b).as_i32x8();
28345        transmute(simd_select_bitmask(k, or, src.as_i32x8()))
28346    }
28347}
28348
28349/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28350///
28351/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_epi32&expand=4038)
28352#[inline]
28353#[target_feature(enable = "avx512f,avx512vl")]
28354#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28355#[cfg_attr(test, assert_instr(vpord))]
28356pub fn _mm256_maskz_or_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28357    unsafe {
28358        let or = _mm256_or_epi32(a, b).as_i32x8();
28359        transmute(simd_select_bitmask(k, or, i32x8::ZERO))
28360    }
28361}
28362
28363/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
28364///
28365/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_epi32&expand=4036)
28366#[inline]
28367#[target_feature(enable = "avx512f,avx512vl")]
28368#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28369#[cfg_attr(test, assert_instr(vor))] //should be vpord
28370pub fn _mm_or_epi32(a: __m128i, b: __m128i) -> __m128i {
28371    unsafe { transmute(simd_or(a.as_i32x4(), b.as_i32x4())) }
28372}
28373
28374/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28375///
28376/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_epi32&expand=4034)
28377#[inline]
28378#[target_feature(enable = "avx512f,avx512vl")]
28379#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28380#[cfg_attr(test, assert_instr(vpord))]
28381pub fn _mm_mask_or_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28382    unsafe {
28383        let or = _mm_or_epi32(a, b).as_i32x4();
28384        transmute(simd_select_bitmask(k, or, src.as_i32x4()))
28385    }
28386}
28387
28388/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28389///
28390/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_epi32&expand=4035)
28391#[inline]
28392#[target_feature(enable = "avx512f,avx512vl")]
28393#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28394#[cfg_attr(test, assert_instr(vpord))]
28395pub fn _mm_maskz_or_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28396    unsafe {
28397        let or = _mm_or_epi32(a, b).as_i32x4();
28398        transmute(simd_select_bitmask(k, or, i32x4::ZERO))
28399    }
28400}
28401
28402/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the result in dst.
28403///
28404/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_epi64&expand=4051)
28405#[inline]
28406#[target_feature(enable = "avx512f")]
28407#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28408#[cfg_attr(test, assert_instr(vporq))]
28409pub fn _mm512_or_epi64(a: __m512i, b: __m512i) -> __m512i {
28410    unsafe { transmute(simd_or(a.as_i64x8(), b.as_i64x8())) }
28411}
28412
28413/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28414///
28415/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_epi64&expand=4049)
28416#[inline]
28417#[target_feature(enable = "avx512f")]
28418#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28419#[cfg_attr(test, assert_instr(vporq))]
28420pub fn _mm512_mask_or_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28421    unsafe {
28422        let or = _mm512_or_epi64(a, b).as_i64x8();
28423        transmute(simd_select_bitmask(k, or, src.as_i64x8()))
28424    }
28425}
28426
28427/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28428///
28429/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_epi64&expand=4050)
28430#[inline]
28431#[target_feature(enable = "avx512f")]
28432#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28433#[cfg_attr(test, assert_instr(vporq))]
28434pub fn _mm512_maskz_or_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28435    unsafe {
28436        let or = _mm512_or_epi64(a, b).as_i64x8();
28437        transmute(simd_select_bitmask(k, or, i64x8::ZERO))
28438    }
28439}
28440
28441/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the result in dst.
28442///
28443/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_epi64&expand=4048)
28444#[inline]
28445#[target_feature(enable = "avx512f,avx512vl")]
28446#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28447#[cfg_attr(test, assert_instr(vor))] //should be vporq
28448pub fn _mm256_or_epi64(a: __m256i, b: __m256i) -> __m256i {
28449    unsafe { transmute(simd_or(a.as_i64x4(), b.as_i64x4())) }
28450}
28451
28452/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28453///
28454/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_epi64&expand=4046)
28455#[inline]
28456#[target_feature(enable = "avx512f,avx512vl")]
28457#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28458#[cfg_attr(test, assert_instr(vporq))]
28459pub fn _mm256_mask_or_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28460    unsafe {
28461        let or = _mm256_or_epi64(a, b).as_i64x4();
28462        transmute(simd_select_bitmask(k, or, src.as_i64x4()))
28463    }
28464}
28465
28466/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28467///
28468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_epi64&expand=4047)
28469#[inline]
28470#[target_feature(enable = "avx512f,avx512vl")]
28471#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28472#[cfg_attr(test, assert_instr(vporq))]
28473pub fn _mm256_maskz_or_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28474    unsafe {
28475        let or = _mm256_or_epi64(a, b).as_i64x4();
28476        transmute(simd_select_bitmask(k, or, i64x4::ZERO))
28477    }
28478}
28479
28480/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the result in dst.
28481///
28482/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_epi64&expand=4045)
28483#[inline]
28484#[target_feature(enable = "avx512f,avx512vl")]
28485#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28486#[cfg_attr(test, assert_instr(vor))] //should be vporq
28487pub fn _mm_or_epi64(a: __m128i, b: __m128i) -> __m128i {
28488    unsafe { transmute(simd_or(a.as_i64x2(), b.as_i64x2())) }
28489}
28490
28491/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28492///
28493/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_epi64&expand=4043)
28494#[inline]
28495#[target_feature(enable = "avx512f,avx512vl")]
28496#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28497#[cfg_attr(test, assert_instr(vporq))]
28498pub fn _mm_mask_or_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28499    unsafe {
28500        let or = _mm_or_epi64(a, b).as_i64x2();
28501        transmute(simd_select_bitmask(k, or, src.as_i64x2()))
28502    }
28503}
28504
28505/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28506///
28507/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_epi64&expand=4044)
28508#[inline]
28509#[target_feature(enable = "avx512f,avx512vl")]
28510#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28511#[cfg_attr(test, assert_instr(vporq))]
28512pub fn _mm_maskz_or_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28513    unsafe {
28514        let or = _mm_or_epi64(a, b).as_i64x2();
28515        transmute(simd_select_bitmask(k, or, i64x2::ZERO))
28516    }
28517}
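
// Illustrative sketch (not from the original source): how the writemask and
// zeromask variants above differ, using hypothetical values on the 128-bit form.
// With k = 0b01 only lane 0 receives `a | b`; lane 1 is copied from `src`
// (writemask) or zeroed (zeromask):
//
//     let a = _mm_set1_epi64x(0b1100);
//     let b = _mm_set1_epi64x(0b1010);
//     let src = _mm_set1_epi64x(-1);
//     let w = _mm_mask_or_epi64(src, 0b01, a, b); // lane 0 = 0b1110, lane 1 = -1
//     let z = _mm_maskz_or_epi64(0b01, a, b);     // lane 0 = 0b1110, lane 1 = 0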
28518
28519/// Compute the bitwise OR of 512 bits (representing integer data) in a and b, and store the result in dst.
28520///
28521/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_si512&expand=4072)
28522#[inline]
28523#[target_feature(enable = "avx512f")]
28524#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28525#[cfg_attr(test, assert_instr(vporq))]
28526pub fn _mm512_or_si512(a: __m512i, b: __m512i) -> __m512i {
28527    unsafe { transmute(simd_or(a.as_i32x16(), b.as_i32x16())) }
28528}
28529
28530/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
28531///
28532/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_epi32&expand=6142)
28533#[inline]
28534#[target_feature(enable = "avx512f")]
28535#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28536#[cfg_attr(test, assert_instr(vpxorq))] //should be vpxord
28537pub fn _mm512_xor_epi32(a: __m512i, b: __m512i) -> __m512i {
28538    unsafe { transmute(simd_xor(a.as_i32x16(), b.as_i32x16())) }
28539}
28540
28541/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28542///
28543/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_epi32&expand=6140)
28544#[inline]
28545#[target_feature(enable = "avx512f")]
28546#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28547#[cfg_attr(test, assert_instr(vpxord))]
28548pub fn _mm512_mask_xor_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28549    unsafe {
28550        let xor = _mm512_xor_epi32(a, b).as_i32x16();
28551        transmute(simd_select_bitmask(k, xor, src.as_i32x16()))
28552    }
28553}
28554
28555/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28556///
28557/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_epi32&expand=6141)
28558#[inline]
28559#[target_feature(enable = "avx512f")]
28560#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28561#[cfg_attr(test, assert_instr(vpxord))]
28562pub fn _mm512_maskz_xor_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28563    unsafe {
28564        let xor = _mm512_xor_epi32(a, b).as_i32x16();
28565        transmute(simd_select_bitmask(k, xor, i32x16::ZERO))
28566    }
28567}
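
// Illustrative sketch (not from the original source), with hypothetical inputs:
// each of the 16 bits of `k` picks between `a ^ b` and the fallback lane.
//
//     let a = _mm512_set1_epi32(0b0110);
//     let b = _mm512_set1_epi32(0b0011);
//     let m = _mm512_mask_xor_epi32(a, 0x00ff, a, b); // lanes 0..=7 = 0b0101, lanes 8..=15 = a
//     let z = _mm512_maskz_xor_epi32(0x00ff, a, b);   // lanes 0..=7 = 0b0101, lanes 8..=15 = 0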
28568
28569/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
28570///
28571/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_epi32&expand=6139)
28572#[inline]
28573#[target_feature(enable = "avx512f,avx512vl")]
28574#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28575#[cfg_attr(test, assert_instr(vxor))] //should be vpxord
28576pub fn _mm256_xor_epi32(a: __m256i, b: __m256i) -> __m256i {
28577    unsafe { transmute(simd_xor(a.as_i32x8(), b.as_i32x8())) }
28578}
28579
28580/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28581///
28582/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_epi32&expand=6137)
28583#[inline]
28584#[target_feature(enable = "avx512f,avx512vl")]
28585#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28586#[cfg_attr(test, assert_instr(vpxord))]
28587pub fn _mm256_mask_xor_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28588    unsafe {
28589        let xor = _mm256_xor_epi32(a, b).as_i32x8();
28590        transmute(simd_select_bitmask(k, xor, src.as_i32x8()))
28591    }
28592}
28593
28594/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28595///
28596/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_epi32&expand=6138)
28597#[inline]
28598#[target_feature(enable = "avx512f,avx512vl")]
28599#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28600#[cfg_attr(test, assert_instr(vpxord))]
28601pub fn _mm256_maskz_xor_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28602    unsafe {
28603        let xor = _mm256_xor_epi32(a, b).as_i32x8();
28604        transmute(simd_select_bitmask(k, xor, i32x8::ZERO))
28605    }
28606}
28607
28608/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
28609///
28610/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_epi32&expand=6136)
28611#[inline]
28612#[target_feature(enable = "avx512f,avx512vl")]
28613#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28614#[cfg_attr(test, assert_instr(vxor))] //should be vpxord
28615pub fn _mm_xor_epi32(a: __m128i, b: __m128i) -> __m128i {
28616    unsafe { transmute(simd_xor(a.as_i32x4(), b.as_i32x4())) }
28617}
28618
28619/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28620///
28621/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_epi32&expand=6134)
28622#[inline]
28623#[target_feature(enable = "avx512f,avx512vl")]
28624#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28625#[cfg_attr(test, assert_instr(vpxord))]
28626pub fn _mm_mask_xor_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28627    unsafe {
28628        let xor = _mm_xor_epi32(a, b).as_i32x4();
28629        transmute(simd_select_bitmask(k, xor, src.as_i32x4()))
28630    }
28631}
28632
28633/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28634///
28635/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_epi32&expand=6135)
28636#[inline]
28637#[target_feature(enable = "avx512f,avx512vl")]
28638#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28639#[cfg_attr(test, assert_instr(vpxord))]
28640pub fn _mm_maskz_xor_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28641    unsafe {
28642        let xor = _mm_xor_epi32(a, b).as_i32x4();
28643        transmute(simd_select_bitmask(k, xor, i32x4::ZERO))
28644    }
28645}
28646
28647/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
28648///
28649/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_epi64&expand=6151)
28650#[inline]
28651#[target_feature(enable = "avx512f")]
28652#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28653#[cfg_attr(test, assert_instr(vpxorq))]
28654pub fn _mm512_xor_epi64(a: __m512i, b: __m512i) -> __m512i {
28655    unsafe { transmute(simd_xor(a.as_i64x8(), b.as_i64x8())) }
28656}
28657
28658/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28659///
28660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_epi64&expand=6149)
28661#[inline]
28662#[target_feature(enable = "avx512f")]
28663#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28664#[cfg_attr(test, assert_instr(vpxorq))]
28665pub fn _mm512_mask_xor_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28666    unsafe {
28667        let xor = _mm512_xor_epi64(a, b).as_i64x8();
28668        transmute(simd_select_bitmask(k, xor, src.as_i64x8()))
28669    }
28670}
28671
28672/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28673///
28674/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_epi64&expand=6150)
28675#[inline]
28676#[target_feature(enable = "avx512f")]
28677#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28678#[cfg_attr(test, assert_instr(vpxorq))]
28679pub fn _mm512_maskz_xor_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28680    unsafe {
28681        let xor = _mm512_xor_epi64(a, b).as_i64x8();
28682        transmute(simd_select_bitmask(k, xor, i64x8::ZERO))
28683    }
28684}
28685
28686/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
28687///
28688/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_epi64&expand=6148)
28689#[inline]
28690#[target_feature(enable = "avx512f,avx512vl")]
28691#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28692#[cfg_attr(test, assert_instr(vxor))] //should be vpxorq
28693pub fn _mm256_xor_epi64(a: __m256i, b: __m256i) -> __m256i {
28694    unsafe { transmute(simd_xor(a.as_i64x4(), b.as_i64x4())) }
28695}
28696
28697/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28698///
28699/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_epi64&expand=6146)
28700#[inline]
28701#[target_feature(enable = "avx512f,avx512vl")]
28702#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28703#[cfg_attr(test, assert_instr(vpxorq))]
28704pub fn _mm256_mask_xor_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28705    unsafe {
28706        let xor = _mm256_xor_epi64(a, b).as_i64x4();
28707        transmute(simd_select_bitmask(k, xor, src.as_i64x4()))
28708    }
28709}
28710
28711/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28712///
28713/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_epi64&expand=6147)
28714#[inline]
28715#[target_feature(enable = "avx512f,avx512vl")]
28716#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28717#[cfg_attr(test, assert_instr(vpxorq))]
28718pub fn _mm256_maskz_xor_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28719    unsafe {
28720        let xor = _mm256_xor_epi64(a, b).as_i64x4();
28721        transmute(simd_select_bitmask(k, xor, i64x4::ZERO))
28722    }
28723}
28724
28725/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
28726///
28727/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_epi64&expand=6145)
28728#[inline]
28729#[target_feature(enable = "avx512f,avx512vl")]
28730#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28731#[cfg_attr(test, assert_instr(vxor))] //should be vpxorq
28732pub fn _mm_xor_epi64(a: __m128i, b: __m128i) -> __m128i {
28733    unsafe { transmute(simd_xor(a.as_i64x2(), b.as_i64x2())) }
28734}
28735
28736/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28737///
28738/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_epi64&expand=6143)
28739#[inline]
28740#[target_feature(enable = "avx512f,avx512vl")]
28741#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28742#[cfg_attr(test, assert_instr(vpxorq))]
28743pub fn _mm_mask_xor_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28744    unsafe {
28745        let xor = _mm_xor_epi64(a, b).as_i64x2();
28746        transmute(simd_select_bitmask(k, xor, src.as_i64x2()))
28747    }
28748}
28749
28750/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28751///
28752/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_epi64&expand=6144)
28753#[inline]
28754#[target_feature(enable = "avx512f,avx512vl")]
28755#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28756#[cfg_attr(test, assert_instr(vpxorq))]
28757pub fn _mm_maskz_xor_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28758    unsafe {
28759        let xor = _mm_xor_epi64(a, b).as_i64x2();
28760        transmute(simd_select_bitmask(k, xor, i64x2::ZERO))
28761    }
28762}
28763
28764/// Compute the bitwise XOR of 512 bits (representing integer data) in a and b, and store the result in dst.
28765///
28766/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_si512&expand=6172)
28767#[inline]
28768#[target_feature(enable = "avx512f")]
28769#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28770#[cfg_attr(test, assert_instr(vpxorq))]
28771pub fn _mm512_xor_si512(a: __m512i, b: __m512i) -> __m512i {
28772    unsafe { transmute(simd_xor(a.as_i32x16(), b.as_i32x16())) }
28773}
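
// Illustrative note (not from the original source): since this is a plain
// 512-bit XOR, XOR-ing a value with itself is a common way to obtain an
// all-zero vector, equivalent to `_mm512_setzero_si512()`:
//
//     let v = _mm512_set1_epi32(0x1234_5678);
//     let zero = _mm512_xor_si512(v, v); // every bit cleared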
28774
28775/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst.
28776///
28777/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_epi32&expand=310)
28778#[inline]
28779#[target_feature(enable = "avx512f")]
28780#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28781#[cfg_attr(test, assert_instr(vpandnq))] //should be vpandnd
28782pub fn _mm512_andnot_epi32(a: __m512i, b: __m512i) -> __m512i {
28783    _mm512_and_epi32(_mm512_xor_epi32(a, _mm512_set1_epi32(u32::MAX as i32)), b)
28784}
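
// Illustrative sketch (not from the original source): `andnot` computes
// `(!a) & b` per 32-bit lane, so bits of `b` survive only where `a` has a 0 bit.
// With hypothetical inputs:
//
//     let a = _mm512_set1_epi32(0b1100);
//     let b = _mm512_set1_epi32(0b1010);
//     let r = _mm512_andnot_epi32(a, b); // each lane = !0b1100 & 0b1010 = 0b0010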
28785
28786/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28787///
28788/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_epi32&expand=311)
28789#[inline]
28790#[target_feature(enable = "avx512f")]
28791#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28792#[cfg_attr(test, assert_instr(vpandnd))]
28793pub fn _mm512_mask_andnot_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28794    unsafe {
28795        let andnot = _mm512_andnot_epi32(a, b).as_i32x16();
28796        transmute(simd_select_bitmask(k, andnot, src.as_i32x16()))
28797    }
28798}
28799
28800/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28801///
28802/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_epi32&expand=312)
28803#[inline]
28804#[target_feature(enable = "avx512f")]
28805#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28806#[cfg_attr(test, assert_instr(vpandnd))]
28807pub fn _mm512_maskz_andnot_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28808    unsafe {
28809        let andnot = _mm512_andnot_epi32(a, b).as_i32x16();
28810        transmute(simd_select_bitmask(k, andnot, i32x16::ZERO))
28811    }
28812}
28813
28814/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28815///
28816/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_epi32&expand=308)
28817#[inline]
28818#[target_feature(enable = "avx512f,avx512vl")]
28819#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28820#[cfg_attr(test, assert_instr(vpandnd))]
28821pub fn _mm256_mask_andnot_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28822    unsafe {
28823        let not = _mm256_xor_epi32(a, _mm256_set1_epi32(u32::MAX as i32));
28824        let andnot = simd_and(not.as_i32x8(), b.as_i32x8());
28825        transmute(simd_select_bitmask(k, andnot, src.as_i32x8()))
28826    }
28827}
28828
28829/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28830///
28831/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_epi32&expand=309)
28832#[inline]
28833#[target_feature(enable = "avx512f,avx512vl")]
28834#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28835#[cfg_attr(test, assert_instr(vpandnd))]
28836pub fn _mm256_maskz_andnot_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28837    unsafe {
28838        let not = _mm256_xor_epi32(a, _mm256_set1_epi32(u32::MAX as i32));
28839        let andnot = simd_and(not.as_i32x8(), b.as_i32x8());
28840        transmute(simd_select_bitmask(k, andnot, i32x8::ZERO))
28841    }
28842}
28843
28844/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28845///
28846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_epi32&expand=306)
28847#[inline]
28848#[target_feature(enable = "avx512f,avx512vl")]
28849#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28850#[cfg_attr(test, assert_instr(vpandnd))]
28851pub fn _mm_mask_andnot_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28852    unsafe {
28853        let not = _mm_xor_epi32(a, _mm_set1_epi32(u32::MAX as i32));
28854        let andnot = simd_and(not.as_i32x4(), b.as_i32x4());
28855        transmute(simd_select_bitmask(k, andnot, src.as_i32x4()))
28856    }
28857}
28858
28859/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28860///
28861/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_epi32&expand=307)
28862#[inline]
28863#[target_feature(enable = "avx512f,avx512vl")]
28864#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28865#[cfg_attr(test, assert_instr(vpandnd))]
28866pub fn _mm_maskz_andnot_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28867    unsafe {
28868        let not = _mm_xor_epi32(a, _mm_set1_epi32(u32::MAX as i32));
28869        let andnot = simd_and(not.as_i32x4(), b.as_i32x4());
28870        transmute(simd_select_bitmask(k, andnot, i32x4::ZERO))
28871    }
28872}
28873
28874/// Compute the bitwise NOT of 512 bits (composed of packed 64-bit integers) in a and then AND with b, and store the results in dst.
28875///
28876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_epi64&expand=317)
28877#[inline]
28878#[target_feature(enable = "avx512f")]
28879#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28880#[cfg_attr(test, assert_instr(vpandnq))]
28881pub fn _mm512_andnot_epi64(a: __m512i, b: __m512i) -> __m512i {
28882    _mm512_and_epi64(_mm512_xor_epi64(a, _mm512_set1_epi64(u64::MAX as i64)), b)
28883}
28884
28885/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28886///
28887/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_epi64&expand=318)
28888#[inline]
28889#[target_feature(enable = "avx512f")]
28890#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28891#[cfg_attr(test, assert_instr(vpandnq))]
28892pub fn _mm512_mask_andnot_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28893    unsafe {
28894        let andnot = _mm512_andnot_epi64(a, b).as_i64x8();
28895        transmute(simd_select_bitmask(k, andnot, src.as_i64x8()))
28896    }
28897}
28898
28899/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28900///
28901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_epi64&expand=319)
28902#[inline]
28903#[target_feature(enable = "avx512f")]
28904#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28905#[cfg_attr(test, assert_instr(vpandnq))]
28906pub fn _mm512_maskz_andnot_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28907    unsafe {
28908        let andnot = _mm512_andnot_epi64(a, b).as_i64x8();
28909        transmute(simd_select_bitmask(k, andnot, i64x8::ZERO))
28910    }
28911}
28912
28913/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28914///
28915/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_epi64&expand=315)
28916#[inline]
28917#[target_feature(enable = "avx512f,avx512vl")]
28918#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28919#[cfg_attr(test, assert_instr(vpandnq))]
28920pub fn _mm256_mask_andnot_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28921    unsafe {
28922        let not = _mm256_xor_epi64(a, _mm256_set1_epi64x(u64::MAX as i64));
28923        let andnot = simd_and(not.as_i64x4(), b.as_i64x4());
28924        transmute(simd_select_bitmask(k, andnot, src.as_i64x4()))
28925    }
28926}
28927
28928/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28929///
28930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_epi64&expand=316)
28931#[inline]
28932#[target_feature(enable = "avx512f,avx512vl")]
28933#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28934#[cfg_attr(test, assert_instr(vpandnq))]
28935pub fn _mm256_maskz_andnot_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28936    unsafe {
28937        let not = _mm256_xor_epi64(a, _mm256_set1_epi64x(u64::MAX as i64));
28938        let andnot = simd_and(not.as_i64x4(), b.as_i64x4());
28939        transmute(simd_select_bitmask(k, andnot, i64x4::ZERO))
28940    }
28941}
28942
28943/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28944///
28945/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_epi64&expand=313)
28946#[inline]
28947#[target_feature(enable = "avx512f,avx512vl")]
28948#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28949#[cfg_attr(test, assert_instr(vpandnq))]
28950pub fn _mm_mask_andnot_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28951    unsafe {
28952        let not = _mm_xor_epi64(a, _mm_set1_epi64x(u64::MAX as i64));
28953        let andnot = simd_and(not.as_i64x2(), b.as_i64x2());
28954        transmute(simd_select_bitmask(k, andnot, src.as_i64x2()))
28955    }
28956}
28957
28958/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28959///
28960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_epi64&expand=314)
28961#[inline]
28962#[target_feature(enable = "avx512f,avx512vl")]
28963#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28964#[cfg_attr(test, assert_instr(vpandnq))]
28965pub fn _mm_maskz_andnot_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28966    unsafe {
28967        let not = _mm_xor_epi64(a, _mm_set1_epi64x(u64::MAX as i64));
28968        let andnot = simd_and(not.as_i64x2(), b.as_i64x2());
28969        transmute(simd_select_bitmask(k, andnot, i64x2::ZERO))
28970    }
28971}
28972
28973/// Compute the bitwise NOT of 512 bits (representing integer data) in a and then AND with b, and store the result in dst.
28974///
28975/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_si512&expand=340)
28976#[inline]
28977#[target_feature(enable = "avx512f")]
28978#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28979#[cfg_attr(test, assert_instr(vpandnq))]
28980pub fn _mm512_andnot_si512(a: __m512i, b: __m512i) -> __m512i {
28981    _mm512_and_epi64(_mm512_xor_epi64(a, _mm512_set1_epi64(u64::MAX as i64)), b)
28982}
28983
28984/// Convert 16-bit mask a into an integer value, and store the result in dst.
28985///
28986/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtmask16_u32)
28987#[inline]
28988#[target_feature(enable = "avx512f")]
28989#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28990pub fn _cvtmask16_u32(a: __mmask16) -> u32 {
28991    a as u32
28992}
28993
28994/// Convert 32-bit integer value a to a 16-bit mask and store the result in dst.
28995///
28996/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtu32_mask16)
28997#[inline]
28998#[target_feature(enable = "avx512f")]
28999#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29000pub fn _cvtu32_mask16(a: u32) -> __mmask16 {
29001    a as __mmask16
29002}
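
// Illustrative sketch (not from the original source): the two conversions above
// are plain integer casts, so they round-trip for any 16-bit value:
//
//     let k: __mmask16 = 0b1010_0101_1100_0011;
//     let u = _cvtmask16_u32(k);  // 0x0000a5c3
//     let k2 = _cvtu32_mask16(u); // == k; the upper 16 bits of `u` are dropped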
29003
29004/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
29005///
29006/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kand_mask16&expand=3212)
29007#[inline]
29008#[target_feature(enable = "avx512f")]
29009#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29010#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw
29011pub fn _kand_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
29012    a & b
29013}
29014
29015/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
29016///
29017/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kand&expand=3210)
29018#[inline]
29019#[target_feature(enable = "avx512f")]
29020#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29021#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw
29022pub fn _mm512_kand(a: __mmask16, b: __mmask16) -> __mmask16 {
29023    a & b
29024}
29025
29026/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
29027///
29028/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kor_mask16&expand=3239)
29029#[inline]
29030#[target_feature(enable = "avx512f")]
29031#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29032#[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw
29033pub fn _kor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
29034    a | b
29035}
29036
29037/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
29038///
29039/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kor&expand=3237)
29040#[inline]
29041#[target_feature(enable = "avx512f")]
29042#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29043#[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw
29044pub fn _mm512_kor(a: __mmask16, b: __mmask16) -> __mmask16 {
29045    a | b
29046}
29047
29048/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
29049///
29050/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kxor_mask16&expand=3291)
29051#[inline]
29052#[target_feature(enable = "avx512f")]
29053#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29054#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw
29055pub fn _kxor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
29056    a ^ b
29057}
29058
29059/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
29060///
29061/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kxor&expand=3289)
29062#[inline]
29063#[target_feature(enable = "avx512f")]
29064#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29065#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw
29066pub fn _mm512_kxor(a: __mmask16, b: __mmask16) -> __mmask16 {
29067    a ^ b
29068}
29069
29070/// Compute the bitwise NOT of 16-bit mask a, and store the result in k.
29071///
29072/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=knot_mask16&expand=3233)
29073#[inline]
29074#[target_feature(enable = "avx512f")]
29075#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29076pub fn _knot_mask16(a: __mmask16) -> __mmask16 {
29077    a ^ 0b11111111_11111111
29078}
29079
29080/// Compute the bitwise NOT of 16-bit mask a, and store the result in k.
29081///
29082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_knot&expand=3231)
29083#[inline]
29084#[target_feature(enable = "avx512f")]
29085#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29086pub fn _mm512_knot(a: __mmask16) -> __mmask16 {
29087    a ^ 0b11111111_11111111
29088}
29089
29090/// Compute the bitwise NOT of 16-bit masks a and then AND with b, and store the result in k.
29091///
29092/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kandn_mask16&expand=3218)
29093#[inline]
29094#[target_feature(enable = "avx512f")]
29095#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29096#[cfg_attr(test, assert_instr(not))] // generate normal and, not code instead of kandnw
29097pub fn _kandn_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
29098    _mm512_kand(_mm512_knot(a), b)
29099}
29100
29101/// Compute the bitwise NOT of 16-bit masks a and then AND with b, and store the result in k.
29102///
29103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kandn&expand=3216)
29104#[inline]
29105#[target_feature(enable = "avx512f")]
29106#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29107#[cfg_attr(test, assert_instr(not))] // generate normal and, not code instead of kandnw
29108pub fn _mm512_kandn(a: __mmask16, b: __mmask16) -> __mmask16 {
29109    _mm512_kand(_mm512_knot(a), b)
29110}
29111
29112/// Compute the bitwise XNOR of 16-bit masks a and b, and store the result in k.
29113///
29114/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kxnor_mask16&expand=3285)
29115#[inline]
29116#[target_feature(enable = "avx512f")]
29117#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29118#[cfg_attr(test, assert_instr(xor))] // generate normal xor, not code instead of kxnorw
29119pub fn _kxnor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
29120    _mm512_knot(_mm512_kxor(a, b))
29121}
29122
29123/// Compute the bitwise XNOR of 16-bit masks a and b, and store the result in k.
29124///
29125/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kxnor&expand=3283)
29126#[inline]
29127#[target_feature(enable = "avx512f")]
29128#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29129#[cfg_attr(test, assert_instr(xor))] // generate normal xor, not code instead of kxnorw
29130pub fn _mm512_kxnor(a: __mmask16, b: __mmask16) -> __mmask16 {
29131    _mm512_knot(_mm512_kxor(a, b))
29132}
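
// Illustrative sketch (not from the original source): the mask operations above
// are ordinary 16-bit bitwise logic, shown here with hypothetical masks:
//
//     let a: __mmask16 = 0b0000_1111_0000_1111;
//     let b: __mmask16 = 0b0000_0000_1111_1111;
//     let and  = _kand_mask16(a, b);  // 0b0000_0000_0000_1111
//     let andn = _kandn_mask16(a, b); // (!a) & b = 0b0000_0000_1111_0000
//     let xnor = _kxnor_mask16(a, b); // !(a ^ b) = 0b1111_0000_0000_1111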
29133
29134/// Compute the bitwise OR of 16-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
29135/// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones.
29136///
29137/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask16_u8)
29138#[inline]
29139#[target_feature(enable = "avx512f")]
29140#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29141pub unsafe fn _kortest_mask16_u8(a: __mmask16, b: __mmask16, all_ones: *mut u8) -> u8 {
29142    let tmp = _kor_mask16(a, b);
29143    *all_ones = (tmp == 0xffff) as u8;
29144    (tmp == 0) as u8
29145}
29146
29147/// Compute the bitwise OR of 16-bit masks a and b. If the result is all ones, store 1 in dst, otherwise
29148/// store 0 in dst.
29149///
29150/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask16_u8)
29151#[inline]
29152#[target_feature(enable = "avx512f")]
29153#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29154pub fn _kortestc_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
29155    (_kor_mask16(a, b) == 0xffff) as u8
29156}
29157
29158/// Compute the bitwise OR of 16-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
29159/// store 0 in dst.
29160///
29161/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask16_u8)
29162#[inline]
29163#[target_feature(enable = "avx512f")]
29164#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29165pub fn _kortestz_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
29166    (_kor_mask16(a, b) == 0) as u8
29167}
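
// Illustrative sketch (not from the original source): the three kortest helpers
// report whether `a | b` is all zeros and/or all ones. With hypothetical masks:
//
//     let a: __mmask16 = 0x00ff;
//     let b: __mmask16 = 0xff00;
//     let mut all_ones = 0u8;
//     let all_zero = unsafe { _kortest_mask16_u8(a, b, &mut all_ones) };
//     // all_zero == 0 and all_ones == 1, since a | b == 0xffff
//     // _kortestc_mask16_u8(a, b) == 1, _kortestz_mask16_u8(a, b) == 0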
29168
29169/// Shift 16-bit mask a left by count bits while shifting in zeros, and store the result in dst.
29170///
29171/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask16)
29172#[inline]
29173#[target_feature(enable = "avx512f")]
29174#[rustc_legacy_const_generics(1)]
29175#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29176pub fn _kshiftli_mask16<const COUNT: u32>(a: __mmask16) -> __mmask16 {
29177    a << COUNT
29178}
29179
29180/// Shift 16-bit mask a right by count bits while shifting in zeros, and store the result in dst.
29181///
29182/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask16)
29183#[inline]
29184#[target_feature(enable = "avx512f")]
29185#[rustc_legacy_const_generics(1)]
29186#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29187pub fn _kshiftri_mask16<const COUNT: u32>(a: __mmask16) -> __mmask16 {
29188    a >> COUNT
29189}
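
// Illustrative sketch (not from the original source): COUNT is a const generic,
// so the shift amount must be a compile-time constant. With a hypothetical mask:
//
//     let k: __mmask16 = 0b0000_0000_0000_0110;
//     let l = _kshiftli_mask16::<4>(k); // 0b0000_0000_0110_0000
//     let r = _kshiftri_mask16::<1>(k); // 0b0000_0000_0000_0011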
29190
29191/// Load 16-bit mask from memory
29192///
29193/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask16)
29194#[inline]
29195#[target_feature(enable = "avx512f")]
29196#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29197pub unsafe fn _load_mask16(mem_addr: *const __mmask16) -> __mmask16 {
29198    *mem_addr
29199}
29200
29201/// Store 16-bit mask to memory
29202///
29203/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask16)
29204#[inline]
29205#[target_feature(enable = "avx512f")]
29206#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29207pub unsafe fn _store_mask16(mem_addr: *mut __mmask16, a: __mmask16) {
29208    *mem_addr = a;
29209}
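
// Illustrative sketch (not from the original source): `_store_mask16` and
// `_load_mask16` are raw pointer accesses, so the pointer must be valid and
// suitably aligned for a 16-bit mask. A hypothetical round trip:
//
//     let mut slot: __mmask16 = 0;
//     unsafe { _store_mask16(&mut slot, 0b1010_1010_1010_1010) };
//     let k = unsafe { _load_mask16(&slot) }; // k == 0b1010_1010_1010_1010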
29210
29211/// Copy 16-bit mask a to k.
29212///
29213/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm512_kmov&expand=3228)
29214#[inline]
29215#[target_feature(enable = "avx512f")]
29216#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29217#[cfg_attr(test, assert_instr(mov))] // generate normal mov code instead of kmovw
29218pub fn _mm512_kmov(a: __mmask16) -> __mmask16 {
29219    a
29220}
29221
29222/// Converts integer mask into bitmask, storing the result in dst.
29223///
29224/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_int2mask&expand=3189)
29225#[inline]
29226#[target_feature(enable = "avx512f")]
29227#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29228pub fn _mm512_int2mask(mask: i32) -> __mmask16 {
29229    mask as u16
29230}
29231
29232/// Converts bit mask k1 into an integer value, storing the results in dst.
29233///
29234/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2int&expand=3544)
29235#[inline]
29236#[target_feature(enable = "avx512f")]
29237#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29238#[cfg_attr(test, assert_instr(mov))] // generate normal mov code instead of kmovw
29239pub fn _mm512_mask2int(k1: __mmask16) -> i32 {
29240    k1 as i32
29241}
29242
29243/// Unpack and interleave 8 bits from masks a and b, and store the 16-bit result in k.
29244///
29245/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kunpackb&expand=3280)
29246#[inline]
29247#[target_feature(enable = "avx512f")]
29248#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29249#[cfg_attr(test, assert_instr(mov))] // generate normal mov code instead of kunpckbw
29250pub fn _mm512_kunpackb(a: __mmask16, b: __mmask16) -> __mmask16 {
29251    ((a & 0xff) << 8) | (b & 0xff)
29252}
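
// Illustrative sketch (not from the original source): only the low byte of each
// mask is used; `a` supplies the high byte of the result and `b` the low byte.
//
//     let a: __mmask16 = 0xff12;
//     let b: __mmask16 = 0xff34;
//     let k = _mm512_kunpackb(a, b); // 0x1234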
29253
29254/// Performs bitwise OR between k1 and k2, storing the result in dst. CF flag is set if dst consists of all 1's.
29255///
29256/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kortestc&expand=3247)
29257#[inline]
29258#[target_feature(enable = "avx512f")]
29259#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29260#[cfg_attr(test, assert_instr(cmp))] // generate normal cmp code instead of kortestw
29261pub fn _mm512_kortestc(a: __mmask16, b: __mmask16) -> i32 {
29262    let r = (a | b) == 0b11111111_11111111;
29263    r as i32
29264}
29265
29266/// Performs bitwise OR between k1 and k2, storing the result in dst. ZF flag is set if dst is 0.
29267///
29268/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kortestz)
29269#[inline]
29270#[target_feature(enable = "avx512f")]
29271#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29272#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kortestw
29273pub fn _mm512_kortestz(a: __mmask16, b: __mmask16) -> i32 {
29274    let r = (a | b) == 0;
29275    r as i32
29276}
29277
29278/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29279///
29280/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi32_mask&expand=5890)
29281#[inline]
29282#[target_feature(enable = "avx512f")]
29283#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29284#[cfg_attr(test, assert_instr(vptestmd))]
29285pub fn _mm512_test_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
29286    let and = _mm512_and_epi32(a, b);
29287    let zero = _mm512_setzero_si512();
29288    _mm512_cmpneq_epi32_mask(and, zero)
29289}
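
// Illustrative sketch (not from the original source): a mask bit is set when the
// corresponding 32-bit lanes of `a` and `b` share at least one set bit. With
// hypothetical inputs:
//
//     let a = _mm512_set1_epi32(0b0101);
//     let b = _mm512_set1_epi32(0b1010);
//     let c = _mm512_set1_epi32(0b0100);
//     let none = _mm512_test_epi32_mask(a, b); // 0x0000: no lane shares a set bit
//     let all  = _mm512_test_epi32_mask(a, c); // 0xffff: every lane shares bit 2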
29290
29291/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29292///
29293/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi32_mask&expand=5889)
29294#[inline]
29295#[target_feature(enable = "avx512f")]
29296#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29297#[cfg_attr(test, assert_instr(vptestmd))]
29298pub fn _mm512_mask_test_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
29299    let and = _mm512_and_epi32(a, b);
29300    let zero = _mm512_setzero_si512();
29301    _mm512_mask_cmpneq_epi32_mask(k, and, zero)
29302}
29303
29304/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29305///
29306/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi32_mask&expand=5888)
29307#[inline]
29308#[target_feature(enable = "avx512f,avx512vl")]
29309#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29310#[cfg_attr(test, assert_instr(vptestmd))]
29311pub fn _mm256_test_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
29312    let and = _mm256_and_si256(a, b);
29313    let zero = _mm256_setzero_si256();
29314    _mm256_cmpneq_epi32_mask(and, zero)
29315}
29316
29317/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29318///
29319/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi32_mask&expand=5887)
29320#[inline]
29321#[target_feature(enable = "avx512f,avx512vl")]
29322#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29323#[cfg_attr(test, assert_instr(vptestmd))]
29324pub fn _mm256_mask_test_epi32_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29325    let and = _mm256_and_si256(a, b);
29326    let zero = _mm256_setzero_si256();
29327    _mm256_mask_cmpneq_epi32_mask(k, and, zero)
29328}
29329
29330/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29331///
29332/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi32_mask&expand=5886)
29333#[inline]
29334#[target_feature(enable = "avx512f,avx512vl")]
29335#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29336#[cfg_attr(test, assert_instr(vptestmd))]
29337pub fn _mm_test_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
29338    let and = _mm_and_si128(a, b);
29339    let zero = _mm_setzero_si128();
29340    _mm_cmpneq_epi32_mask(and, zero)
29341}
29342
29343/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29344///
29345/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi32_mask&expand=5885)
29346#[inline]
29347#[target_feature(enable = "avx512f,avx512vl")]
29348#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29349#[cfg_attr(test, assert_instr(vptestmd))]
29350pub fn _mm_mask_test_epi32_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29351    let and = _mm_and_si128(a, b);
29352    let zero = _mm_setzero_si128();
29353    _mm_mask_cmpneq_epi32_mask(k, and, zero)
29354}
29355
29356/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29357///
29358/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi64_mask&expand=5896)
29359#[inline]
29360#[target_feature(enable = "avx512f")]
29361#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29362#[cfg_attr(test, assert_instr(vptestmq))]
29363pub fn _mm512_test_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
29364    let and = _mm512_and_epi64(a, b);
29365    let zero = _mm512_setzero_si512();
29366    _mm512_cmpneq_epi64_mask(and, zero)
29367}
29368
29369/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29370///
29371/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi64_mask&expand=5895)
29372#[inline]
29373#[target_feature(enable = "avx512f")]
29374#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29375#[cfg_attr(test, assert_instr(vptestmq))]
29376pub fn _mm512_mask_test_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
29377    let and = _mm512_and_epi64(a, b);
29378    let zero = _mm512_setzero_si512();
29379    _mm512_mask_cmpneq_epi64_mask(k, and, zero)
29380}
29381
29382/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29383///
29384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi64_mask&expand=5894)
29385#[inline]
29386#[target_feature(enable = "avx512f,avx512vl")]
29387#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29388#[cfg_attr(test, assert_instr(vptestmq))]
29389pub fn _mm256_test_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
29390    let and = _mm256_and_si256(a, b);
29391    let zero = _mm256_setzero_si256();
29392    _mm256_cmpneq_epi64_mask(and, zero)
29393}
29394
29395/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29396///
29397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi64_mask&expand=5893)
29398#[inline]
29399#[target_feature(enable = "avx512f,avx512vl")]
29400#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29401#[cfg_attr(test, assert_instr(vptestmq))]
29402pub fn _mm256_mask_test_epi64_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29403    let and = _mm256_and_si256(a, b);
29404    let zero = _mm256_setzero_si256();
29405    _mm256_mask_cmpneq_epi64_mask(k, and, zero)
29406}
29407
29408/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29409///
29410/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi64_mask&expand=5892)
29411#[inline]
29412#[target_feature(enable = "avx512f,avx512vl")]
29413#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29414#[cfg_attr(test, assert_instr(vptestmq))]
29415pub fn _mm_test_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
29416    let and = _mm_and_si128(a, b);
29417    let zero = _mm_setzero_si128();
29418    _mm_cmpneq_epi64_mask(and, zero)
29419}
29420
29421/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29422///
29423/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi64_mask&expand=5891)
29424#[inline]
29425#[target_feature(enable = "avx512f,avx512vl")]
29426#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29427#[cfg_attr(test, assert_instr(vptestmq))]
29428pub fn _mm_mask_test_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29429    let and = _mm_and_si128(a, b);
29430    let zero = _mm_setzero_si128();
29431    _mm_mask_cmpneq_epi64_mask(k, and, zero)
29432}
29433
29434/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29435///
29436/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi32_mask&expand=5921)
29437#[inline]
29438#[target_feature(enable = "avx512f")]
29439#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29440#[cfg_attr(test, assert_instr(vptestnmd))]
29441pub fn _mm512_testn_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
29442    let and = _mm512_and_epi32(a, b);
29443    let zero = _mm512_setzero_si512();
29444    _mm512_cmpeq_epi32_mask(and, zero)
29445}
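
// Illustrative sketch (not from the original source): `testn` is the complement
// of `test` above; a mask bit is set only when `a & b` is zero in that lane.
//
//     let a = _mm512_set1_epi32(0b0101);
//     let b = _mm512_set1_epi32(0b1010);
//     let k = _mm512_testn_epi32_mask(a, b); // 0xffff: the bits are disjoint in every lane
//     // _mm512_test_epi32_mask(a, b) == 0x0000 for the same inputs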
29446
29447/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29448///
29449/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi32_mask&expand=5920)
29450#[inline]
29451#[target_feature(enable = "avx512f")]
29452#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29453#[cfg_attr(test, assert_instr(vptestnmd))]
29454pub fn _mm512_mask_testn_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
29455    let and = _mm512_and_epi32(a, b);
29456    let zero = _mm512_setzero_si512();
29457    _mm512_mask_cmpeq_epi32_mask(k, and, zero)
29458}
29459
29460/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29461///
29462/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi32_mask&expand=5919)
29463#[inline]
29464#[target_feature(enable = "avx512f,avx512vl")]
29465#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29466#[cfg_attr(test, assert_instr(vptestnmd))]
29467pub fn _mm256_testn_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
29468    let and = _mm256_and_si256(a, b);
29469    let zero = _mm256_setzero_si256();
29470    _mm256_cmpeq_epi32_mask(and, zero)
29471}
29472
29473/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29474///
29475/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi32_mask&expand=5918)
29476#[inline]
29477#[target_feature(enable = "avx512f,avx512vl")]
29478#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29479#[cfg_attr(test, assert_instr(vptestnmd))]
29480pub fn _mm256_mask_testn_epi32_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29481    let and = _mm256_and_si256(a, b);
29482    let zero = _mm256_setzero_si256();
29483    _mm256_mask_cmpeq_epi32_mask(k, and, zero)
29484}
29485
29486/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29487///
29488/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi32_mask&expand=5917)
29489#[inline]
29490#[target_feature(enable = "avx512f,avx512vl")]
29491#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29492#[cfg_attr(test, assert_instr(vptestnmd))]
29493pub fn _mm_testn_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
29494    let and = _mm_and_si128(a, b);
29495    let zero = _mm_setzero_si128();
29496    _mm_cmpeq_epi32_mask(and, zero)
29497}
29498
29499/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29500///
29501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi32_mask&expand=5916)
29502#[inline]
29503#[target_feature(enable = "avx512f,avx512vl")]
29504#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29505#[cfg_attr(test, assert_instr(vptestnmd))]
29506pub fn _mm_mask_testn_epi32_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29507    let and = _mm_and_si128(a, b);
29508    let zero = _mm_setzero_si128();
29509    _mm_mask_cmpeq_epi32_mask(k, and, zero)
29510}
29511
29512/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29513///
29514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi64_mask&expand=5927)
29515#[inline]
29516#[target_feature(enable = "avx512f")]
29517#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29518#[cfg_attr(test, assert_instr(vptestnmq))]
29519pub fn _mm512_testn_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
29520    let and = _mm512_and_epi64(a, b);
29521    let zero = _mm512_setzero_si512();
29522    _mm512_cmpeq_epi64_mask(and, zero)
29523}
29524
29525/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29526///
29527/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi64_mask&expand=5926)
29528#[inline]
29529#[target_feature(enable = "avx512f")]
29530#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29531#[cfg_attr(test, assert_instr(vptestnmq))]
29532pub fn _mm512_mask_testn_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
29533    let and = _mm512_and_epi64(a, b);
29534    let zero = _mm512_setzero_si512();
29535    _mm512_mask_cmpeq_epi64_mask(k, and, zero)
29536}
29537
29538/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29539///
29540/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi64_mask&expand=5925)
29541#[inline]
29542#[target_feature(enable = "avx512f,avx512vl")]
29543#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29544#[cfg_attr(test, assert_instr(vptestnmq))]
29545pub fn _mm256_testn_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
29546    let and = _mm256_and_si256(a, b);
29547    let zero = _mm256_setzero_si256();
29548    _mm256_cmpeq_epi64_mask(and, zero)
29549}
29550
29551/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29552///
29553/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi64_mask&expand=5924)
29554#[inline]
29555#[target_feature(enable = "avx512f,avx512vl")]
29556#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29557#[cfg_attr(test, assert_instr(vptestnmq))]
29558pub fn _mm256_mask_testn_epi64_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29559    let and = _mm256_and_si256(a, b);
29560    let zero = _mm256_setzero_si256();
29561    _mm256_mask_cmpeq_epi64_mask(k, and, zero)
29562}
29563
29564/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29565///
29566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi64_mask&expand=5923)
29567#[inline]
29568#[target_feature(enable = "avx512f,avx512vl")]
29569#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29570#[cfg_attr(test, assert_instr(vptestnmq))]
29571pub fn _mm_testn_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
29572    let and = _mm_and_si128(a, b);
29573    let zero = _mm_setzero_si128();
29574    _mm_cmpeq_epi64_mask(and, zero)
29575}
29576
29577/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29578///
29579/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi64_mask&expand=5922)
29580#[inline]
29581#[target_feature(enable = "avx512f,avx512vl")]
29582#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29583#[cfg_attr(test, assert_instr(vptestnmq))]
29584pub fn _mm_mask_testn_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29585    let and = _mm_and_si128(a, b);
29586    let zero = _mm_setzero_si128();
29587    _mm_mask_cmpeq_epi64_mask(k, and, zero)
29588}
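// The 64-bit variants follow the same pattern, sketched below at the 512-bit
// width (illustrative only; assumes `avx512f`): the plain form reports all
// eight lanes, while the masked form additionally clears any bit not set in `k`.
//
// #[target_feature(enable = "avx512f")]
// fn disjoint_epi64(k: __mmask8, a: __m512i, b: __m512i) -> (__mmask8, __mmask8) {
//     (
//         _mm512_testn_epi64_mask(a, b),
//         _mm512_mask_testn_epi64_mask(k, a, b),
//     )
// }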
29589
29590/// Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
29591///
29592/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_ps&expand=5671)
29593///
29594/// # Safety of non-temporal stores
29595///
29596/// After using this intrinsic, but before any other access to the memory that this intrinsic
29597/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
29598/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
29599/// return.
29600///
29601/// See [`_mm_sfence`] for details.
29602#[inline]
29603#[target_feature(enable = "avx512f")]
29604#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29605#[cfg_attr(test, assert_instr(vmovntps))]
29606#[allow(clippy::cast_ptr_alignment)]
29607pub unsafe fn _mm512_stream_ps(mem_addr: *mut f32, a: __m512) {
29608    crate::arch::asm!(
29609        vps!("vmovntps", ",{a}"),
29610        p = in(reg) mem_addr,
29611        a = in(zmm_reg) a,
29612        options(nostack, preserves_flags),
29613    );
29614}
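// Hedged usage sketch for the non-temporal store: the caller is responsible for
// the 64-byte alignment of `dst`, for runtime detection of `avx512f`, and for
// issuing the fence before the memory is touched again. The wrapper name
// `stream_then_fence` is illustrative only.
//
// #[target_feature(enable = "avx512f")]
// unsafe fn stream_then_fence(dst: *mut f32, v: __m512) {
//     unsafe {
//         _mm512_stream_ps(dst, v); // cache-bypassing 64-byte store
//         _mm_sfence(); // order the store before any later access
//     }
// }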
29615
29616/// Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
29617///
29618/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_pd&expand=5667)
29619///
29620/// # Safety of non-temporal stores
29621///
29622/// After using this intrinsic, but before any other access to the memory that this intrinsic
29623/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
29624/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
29625/// return.
29626///
29627/// See [`_mm_sfence`] for details.
29628#[inline]
29629#[target_feature(enable = "avx512f")]
29630#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29631#[cfg_attr(test, assert_instr(vmovntpd))]
29632#[allow(clippy::cast_ptr_alignment)]
29633pub unsafe fn _mm512_stream_pd(mem_addr: *mut f64, a: __m512d) {
29634    crate::arch::asm!(
29635        vps!("vmovntpd", ",{a}"),
29636        p = in(reg) mem_addr,
29637        a = in(zmm_reg) a,
29638        options(nostack, preserves_flags),
29639    );
29640}
29641
29642/// Store 512-bits of integer data from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
29643///
29644/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_si512&expand=5675)
29645///
29646/// # Safety of non-temporal stores
29647///
29648/// After using this intrinsic, but before any other access to the memory that this intrinsic
29649/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
29650/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
29651/// return.
29652///
29653/// See [`_mm_sfence`] for details.
29654#[inline]
29655#[target_feature(enable = "avx512f")]
29656#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29657#[cfg_attr(test, assert_instr(vmovntdq))]
29658#[allow(clippy::cast_ptr_alignment)]
29659pub unsafe fn _mm512_stream_si512(mem_addr: *mut __m512i, a: __m512i) {
29660    crate::arch::asm!(
29661        vps!("vmovntdq", ",{a}"),
29662        p = in(reg) mem_addr,
29663        a = in(zmm_reg) a,
29664        options(nostack, preserves_flags),
29665    );
29666}
29667
29668/// Load 512-bits of integer data from memory into dst using a non-temporal memory hint. mem_addr
29669/// must be aligned on a 64-byte boundary or a general-protection exception may be generated. To
29670/// minimize caching, the data is flagged as non-temporal (unlikely to be used again soon).
29671///
29672/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_load_si512)
29673#[inline]
29674#[target_feature(enable = "avx512f")]
29675#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29676pub unsafe fn _mm512_stream_load_si512(mem_addr: *const __m512i) -> __m512i {
29677    let dst: __m512i;
29678    crate::arch::asm!(
29679        vpl!("vmovntdqa {a}"),
29680        a = out(zmm_reg) dst,
29681        p = in(reg) mem_addr,
29682        options(pure, readonly, nostack, preserves_flags),
29683    );
29684    dst
29685}
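// Matching load-side sketch (hypothetical helper, assumes `avx512f`): the hint
// only affects caching, so the returned value is an ordinary `__m512i`.
//
// #[target_feature(enable = "avx512f")]
// unsafe fn load_nt(src: *const __m512i) -> __m512i {
//     // Safety: `src` must be valid for reads and 64-byte aligned.
//     unsafe { _mm512_stream_load_si512(src) }
// }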
29686
29687/// Sets packed single-precision (32-bit) floating-point elements in `dst` with the supplied values.
29688///
29689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_ps&expand=4931)
29690#[inline]
29691#[target_feature(enable = "avx512f")]
29692#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29693pub fn _mm512_set_ps(
29694    e0: f32,
29695    e1: f32,
29696    e2: f32,
29697    e3: f32,
29698    e4: f32,
29699    e5: f32,
29700    e6: f32,
29701    e7: f32,
29702    e8: f32,
29703    e9: f32,
29704    e10: f32,
29705    e11: f32,
29706    e12: f32,
29707    e13: f32,
29708    e14: f32,
29709    e15: f32,
29710) -> __m512 {
29711    _mm512_setr_ps(
29712        e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
29713    )
29714}
29715
29716/// Sets packed single-precision (32-bit) floating-point elements in `dst` with the supplied
29717/// values in reverse order.
29718///
29719/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_ps&expand=5008)
29720#[inline]
29721#[target_feature(enable = "avx512f")]
29722#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29723pub fn _mm512_setr_ps(
29724    e0: f32,
29725    e1: f32,
29726    e2: f32,
29727    e3: f32,
29728    e4: f32,
29729    e5: f32,
29730    e6: f32,
29731    e7: f32,
29732    e8: f32,
29733    e9: f32,
29734    e10: f32,
29735    e11: f32,
29736    e12: f32,
29737    e13: f32,
29738    e14: f32,
29739    e15: f32,
29740) -> __m512 {
29741    unsafe {
29742        let r = f32x16::new(
29743            e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
29744        );
29745        transmute(r)
29746    }
29747}
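// Element-order sketch (illustrative, not a test): `_mm512_set_ps` takes its
// arguments from the highest lane down to lane 0, while `_mm512_setr_ps` takes
// them in lane order, so the two calls below construct the same vector.
//
// #[target_feature(enable = "avx512f")]
// fn lane_index_vector() -> (__m512, __m512) {
//     let hi_to_lo = _mm512_set_ps(
//         15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
//     );
//     let lo_to_hi = _mm512_setr_ps(
//         0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
//     );
//     (hi_to_lo, lo_to_hi) // both hold lane i == i as f32
// }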
29748
29749/// Broadcast 64-bit float `a` to all elements of `dst`.
29750///
29751/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_pd&expand=4975)
29752#[inline]
29753#[target_feature(enable = "avx512f")]
29754#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29755pub fn _mm512_set1_pd(a: f64) -> __m512d {
29756    unsafe { transmute(f64x8::splat(a)) }
29757}
29758
29759/// Broadcast 32-bit float `a` to all elements of `dst`.
29760///
29761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_ps&expand=4981)
29762#[inline]
29763#[target_feature(enable = "avx512f")]
29764#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29765pub fn _mm512_set1_ps(a: f32) -> __m512 {
29766    unsafe { transmute(f32x16::splat(a)) }
29767}
29768
29769/// Sets packed 32-bit integers in `dst` with the supplied values.
29770///
29771/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi32&expand=4908)
29772#[inline]
29773#[target_feature(enable = "avx512f")]
29774#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29775pub fn _mm512_set_epi32(
29776    e15: i32,
29777    e14: i32,
29778    e13: i32,
29779    e12: i32,
29780    e11: i32,
29781    e10: i32,
29782    e9: i32,
29783    e8: i32,
29784    e7: i32,
29785    e6: i32,
29786    e5: i32,
29787    e4: i32,
29788    e3: i32,
29789    e2: i32,
29790    e1: i32,
29791    e0: i32,
29792) -> __m512i {
29793    _mm512_setr_epi32(
29794        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
29795    )
29796}
29797
29798/// Broadcast 8-bit integer a to all elements of dst.
29799///
29800/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi8&expand=4972)
29801#[inline]
29802#[target_feature(enable = "avx512f")]
29803#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29804pub fn _mm512_set1_epi8(a: i8) -> __m512i {
29805    unsafe { transmute(i8x64::splat(a)) }
29806}
29807
29808/// Broadcast 16-bit integer a to all elements of dst.
29809///
29810/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi16&expand=4944)
29811#[inline]
29812#[target_feature(enable = "avx512f")]
29813#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29814pub fn _mm512_set1_epi16(a: i16) -> __m512i {
29815    unsafe { transmute(i16x32::splat(a)) }
29816}
29817
29818/// Broadcast 32-bit integer `a` to all elements of `dst`.
29819///
29820/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set1_epi32)
29821#[inline]
29822#[target_feature(enable = "avx512f")]
29823#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29824pub fn _mm512_set1_epi32(a: i32) -> __m512i {
29825    unsafe { transmute(i32x16::splat(a)) }
29826}
29827
29828/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29829///
29830/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi32&expand=4951)
29831#[inline]
29832#[target_feature(enable = "avx512f")]
29833#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29834#[cfg_attr(test, assert_instr(vpbroadcastd))]
29835pub fn _mm512_mask_set1_epi32(src: __m512i, k: __mmask16, a: i32) -> __m512i {
29836    unsafe {
29837        let r = _mm512_set1_epi32(a).as_i32x16();
29838        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
29839    }
29840}
29841
29842/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29843///
29844/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi32&expand=4952)
29845#[inline]
29846#[target_feature(enable = "avx512f")]
29847#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29848#[cfg_attr(test, assert_instr(vpbroadcastd))]
29849pub fn _mm512_maskz_set1_epi32(k: __mmask16, a: i32) -> __m512i {
29850    unsafe {
29851        let r = _mm512_set1_epi32(a).as_i32x16();
29852        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
29853    }
29854}
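// Writemask vs. zeromask sketch (hedged; assumes `avx512f`): with k = 0b0101
// only lanes 0 and 2 receive the broadcast value; the remaining lanes either
// keep `src` (mask form) or become zero (maskz form).
//
// #[target_feature(enable = "avx512f")]
// fn broadcast_some(src: __m512i) -> (__m512i, __m512i) {
//     let k: __mmask16 = 0b0101;
//     (
//         _mm512_mask_set1_epi32(src, k, 7), // unselected lanes copied from `src`
//         _mm512_maskz_set1_epi32(k, 7),     // unselected lanes zeroed
//     )
// }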
29855
29856/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29857///
29858/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi32&expand=4948)
29859#[inline]
29860#[target_feature(enable = "avx512f,avx512vl")]
29861#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29862#[cfg_attr(test, assert_instr(vpbroadcastd))]
29863pub fn _mm256_mask_set1_epi32(src: __m256i, k: __mmask8, a: i32) -> __m256i {
29864    unsafe {
29865        let r = _mm256_set1_epi32(a).as_i32x8();
29866        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
29867    }
29868}
29869
29870/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29871///
29872/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi32&expand=4949)
29873#[inline]
29874#[target_feature(enable = "avx512f,avx512vl")]
29875#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29876#[cfg_attr(test, assert_instr(vpbroadcastd))]
29877pub fn _mm256_maskz_set1_epi32(k: __mmask8, a: i32) -> __m256i {
29878    unsafe {
29879        let r = _mm256_set1_epi32(a).as_i32x8();
29880        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
29881    }
29882}
29883
29884/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29885///
29886/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi32&expand=4945)
29887#[inline]
29888#[target_feature(enable = "avx512f,avx512vl")]
29889#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29890#[cfg_attr(test, assert_instr(vpbroadcastd))]
29891pub fn _mm_mask_set1_epi32(src: __m128i, k: __mmask8, a: i32) -> __m128i {
29892    unsafe {
29893        let r = _mm_set1_epi32(a).as_i32x4();
29894        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
29895    }
29896}
29897
29898/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29899///
29900/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi32&expand=4946)
29901#[inline]
29902#[target_feature(enable = "avx512f,avx512vl")]
29903#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29904#[cfg_attr(test, assert_instr(vpbroadcastd))]
29905pub fn _mm_maskz_set1_epi32(k: __mmask8, a: i32) -> __m128i {
29906    unsafe {
29907        let r = _mm_set1_epi32(a).as_i32x4();
29908        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
29909    }
29910}
29911
29912/// Broadcast 64-bit integer `a` to all elements of `dst`.
29913///
29914/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi64&expand=4961)
29915#[inline]
29916#[target_feature(enable = "avx512f")]
29917#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29918pub fn _mm512_set1_epi64(a: i64) -> __m512i {
29919    unsafe { transmute(i64x8::splat(a)) }
29920}
29921
29922/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29923///
29924/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi64&expand=4959)
29925#[inline]
29926#[target_feature(enable = "avx512f")]
29927#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29928#[cfg_attr(test, assert_instr(vpbroadcastq))]
29929pub fn _mm512_mask_set1_epi64(src: __m512i, k: __mmask8, a: i64) -> __m512i {
29930    unsafe {
29931        let r = _mm512_set1_epi64(a).as_i64x8();
29932        transmute(simd_select_bitmask(k, r, src.as_i64x8()))
29933    }
29934}
29935
29936/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29937///
29938/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi64&expand=4960)
29939#[inline]
29940#[target_feature(enable = "avx512f")]
29941#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29942#[cfg_attr(test, assert_instr(vpbroadcastq))]
29943pub fn _mm512_maskz_set1_epi64(k: __mmask8, a: i64) -> __m512i {
29944    unsafe {
29945        let r = _mm512_set1_epi64(a).as_i64x8();
29946        transmute(simd_select_bitmask(k, r, i64x8::ZERO))
29947    }
29948}
29949
29950/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29951///
29952/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi64&expand=4957)
29953#[inline]
29954#[target_feature(enable = "avx512f,avx512vl")]
29955#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29956#[cfg_attr(test, assert_instr(vpbroadcastq))]
29957pub fn _mm256_mask_set1_epi64(src: __m256i, k: __mmask8, a: i64) -> __m256i {
29958    unsafe {
29959        let r = _mm256_set1_epi64x(a).as_i64x4();
29960        transmute(simd_select_bitmask(k, r, src.as_i64x4()))
29961    }
29962}
29963
29964/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29965///
29966/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi64&expand=4958)
29967#[inline]
29968#[target_feature(enable = "avx512f,avx512vl")]
29969#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29970#[cfg_attr(test, assert_instr(vpbroadcastq))]
29971pub fn _mm256_maskz_set1_epi64(k: __mmask8, a: i64) -> __m256i {
29972    unsafe {
29973        let r = _mm256_set1_epi64x(a).as_i64x4();
29974        transmute(simd_select_bitmask(k, r, i64x4::ZERO))
29975    }
29976}
29977
29978/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29979///
29980/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi64&expand=4954)
29981#[inline]
29982#[target_feature(enable = "avx512f,avx512vl")]
29983#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29984#[cfg_attr(test, assert_instr(vpbroadcastq))]
29985pub fn _mm_mask_set1_epi64(src: __m128i, k: __mmask8, a: i64) -> __m128i {
29986    unsafe {
29987        let r = _mm_set1_epi64x(a).as_i64x2();
29988        transmute(simd_select_bitmask(k, r, src.as_i64x2()))
29989    }
29990}
29991
29992/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29993///
29994/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi64&expand=4955)
29995#[inline]
29996#[target_feature(enable = "avx512f,avx512vl")]
29997#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29998#[cfg_attr(test, assert_instr(vpbroadcastq))]
29999pub fn _mm_maskz_set1_epi64(k: __mmask8, a: i64) -> __m128i {
30000    unsafe {
30001        let r = _mm_set1_epi64x(a).as_i64x2();
30002        transmute(simd_select_bitmask(k, r, i64x2::ZERO))
30003    }
30004}
30005
30006/// Set packed 64-bit integers in dst with the repeated 4 element sequence.
30007///
30008/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_epi64&expand=4983)
30009#[inline]
30010#[target_feature(enable = "avx512f")]
30011#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30012pub fn _mm512_set4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i {
30013    _mm512_set_epi64(d, c, b, a, d, c, b, a)
30014}
30015
30016/// Set packed 64-bit integers in dst with the repeated 4 element sequence in reverse order.
30017///
30018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_epi64&expand=5010)
30019#[inline]
30020#[target_feature(enable = "avx512f")]
30021#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30022pub fn _mm512_setr4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i {
30023    _mm512_set_epi64(a, b, c, d, a, b, c, d)
30024}
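// Ordering sketch for the repeated-sequence constructors (illustrative only):
// `_mm512_set4_epi64(d, c, b, a)` places `a` in lane 0, whereas the `setr4`
// form places `d` in lane 0.
//
// #[target_feature(enable = "avx512f")]
// fn repeated_patterns() -> (__m512i, __m512i) {
//     (
//         _mm512_set4_epi64(3, 2, 1, 0),  // lanes 0..=7 hold 0,1,2,3,0,1,2,3
//         _mm512_setr4_epi64(3, 2, 1, 0), // lanes 0..=7 hold 3,2,1,0,3,2,1,0
//     )
// }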
30025
30026/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in mask vector k.
30027///
30028/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_ps_mask&expand=1074)
30029#[inline]
30030#[target_feature(enable = "avx512f")]
30031#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30032#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30033pub fn _mm512_cmplt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30034    _mm512_cmp_ps_mask::<_CMP_LT_OS>(a, b)
30035}
30036
30037/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30038///
30039/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_ps_mask&expand=1075)
30040#[inline]
30041#[target_feature(enable = "avx512f")]
30042#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30043#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30044pub fn _mm512_mask_cmplt_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30045    _mm512_mask_cmp_ps_mask::<_CMP_LT_OS>(k1, a, b)
30046}
30047
30048/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k.
30049///
30050/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnlt_ps_mask&expand=1154)
30051#[inline]
30052#[target_feature(enable = "avx512f")]
30053#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30054#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30055pub fn _mm512_cmpnlt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30056    _mm512_cmp_ps_mask::<_CMP_NLT_US>(a, b)
30057}
30058
30059/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30060///
30061/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnlt_ps_mask&expand=1155)
30062#[inline]
30063#[target_feature(enable = "avx512f")]
30064#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30065#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30066pub fn _mm512_mask_cmpnlt_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30067    _mm512_mask_cmp_ps_mask::<_CMP_NLT_US>(k1, a, b)
30068}
30069
30070/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k.
30071///
30072/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_ps_mask&expand=1013)
30073#[inline]
30074#[target_feature(enable = "avx512f")]
30075#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30076#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30077pub fn _mm512_cmple_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30078    _mm512_cmp_ps_mask::<_CMP_LE_OS>(a, b)
30079}
30080
30081/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30082///
30083/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_ps_mask&expand=1014)
30084#[inline]
30085#[target_feature(enable = "avx512f")]
30086#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30087#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30088pub fn _mm512_mask_cmple_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30089    _mm512_mask_cmp_ps_mask::<_CMP_LE_OS>(k1, a, b)
30090}
30091
30092/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k.
30093///
30094/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnle_ps_mask&expand=1146)
30095#[inline]
30096#[target_feature(enable = "avx512f")]
30097#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30098#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30099pub fn _mm512_cmpnle_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30100    _mm512_cmp_ps_mask::<_CMP_NLE_US>(a, b)
30101}
30102
30103/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30104///
30105/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnle_ps_mask&expand=1147)
30106#[inline]
30107#[target_feature(enable = "avx512f")]
30108#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30109#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30110pub fn _mm512_mask_cmpnle_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30111    _mm512_mask_cmp_ps_mask::<_CMP_NLE_US>(k1, a, b)
30112}
30113
30114/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in mask vector k.
30115///
30116/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_ps_mask&expand=828)
30117#[inline]
30118#[target_feature(enable = "avx512f")]
30119#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30120#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30121pub fn _mm512_cmpeq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30122    _mm512_cmp_ps_mask::<_CMP_EQ_OQ>(a, b)
30123}
30124
30125/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30126///
30127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_ps_mask&expand=829)
30128#[inline]
30129#[target_feature(enable = "avx512f")]
30130#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30131#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30132pub fn _mm512_mask_cmpeq_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30133    _mm512_mask_cmp_ps_mask::<_CMP_EQ_OQ>(k1, a, b)
30134}
30135
30136/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k.
30137///
30138/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_ps_mask&expand=1130)
30139#[inline]
30140#[target_feature(enable = "avx512f")]
30141#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30142#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30143pub fn _mm512_cmpneq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30144    _mm512_cmp_ps_mask::<_CMP_NEQ_UQ>(a, b)
30145}
30146
30147/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30148///
30149/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_ps_mask&expand=1131)
30150#[inline]
30151#[target_feature(enable = "avx512f")]
30152#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30153#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30154pub fn _mm512_mask_cmpneq_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30155    _mm512_mask_cmp_ps_mask::<_CMP_NEQ_UQ>(k1, a, b)
30156}
30157
30158/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30159///
30160/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_ps_mask&expand=749)
30161#[inline]
30162#[target_feature(enable = "avx512f")]
30163#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30164#[rustc_legacy_const_generics(2)]
30165#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30166pub fn _mm512_cmp_ps_mask<const IMM8: i32>(a: __m512, b: __m512) -> __mmask16 {
30167    unsafe {
30168        static_assert_uimm_bits!(IMM8, 5);
30169        let neg_one = -1;
30170        let a = a.as_f32x16();
30171        let b = b.as_f32x16();
30172        let r = vcmpps(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
30173        r.cast_unsigned()
30174    }
30175}
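// The comparison predicate is a const generic, so the named wrappers above are
// thin aliases for passing the corresponding `_CMP_*` constant directly.
// A hedged sketch (assumes `avx512f`):
//
// #[target_feature(enable = "avx512f")]
// fn lt_mask(a: __m512, b: __m512) -> __mmask16 {
//     // Equivalent to `_mm512_cmplt_ps_mask(a, b)`.
//     _mm512_cmp_ps_mask::<_CMP_LT_OS>(a, b)
// }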
30176
30177/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30178///
30179/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_ps_mask&expand=750)
30180#[inline]
30181#[target_feature(enable = "avx512f")]
30182#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30183#[rustc_legacy_const_generics(3)]
30184#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30185pub fn _mm512_mask_cmp_ps_mask<const IMM8: i32>(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30186    unsafe {
30187        static_assert_uimm_bits!(IMM8, 5);
30188        let a = a.as_f32x16();
30189        let b = b.as_f32x16();
30190        let r = vcmpps(a, b, IMM8, k1 as i16, _MM_FROUND_CUR_DIRECTION);
30191        r.cast_unsigned()
30192    }
30193}
30194
30195/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30196///
30197/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_ps_mask&expand=747)
30198#[inline]
30199#[target_feature(enable = "avx512f,avx512vl")]
30200#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30201#[rustc_legacy_const_generics(2)]
30202#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30203pub fn _mm256_cmp_ps_mask<const IMM8: i32>(a: __m256, b: __m256) -> __mmask8 {
30204    unsafe {
30205        static_assert_uimm_bits!(IMM8, 5);
30206        let neg_one = -1;
30207        let a = a.as_f32x8();
30208        let b = b.as_f32x8();
30209        let r = vcmpps256(a, b, IMM8, neg_one);
30210        r.cast_unsigned()
30211    }
30212}
30213
30214/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30215///
30216/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_ps_mask&expand=748)
30217#[inline]
30218#[target_feature(enable = "avx512f,avx512vl")]
30219#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30220#[rustc_legacy_const_generics(3)]
30221#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30222pub fn _mm256_mask_cmp_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m256, b: __m256) -> __mmask8 {
30223    unsafe {
30224        static_assert_uimm_bits!(IMM8, 5);
30225        let a = a.as_f32x8();
30226        let b = b.as_f32x8();
30227        let r = vcmpps256(a, b, IMM8, k1 as i8);
30228        r.cast_unsigned()
30229    }
30230}
30231
30232/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30233///
30234/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ps_mask&expand=745)
30235#[inline]
30236#[target_feature(enable = "avx512f,avx512vl")]
30237#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30238#[rustc_legacy_const_generics(2)]
30239#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30240pub fn _mm_cmp_ps_mask<const IMM8: i32>(a: __m128, b: __m128) -> __mmask8 {
30241    unsafe {
30242        static_assert_uimm_bits!(IMM8, 5);
30243        let neg_one = -1;
30244        let a = a.as_f32x4();
30245        let b = b.as_f32x4();
30246        let r = vcmpps128(a, b, IMM8, neg_one);
30247        r.cast_unsigned()
30248    }
30249}
30250
30251/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30252///
30253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_ps_mask&expand=746)
30254#[inline]
30255#[target_feature(enable = "avx512f,avx512vl")]
30256#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30257#[rustc_legacy_const_generics(3)]
30258#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30259pub fn _mm_mask_cmp_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m128, b: __m128) -> __mmask8 {
30260    unsafe {
30261        static_assert_uimm_bits!(IMM8, 5);
30262        let a = a.as_f32x4();
30263        let b = b.as_f32x4();
30264        let r = vcmpps128(a, b, IMM8, k1 as i8);
30265        r.cast_unsigned()
30266    }
30267}
30268
30269/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.\
30270/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30271///
30272/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_round_ps_mask&expand=753)
30273#[inline]
30274#[target_feature(enable = "avx512f")]
30275#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30276#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30277#[rustc_legacy_const_generics(2, 3)]
30278pub fn _mm512_cmp_round_ps_mask<const IMM5: i32, const SAE: i32>(
30279    a: __m512,
30280    b: __m512,
30281) -> __mmask16 {
30282    unsafe {
30283        static_assert_uimm_bits!(IMM5, 5);
30284        static_assert_mantissas_sae!(SAE);
30285        let neg_one = -1;
30286        let a = a.as_f32x16();
30287        let b = b.as_f32x16();
30288        let r = vcmpps(a, b, IMM5, neg_one, SAE);
30289        r.cast_unsigned()
30290    }
30291}
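// Rounding-variant sketch (illustrative; assumes `avx512f`): passing
// `_MM_FROUND_CUR_DIRECTION` matches the non-`round` form, while
// `_MM_FROUND_NO_EXC` performs the same comparison with exceptions suppressed.
//
// #[target_feature(enable = "avx512f")]
// fn quiet_lt_mask(a: __m512, b: __m512) -> __mmask16 {
//     _mm512_cmp_round_ps_mask::<_CMP_LT_OS, _MM_FROUND_NO_EXC>(a, b)
// }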
30292
30293/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).\
30294/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30295///
30296/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_round_ps_mask&expand=754)
30297#[inline]
30298#[target_feature(enable = "avx512f")]
30299#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30300#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30301#[rustc_legacy_const_generics(3, 4)]
30302pub fn _mm512_mask_cmp_round_ps_mask<const IMM5: i32, const SAE: i32>(
30303    m: __mmask16,
30304    a: __m512,
30305    b: __m512,
30306) -> __mmask16 {
30307    unsafe {
30308        static_assert_uimm_bits!(IMM5, 5);
30309        static_assert_mantissas_sae!(SAE);
30310        let a = a.as_f32x16();
30311        let b = b.as_f32x16();
30312        let r = vcmpps(a, b, IMM5, m as i16, SAE);
30313        r.cast_unsigned()
30314    }
30315}
30316
30317/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k.
30318///
30319/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpord_ps_mask&expand=1162)
30320#[inline]
30321#[target_feature(enable = "avx512f")]
30322#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30323#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30324pub fn _mm512_cmpord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30325    _mm512_cmp_ps_mask::<_CMP_ORD_Q>(a, b)
30326}
30327
30328/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30329///
30330/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpord_ps_mask&expand=1163)
30331#[inline]
30332#[target_feature(enable = "avx512f")]
30333#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30334#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30335pub fn _mm512_mask_cmpord_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30336    _mm512_mask_cmp_ps_mask::<_CMP_ORD_Q>(k1, a, b)
30337}
30338
30339/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k.
30340///
30341/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpunord_ps_mask&expand=1170)
30342#[inline]
30343#[target_feature(enable = "avx512f")]
30344#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30345#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30346pub fn _mm512_cmpunord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30347    _mm512_cmp_ps_mask::<_CMP_UNORD_Q>(a, b)
30348}
30349
30350/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30351///
30352/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpunord_ps_mask&expand=1171)
30353#[inline]
30354#[target_feature(enable = "avx512f")]
30355#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30356#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30357pub fn _mm512_mask_cmpunord_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30358    _mm512_mask_cmp_ps_mask::<_CMP_UNORD_Q>(k1, a, b)
30359}
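// NaN-detection sketch (hedged): `cmpunord` sets a bit where either input lane
// is NaN and `cmpord` where neither is, so the two masks are complements.
//
// #[target_feature(enable = "avx512f")]
// fn nan_lanes(a: __m512, b: __m512) -> __mmask16 {
//     let unord = _mm512_cmpunord_ps_mask(a, b);
//     debug_assert_eq!(unord, !_mm512_cmpord_ps_mask(a, b));
//     unord
// }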
30360
30361/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in mask vector k.
30362///
30363/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_pd_mask&expand=1071)
30364#[inline]
30365#[target_feature(enable = "avx512f")]
30366#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30367#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30368pub fn _mm512_cmplt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30369    _mm512_cmp_pd_mask::<_CMP_LT_OS>(a, b)
30370}
30371
30372/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30373///
30374/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_pd_mask&expand=1072)
30375#[inline]
30376#[target_feature(enable = "avx512f")]
30377#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30378#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30379pub fn _mm512_mask_cmplt_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30380    _mm512_mask_cmp_pd_mask::<_CMP_LT_OS>(k1, a, b)
30381}
30382
30383/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k.
30384///
30385/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnlt_pd_mask&expand=1151)
30386#[inline]
30387#[target_feature(enable = "avx512f")]
30388#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30389#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30390pub fn _mm512_cmpnlt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30391    _mm512_cmp_pd_mask::<_CMP_NLT_US>(a, b)
30392}
30393
30394/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30395///
30396/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnlt_pd_mask&expand=1152)
30397#[inline]
30398#[target_feature(enable = "avx512f")]
30399#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30400#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30401pub fn _mm512_mask_cmpnlt_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30402    _mm512_mask_cmp_pd_mask::<_CMP_NLT_US>(m, a, b)
30403}
30404
30405/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k.
30406///
30407/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_pd_mask&expand=1010)
30408#[inline]
30409#[target_feature(enable = "avx512f")]
30410#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30411#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30412pub fn _mm512_cmple_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30413    _mm512_cmp_pd_mask::<_CMP_LE_OS>(a, b)
30414}
30415
30416/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30417///
30418/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_pd_mask&expand=1011)
30419#[inline]
30420#[target_feature(enable = "avx512f")]
30421#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30422#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30423pub fn _mm512_mask_cmple_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30424    _mm512_mask_cmp_pd_mask::<_CMP_LE_OS>(k1, a, b)
30425}
30426
30427/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k.
30428///
30429/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnle_pd_mask&expand=1143)
30430#[inline]
30431#[target_feature(enable = "avx512f")]
30432#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30433#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30434pub fn _mm512_cmpnle_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30435    _mm512_cmp_pd_mask::<_CMP_NLE_US>(a, b)
30436}
30437
30438/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30439///
30440/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnle_pd_mask&expand=1144)
30441#[inline]
30442#[target_feature(enable = "avx512f")]
30443#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30444#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30445pub fn _mm512_mask_cmpnle_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30446    _mm512_mask_cmp_pd_mask::<_CMP_NLE_US>(k1, a, b)
30447}
30448
30449/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in mask vector k.
30450///
30451/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_pd_mask&expand=822)
30452#[inline]
30453#[target_feature(enable = "avx512f")]
30454#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30455#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30456pub fn _mm512_cmpeq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30457    _mm512_cmp_pd_mask::<_CMP_EQ_OQ>(a, b)
30458}
30459
30460/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30461///
30462/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_pd_mask&expand=823)
30463#[inline]
30464#[target_feature(enable = "avx512f")]
30465#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30466#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30467pub fn _mm512_mask_cmpeq_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30468    _mm512_mask_cmp_pd_mask::<_CMP_EQ_OQ>(k1, a, b)
30469}
30470
30471/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k.
30472///
30473/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_pd_mask&expand=1127)
30474#[inline]
30475#[target_feature(enable = "avx512f")]
30476#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30477#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30478pub fn _mm512_cmpneq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30479    _mm512_cmp_pd_mask::<_CMP_NEQ_UQ>(a, b)
30480}
30481
30482/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30483///
30484/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_pd_mask&expand=1128)
30485#[inline]
30486#[target_feature(enable = "avx512f")]
30487#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30488#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30489pub fn _mm512_mask_cmpneq_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30490    _mm512_mask_cmp_pd_mask::<_CMP_NEQ_UQ>(k1, a, b)
30491}
30492
30493/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30494///
30495/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_pd_mask&expand=741)
30496#[inline]
30497#[target_feature(enable = "avx512f")]
30498#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30499#[rustc_legacy_const_generics(2)]
30500#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30501pub fn _mm512_cmp_pd_mask<const IMM8: i32>(a: __m512d, b: __m512d) -> __mmask8 {
30502    unsafe {
30503        static_assert_uimm_bits!(IMM8, 5);
30504        let neg_one = -1;
30505        let a = a.as_f64x8();
30506        let b = b.as_f64x8();
30507        let r = vcmppd(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
30508        r.cast_unsigned()
30509    }
30510}
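// Double-precision sketch (illustrative; assumes `avx512f`): only eight 64-bit
// lanes fit in a 512-bit vector, so the result narrows to `__mmask8`.
//
// #[target_feature(enable = "avx512f")]
// fn le_mask_pd(a: __m512d, b: __m512d) -> __mmask8 {
//     _mm512_cmp_pd_mask::<_CMP_LE_OS>(a, b)
// }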
30511
30512/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30513///
30514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_pd_mask&expand=742)
30515#[inline]
30516#[target_feature(enable = "avx512f")]
30517#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30518#[rustc_legacy_const_generics(3)]
30519#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30520pub fn _mm512_mask_cmp_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30521    unsafe {
30522        static_assert_uimm_bits!(IMM8, 5);
30523        let a = a.as_f64x8();
30524        let b = b.as_f64x8();
30525        let r = vcmppd(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION);
30526        r.cast_unsigned()
30527    }
30528}
30529
30530/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30531///
30532/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_pd_mask&expand=739)
30533#[inline]
30534#[target_feature(enable = "avx512f,avx512vl")]
30535#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30536#[rustc_legacy_const_generics(2)]
30537#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30538pub fn _mm256_cmp_pd_mask<const IMM8: i32>(a: __m256d, b: __m256d) -> __mmask8 {
30539    unsafe {
30540        static_assert_uimm_bits!(IMM8, 5);
30541        let neg_one = -1;
30542        let a = a.as_f64x4();
30543        let b = b.as_f64x4();
30544        let r = vcmppd256(a, b, IMM8, neg_one);
30545        r.cast_unsigned()
30546    }
30547}
30548
30549/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30550///
30551/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_pd_mask&expand=740)
30552#[inline]
30553#[target_feature(enable = "avx512f,avx512vl")]
30554#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30555#[rustc_legacy_const_generics(3)]
30556#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30557pub fn _mm256_mask_cmp_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m256d, b: __m256d) -> __mmask8 {
30558    unsafe {
30559        static_assert_uimm_bits!(IMM8, 5);
30560        let a = a.as_f64x4();
30561        let b = b.as_f64x4();
30562        let r = vcmppd256(a, b, IMM8, k1 as i8);
30563        r.cast_unsigned()
30564    }
30565}
30566
30567/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30568///
30569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_pd_mask&expand=737)
30570#[inline]
30571#[target_feature(enable = "avx512f,avx512vl")]
30572#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30573#[rustc_legacy_const_generics(2)]
30574#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30575pub fn _mm_cmp_pd_mask<const IMM8: i32>(a: __m128d, b: __m128d) -> __mmask8 {
30576    unsafe {
30577        static_assert_uimm_bits!(IMM8, 5);
30578        let neg_one = -1;
30579        let a = a.as_f64x2();
30580        let b = b.as_f64x2();
30581        let r = vcmppd128(a, b, IMM8, neg_one);
30582        r.cast_unsigned()
30583    }
30584}
30585
30586/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30587///
30588/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_pd_mask&expand=738)
30589#[inline]
30590#[target_feature(enable = "avx512f,avx512vl")]
30591#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30592#[rustc_legacy_const_generics(3)]
30593#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30594pub fn _mm_mask_cmp_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d, b: __m128d) -> __mmask8 {
30595    unsafe {
30596        static_assert_uimm_bits!(IMM8, 5);
30597        let a = a.as_f64x2();
30598        let b = b.as_f64x2();
30599        let r = vcmppd128(a, b, IMM8, k1 as i8);
30600        r.cast_unsigned()
30601    }
30602}
30603
30604/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.\
30605/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30606///
30607/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_round_pd_mask&expand=751)
30608#[inline]
30609#[target_feature(enable = "avx512f")]
30610#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30611#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30612#[rustc_legacy_const_generics(2, 3)]
30613pub fn _mm512_cmp_round_pd_mask<const IMM5: i32, const SAE: i32>(
30614    a: __m512d,
30615    b: __m512d,
30616) -> __mmask8 {
30617    unsafe {
30618        static_assert_uimm_bits!(IMM5, 5);
30619        static_assert_mantissas_sae!(SAE);
30620        let neg_one = -1;
30621        let a = a.as_f64x8();
30622        let b = b.as_f64x8();
30623        let r = vcmppd(a, b, IMM5, neg_one, SAE);
30624        r.cast_unsigned()
30625    }
30626}
30627
30628/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).\
30629/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30630///
30631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_round_pd_mask&expand=752)
30632#[inline]
30633#[target_feature(enable = "avx512f")]
30634#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30635#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30636#[rustc_legacy_const_generics(3, 4)]
30637pub fn _mm512_mask_cmp_round_pd_mask<const IMM5: i32, const SAE: i32>(
30638    k1: __mmask8,
30639    a: __m512d,
30640    b: __m512d,
30641) -> __mmask8 {
30642    unsafe {
30643        static_assert_uimm_bits!(IMM5, 5);
30644        static_assert_mantissas_sae!(SAE);
30645        let a = a.as_f64x8();
30646        let b = b.as_f64x8();
30647        let r = vcmppd(a, b, IMM5, k1 as i8, SAE);
30648        r.cast_unsigned()
30649    }
30650}
30651
30652/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k.
30653///
30654/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpord_pd_mask&expand=1159)
30655#[inline]
30656#[target_feature(enable = "avx512f")]
30657#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30658#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30659pub fn _mm512_cmpord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30660    _mm512_cmp_pd_mask::<_CMP_ORD_Q>(a, b)
30661}
30662
30663/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30664///
30665/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpord_pd_mask&expand=1160)
30666#[inline]
30667#[target_feature(enable = "avx512f")]
30668#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30669#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30670pub fn _mm512_mask_cmpord_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30671    _mm512_mask_cmp_pd_mask::<_CMP_ORD_Q>(k1, a, b)
30672}
30673
30674/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k.
30675///
30676/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpunord_pd_mask&expand=1167)
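///
/// A minimal sketch of NaN detection with this intrinsic (illustrative only;
/// assumes the same nightly/target-feature setup as the other examples):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm512_setr_pd(0.0, f64::NAN, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
/// let b = _mm512_set1_pd(1.0);
/// // Bit i is set when either a[i] or b[i] is NaN.
/// let k = _mm512_cmpunord_pd_mask(a, b);
/// assert_eq!(k, 0b0000_0010);
/// ```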
30677#[inline]
30678#[target_feature(enable = "avx512f")]
30679#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30680#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30681pub fn _mm512_cmpunord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30682    _mm512_cmp_pd_mask::<_CMP_UNORD_Q>(a, b)
30683}
30684
30685/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30686///
30687/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpunord_pd_mask&expand=1168)
30688#[inline]
30689#[target_feature(enable = "avx512f")]
30690#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30691#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30692pub fn _mm512_mask_cmpunord_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30693    _mm512_mask_cmp_pd_mask::<_CMP_UNORD_Q>(k1, a, b)
30694}
30695
30696/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.
30697///
30698/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ss_mask&expand=763)
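///
/// A minimal usage sketch (illustrative only; assumes nightly Rust with the
/// unstable `stdarch_x86_avx512` feature and the `avx512f` target feature):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_ss(1.0);
/// let b = _mm_set_ss(2.0);
/// // Only the lowest element is compared; the result lives in bit 0 of the mask.
/// let k = _mm_cmp_ss_mask::<_CMP_LT_OQ>(a, b);
/// assert_eq!(k, 1);
/// ```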
30699#[inline]
30700#[target_feature(enable = "avx512f")]
30701#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30702#[rustc_legacy_const_generics(2)]
30703#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30704pub fn _mm_cmp_ss_mask<const IMM8: i32>(a: __m128, b: __m128) -> __mmask8 {
30705    unsafe {
30706        static_assert_uimm_bits!(IMM8, 5);
30707        let neg_one = -1;
30708        let r = vcmpss(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
30709        r.cast_unsigned()
30710    }
30711}
30712
30713/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).
30714///
30715/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_ss_mask&expand=764)
30716#[inline]
30717#[target_feature(enable = "avx512f")]
30718#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30719#[rustc_legacy_const_generics(3)]
30720#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30721pub fn _mm_mask_cmp_ss_mask<const IMM8: i32>(k1: __mmask8, a: __m128, b: __m128) -> __mmask8 {
30722    unsafe {
30723        static_assert_uimm_bits!(IMM8, 5);
30724        let r = vcmpss(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION);
30725        r.cast_unsigned()
30726    }
30727}
30728
30729/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.\
30730/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30731///
30732/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_round_ss_mask&expand=757)
30733#[inline]
30734#[target_feature(enable = "avx512f")]
30735#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30736#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30737#[rustc_legacy_const_generics(2, 3)]
30738pub fn _mm_cmp_round_ss_mask<const IMM5: i32, const SAE: i32>(a: __m128, b: __m128) -> __mmask8 {
30739    unsafe {
30740        static_assert_uimm_bits!(IMM5, 5);
30741        static_assert_mantissas_sae!(SAE);
30742        let neg_one = -1;
30743        let r = vcmpss(a, b, IMM5, neg_one, SAE);
30744        r.cast_unsigned()
30745    }
30746}
30747
30748/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).\
30749/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30750///
30751/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_round_ss_mask&expand=758)
30752#[inline]
30753#[target_feature(enable = "avx512f")]
30754#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30755#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30756#[rustc_legacy_const_generics(3, 4)]
30757pub fn _mm_mask_cmp_round_ss_mask<const IMM5: i32, const SAE: i32>(
30758    k1: __mmask8,
30759    a: __m128,
30760    b: __m128,
30761) -> __mmask8 {
30762    unsafe {
30763        static_assert_uimm_bits!(IMM5, 5);
30764        static_assert_mantissas_sae!(SAE);
30765        let r = vcmpss(a, b, IMM5, k1 as i8, SAE);
30766        r.cast_unsigned()
30767    }
30768}
30769
30770/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.
30771///
30772/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_sd_mask&expand=760)
30773#[inline]
30774#[target_feature(enable = "avx512f")]
30775#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30776#[rustc_legacy_const_generics(2)]
30777#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30778pub fn _mm_cmp_sd_mask<const IMM8: i32>(a: __m128d, b: __m128d) -> __mmask8 {
30779    unsafe {
30780        static_assert_uimm_bits!(IMM8, 5);
30781        let neg_one = -1;
30782        let r = vcmpsd(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
30783        r.cast_unsigned()
30784    }
30785}
30786
30787/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).
30788///
30789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_sd_mask&expand=761)
30790#[inline]
30791#[target_feature(enable = "avx512f")]
30792#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30793#[rustc_legacy_const_generics(3)]
30794#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30795pub fn _mm_mask_cmp_sd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d, b: __m128d) -> __mmask8 {
30796    unsafe {
30797        static_assert_uimm_bits!(IMM8, 5);
30798        let r = vcmpsd(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION);
30799        r.cast_unsigned()
30800    }
30801}
30802
30803/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.\
30804/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30805///
30806/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_round_sd_mask&expand=755)
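///
/// A minimal usage sketch (illustrative only; same feature assumptions as the
/// other examples):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_sd(f64::NAN);
/// let b = _mm_set_sd(1.0);
/// // An unordered (NaN-aware) predicate with exceptions suppressed.
/// let k = _mm_cmp_round_sd_mask::<_CMP_UNORD_Q, _MM_FROUND_NO_EXC>(a, b);
/// assert_eq!(k, 1);
/// ```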
30807#[inline]
30808#[target_feature(enable = "avx512f")]
30809#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30810#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30811#[rustc_legacy_const_generics(2, 3)]
30812pub fn _mm_cmp_round_sd_mask<const IMM5: i32, const SAE: i32>(a: __m128d, b: __m128d) -> __mmask8 {
30813    unsafe {
30814        static_assert_uimm_bits!(IMM5, 5);
30815        static_assert_mantissas_sae!(SAE);
30816        let neg_one = -1;
30817        let r = vcmpsd(a, b, IMM5, neg_one, SAE);
30818        r.cast_unsigned()
30819    }
30820}
30821
30822/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).\
30823/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30824///
30825/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_round_sd_mask&expand=756)
30826#[inline]
30827#[target_feature(enable = "avx512f")]
30828#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30829#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30830#[rustc_legacy_const_generics(3, 4)]
30831pub fn _mm_mask_cmp_round_sd_mask<const IMM5: i32, const SAE: i32>(
30832    k1: __mmask8,
30833    a: __m128d,
30834    b: __m128d,
30835) -> __mmask8 {
30836    unsafe {
30837        static_assert_uimm_bits!(IMM5, 5);
30838        static_assert_mantissas_sae!(SAE);
30839        let r = vcmpsd(a, b, IMM5, k1 as i8, SAE);
30840        r.cast_unsigned()
30841    }
30842}
30843
30844/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
30845///
30846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu32_mask&expand=1056)
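///
/// A minimal sketch highlighting the unsigned interpretation (illustrative only;
/// assumes the same nightly/target-feature setup as the other examples):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // 0xFFFF_FFFF is the largest unsigned 32-bit value, so it is *not*
/// // less than 1 here (it would be under the signed `epi32` comparison).
/// let a = _mm512_set1_epi32(-1);
/// let b = _mm512_set1_epi32(1);
/// let k = _mm512_cmplt_epu32_mask(a, b);
/// assert_eq!(k, 0);
/// ```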
30847#[inline]
30848#[target_feature(enable = "avx512f")]
30849#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30850#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30851pub fn _mm512_cmplt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
30852    unsafe { simd_bitmask::<u32x16, _>(simd_lt(a.as_u32x16(), b.as_u32x16())) }
30853}
30854
30855/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30856///
30857/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu32_mask&expand=1057)
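///
/// A minimal sketch of how the zeromask limits the result (illustrative only;
/// same feature assumptions as the other examples):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm512_set1_epi32(1);
/// let b = _mm512_set1_epi32(2);
/// // The comparison is true in every lane, but only lanes whose bit is set
/// // in `k1` can be set in the result.
/// let k1: __mmask16 = 0b0000_0000_1111_0000;
/// let k = _mm512_mask_cmplt_epu32_mask(k1, a, b);
/// assert_eq!(k, 0b0000_0000_1111_0000);
/// ```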
30858#[inline]
30859#[target_feature(enable = "avx512f")]
30860#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30861#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30862pub fn _mm512_mask_cmplt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
30863    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
30864}
30865
30866/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
30867///
30868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu32_mask&expand=1054)
30869#[inline]
30870#[target_feature(enable = "avx512f,avx512vl")]
30871#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30872#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30873pub fn _mm256_cmplt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
30874    unsafe { simd_bitmask::<u32x8, _>(simd_lt(a.as_u32x8(), b.as_u32x8())) }
30875}
30876
30877/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30878///
30879/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu32_mask&expand=1055)
30880#[inline]
30881#[target_feature(enable = "avx512f,avx512vl")]
30882#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30883#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30884pub fn _mm256_mask_cmplt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
30885    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
30886}
30887
30888/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
30889///
30890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu32_mask&expand=1052)
30891#[inline]
30892#[target_feature(enable = "avx512f,avx512vl")]
30893#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30894#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30895pub fn _mm_cmplt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
30896    unsafe { simd_bitmask::<u32x4, _>(simd_lt(a.as_u32x4(), b.as_u32x4())) }
30897}
30898
30899/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30900///
30901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu32_mask&expand=1053)
30902#[inline]
30903#[target_feature(enable = "avx512f,avx512vl")]
30904#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30905#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30906pub fn _mm_mask_cmplt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
30907    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
30908}
30909
30910/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
30911///
30912/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu32_mask&expand=933)
30913#[inline]
30914#[target_feature(enable = "avx512f")]
30915#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30916#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30917pub fn _mm512_cmpgt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
30918    unsafe { simd_bitmask::<u32x16, _>(simd_gt(a.as_u32x16(), b.as_u32x16())) }
30919}
30920
30921/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30922///
30923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu32_mask&expand=934)
30924#[inline]
30925#[target_feature(enable = "avx512f")]
30926#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30927#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30928pub fn _mm512_mask_cmpgt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
30929    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
30930}
30931
30932/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
30933///
30934/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu32_mask&expand=931)
30935#[inline]
30936#[target_feature(enable = "avx512f,avx512vl")]
30937#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30938#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30939pub fn _mm256_cmpgt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
30940    unsafe { simd_bitmask::<u32x8, _>(simd_gt(a.as_u32x8(), b.as_u32x8())) }
30941}
30942
30943/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30944///
30945/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu32_mask&expand=932)
30946#[inline]
30947#[target_feature(enable = "avx512f,avx512vl")]
30948#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30949#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30950pub fn _mm256_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
30951    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
30952}
30953
30954/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
30955///
30956/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu32_mask&expand=929)
30957#[inline]
30958#[target_feature(enable = "avx512f,avx512vl")]
30959#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30960#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30961pub fn _mm_cmpgt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
30962    unsafe { simd_bitmask::<u32x4, _>(simd_gt(a.as_u32x4(), b.as_u32x4())) }
30963}
30964
30965/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30966///
30967/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu32_mask&expand=930)
30968#[inline]
30969#[target_feature(enable = "avx512f,avx512vl")]
30970#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30971#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30972pub fn _mm_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
30973    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
30974}
30975
30976/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
30977///
30978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu32_mask&expand=995)
30979#[inline]
30980#[target_feature(enable = "avx512f")]
30981#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30982#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30983pub fn _mm512_cmple_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
30984    unsafe { simd_bitmask::<u32x16, _>(simd_le(a.as_u32x16(), b.as_u32x16())) }
30985}
30986
30987/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30988///
30989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu32_mask&expand=996)
30990#[inline]
30991#[target_feature(enable = "avx512f")]
30992#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30993#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30994pub fn _mm512_mask_cmple_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
30995    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
30996}
30997
30998/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
30999///
31000/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu32_mask&expand=993)
31001#[inline]
31002#[target_feature(enable = "avx512f,avx512vl")]
31003#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31004#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31005pub fn _mm256_cmple_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31006    unsafe { simd_bitmask::<u32x8, _>(simd_le(a.as_u32x8(), b.as_u32x8())) }
31007}
31008
31009/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31010///
31011/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu32_mask&expand=994)
31012#[inline]
31013#[target_feature(enable = "avx512f,avx512vl")]
31014#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31015#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31016pub fn _mm256_mask_cmple_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31017    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
31018}
31019
31020/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31021///
31022/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu32_mask&expand=991)
31023#[inline]
31024#[target_feature(enable = "avx512f,avx512vl")]
31025#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31026#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31027pub fn _mm_cmple_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31028    unsafe { simd_bitmask::<u32x4, _>(simd_le(a.as_u32x4(), b.as_u32x4())) }
31029}
31030
31031/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31032///
31033/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu32_mask&expand=992)
31034#[inline]
31035#[target_feature(enable = "avx512f,avx512vl")]
31036#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31037#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31038pub fn _mm_mask_cmple_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31039    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
31040}
31041
31042/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31043///
31044/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu32_mask&expand=873)
31045#[inline]
31046#[target_feature(enable = "avx512f")]
31047#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31048#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31049pub fn _mm512_cmpge_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31050    unsafe { simd_bitmask::<u32x16, _>(simd_ge(a.as_u32x16(), b.as_u32x16())) }
31051}
31052
31053/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31054///
31055/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu32_mask&expand=874)
31056#[inline]
31057#[target_feature(enable = "avx512f")]
31058#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31059#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31060pub fn _mm512_mask_cmpge_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31061    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
31062}
31063
31064/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31065///
31066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu32_mask&expand=871)
31067#[inline]
31068#[target_feature(enable = "avx512f,avx512vl")]
31069#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31070#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31071pub fn _mm256_cmpge_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31072    unsafe { simd_bitmask::<u32x8, _>(simd_ge(a.as_u32x8(), b.as_u32x8())) }
31073}
31074
31075/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31076///
31077/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu32_mask&expand=872)
31078#[inline]
31079#[target_feature(enable = "avx512f,avx512vl")]
31080#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31081#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31082pub fn _mm256_mask_cmpge_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31083    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
31084}
31085
31086/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31087///
31088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu32_mask&expand=869)
31089#[inline]
31090#[target_feature(enable = "avx512f,avx512vl")]
31091#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31092#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31093pub fn _mm_cmpge_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31094    unsafe { simd_bitmask::<u32x4, _>(simd_ge(a.as_u32x4(), b.as_u32x4())) }
31095}
31096
31097/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31098///
31099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu32_mask&expand=870)
31100#[inline]
31101#[target_feature(enable = "avx512f,avx512vl")]
31102#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31103#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31104pub fn _mm_mask_cmpge_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31105    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
31106}
31107
31108/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
31109///
31110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu32_mask&expand=807)
31111#[inline]
31112#[target_feature(enable = "avx512f")]
31113#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31114#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31115pub fn _mm512_cmpeq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31116    unsafe { simd_bitmask::<u32x16, _>(simd_eq(a.as_u32x16(), b.as_u32x16())) }
31117}
31118
31119/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31120///
31121/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu32_mask&expand=808)
31122#[inline]
31123#[target_feature(enable = "avx512f")]
31124#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31125#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31126pub fn _mm512_mask_cmpeq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31127    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
31128}
31129
31130/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
31131///
31132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu32_mask&expand=805)
31133#[inline]
31134#[target_feature(enable = "avx512f,avx512vl")]
31135#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31136#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31137pub fn _mm256_cmpeq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31138    unsafe { simd_bitmask::<u32x8, _>(simd_eq(a.as_u32x8(), b.as_u32x8())) }
31139}
31140
31141/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31142///
31143/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu32_mask&expand=806)
31144#[inline]
31145#[target_feature(enable = "avx512f,avx512vl")]
31146#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31147#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31148pub fn _mm256_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31149    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
31150}
31151
31152/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
31153///
31154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu32_mask&expand=803)
31155#[inline]
31156#[target_feature(enable = "avx512f,avx512vl")]
31157#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31158#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31159pub fn _mm_cmpeq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31160    unsafe { simd_bitmask::<u32x4, _>(simd_eq(a.as_u32x4(), b.as_u32x4())) }
31161}
31162
31163/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31164///
31165/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu32_mask&expand=804)
31166#[inline]
31167#[target_feature(enable = "avx512f,avx512vl")]
31168#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31169#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31170pub fn _mm_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31171    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
31172}
31173
31174/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
31175///
31176/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu32_mask&expand=1112)
31177#[inline]
31178#[target_feature(enable = "avx512f")]
31179#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31180#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31181pub fn _mm512_cmpneq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31182    unsafe { simd_bitmask::<u32x16, _>(simd_ne(a.as_u32x16(), b.as_u32x16())) }
31183}
31184
31185/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31186///
31187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu32_mask&expand=1113)
31188#[inline]
31189#[target_feature(enable = "avx512f")]
31190#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31191#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31192pub fn _mm512_mask_cmpneq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31193    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
31194}
31195
31196/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
31197///
31198/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu32_mask&expand=1110)
31199#[inline]
31200#[target_feature(enable = "avx512f,avx512vl")]
31201#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31202#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31203pub fn _mm256_cmpneq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31204    unsafe { simd_bitmask::<u32x8, _>(simd_ne(a.as_u32x8(), b.as_u32x8())) }
31205}
31206
31207/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31208///
31209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu32_mask&expand=1111)
31210#[inline]
31211#[target_feature(enable = "avx512f,avx512vl")]
31212#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31213#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31214pub fn _mm256_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31215    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
31216}
31217
31218/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
31219///
31220/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu32_mask&expand=1108)
31221#[inline]
31222#[target_feature(enable = "avx512f,avx512vl")]
31223#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31224#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31225pub fn _mm_cmpneq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31226    unsafe { simd_bitmask::<u32x4, _>(simd_ne(a.as_u32x4(), b.as_u32x4())) }
31227}
31228
31229/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31230///
31231/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu32_mask&expand=1109)
31232#[inline]
31233#[target_feature(enable = "avx512f,avx512vl")]
31234#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31235#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31236pub fn _mm_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31237    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
31238}
31239
31240/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31241///
31242/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu32_mask&expand=721)
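///
/// A minimal sketch using one of the `_MM_CMPINT_*` predicates (illustrative only;
/// same feature assumptions as the other examples):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/// let b = _mm512_set1_epi32(8);
/// // _MM_CMPINT_LE selects the unsigned less-than-or-equal predicate.
/// let k = _mm512_cmp_epu32_mask::<_MM_CMPINT_LE>(a, b);
/// assert_eq!(k, 0b0000_0001_1111_1111); // lanes 0..=8
/// ```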
31243#[inline]
31244#[target_feature(enable = "avx512f")]
31245#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31246#[rustc_legacy_const_generics(2)]
31247#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31248pub fn _mm512_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m512i, b: __m512i) -> __mmask16 {
31249    unsafe {
31250        static_assert_uimm_bits!(IMM3, 3);
31251        let a = a.as_u32x16();
31252        let b = b.as_u32x16();
31253        let r = match IMM3 {
31254            0 => simd_eq(a, b),
31255            1 => simd_lt(a, b),
31256            2 => simd_le(a, b),
31257            3 => i32x16::ZERO,
31258            4 => simd_ne(a, b),
31259            5 => simd_ge(a, b),
31260            6 => simd_gt(a, b),
31261            _ => i32x16::splat(-1),
31262        };
31263        simd_bitmask(r)
31264    }
31265}
31266
31267/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31268///
31269/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu32_mask&expand=722)
31270#[inline]
31271#[target_feature(enable = "avx512f")]
31272#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31273#[rustc_legacy_const_generics(3)]
31274#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31275pub fn _mm512_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
31276    k1: __mmask16,
31277    a: __m512i,
31278    b: __m512i,
31279) -> __mmask16 {
31280    unsafe {
31281        static_assert_uimm_bits!(IMM3, 3);
31282        let a = a.as_u32x16();
31283        let b = b.as_u32x16();
31284        let k1 = simd_select_bitmask(k1, i32x16::splat(-1), i32x16::ZERO);
31285        let r = match IMM3 {
31286            0 => simd_and(k1, simd_eq(a, b)),
31287            1 => simd_and(k1, simd_lt(a, b)),
31288            2 => simd_and(k1, simd_le(a, b)),
31289            3 => i32x16::ZERO,
31290            4 => simd_and(k1, simd_ne(a, b)),
31291            5 => simd_and(k1, simd_ge(a, b)),
31292            6 => simd_and(k1, simd_gt(a, b)),
31293            _ => k1,
31294        };
31295        simd_bitmask(r)
31296    }
31297}
31298
31299/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31300///
31301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu32_mask&expand=719)
31302#[inline]
31303#[target_feature(enable = "avx512f,avx512vl")]
31304#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31305#[rustc_legacy_const_generics(2)]
31306#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31307pub fn _mm256_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m256i, b: __m256i) -> __mmask8 {
31308    unsafe {
31309        static_assert_uimm_bits!(IMM3, 3);
31310        let a = a.as_u32x8();
31311        let b = b.as_u32x8();
31312        let r = match IMM3 {
31313            0 => simd_eq(a, b),
31314            1 => simd_lt(a, b),
31315            2 => simd_le(a, b),
31316            3 => i32x8::ZERO,
31317            4 => simd_ne(a, b),
31318            5 => simd_ge(a, b),
31319            6 => simd_gt(a, b),
31320            _ => i32x8::splat(-1),
31321        };
31322        simd_bitmask(r)
31323    }
31324}
31325
31326/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31327///
31328/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu32_mask&expand=720)
31329#[inline]
31330#[target_feature(enable = "avx512f,avx512vl")]
31331#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31332#[rustc_legacy_const_generics(3)]
31333#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31334pub fn _mm256_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
31335    k1: __mmask8,
31336    a: __m256i,
31337    b: __m256i,
31338) -> __mmask8 {
31339    unsafe {
31340        static_assert_uimm_bits!(IMM3, 3);
31341        let a = a.as_u32x8();
31342        let b = b.as_u32x8();
31343        let k1 = simd_select_bitmask(k1, i32x8::splat(-1), i32x8::ZERO);
31344        let r = match IMM3 {
31345            0 => simd_and(k1, simd_eq(a, b)),
31346            1 => simd_and(k1, simd_lt(a, b)),
31347            2 => simd_and(k1, simd_le(a, b)),
31348            3 => i32x8::ZERO,
31349            4 => simd_and(k1, simd_ne(a, b)),
31350            5 => simd_and(k1, simd_ge(a, b)),
31351            6 => simd_and(k1, simd_gt(a, b)),
31352            _ => k1,
31353        };
31354        simd_bitmask(r)
31355    }
31356}
31357
31358/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31359///
31360/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu32_mask&expand=717)
31361#[inline]
31362#[target_feature(enable = "avx512f,avx512vl")]
31363#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31364#[rustc_legacy_const_generics(2)]
31365#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31366pub fn _mm_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
31367    unsafe {
31368        static_assert_uimm_bits!(IMM3, 3);
31369        let a = a.as_u32x4();
31370        let b = b.as_u32x4();
31371        let r = match IMM3 {
31372            0 => simd_eq(a, b),
31373            1 => simd_lt(a, b),
31374            2 => simd_le(a, b),
31375            3 => i32x4::ZERO,
31376            4 => simd_ne(a, b),
31377            5 => simd_ge(a, b),
31378            6 => simd_gt(a, b),
31379            _ => i32x4::splat(-1),
31380        };
31381        simd_bitmask(r)
31382    }
31383}
31384
31385/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31386///
31387/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu32_mask&expand=718)
31388#[inline]
31389#[target_feature(enable = "avx512f,avx512vl")]
31390#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31391#[rustc_legacy_const_generics(3)]
31392#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31393pub fn _mm_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
31394    k1: __mmask8,
31395    a: __m128i,
31396    b: __m128i,
31397) -> __mmask8 {
31398    unsafe {
31399        static_assert_uimm_bits!(IMM3, 3);
31400        let a = a.as_u32x4();
31401        let b = b.as_u32x4();
31402        let k1 = simd_select_bitmask(k1, i32x4::splat(-1), i32x4::ZERO);
31403        let r = match IMM3 {
31404            0 => simd_and(k1, simd_eq(a, b)),
31405            1 => simd_and(k1, simd_lt(a, b)),
31406            2 => simd_and(k1, simd_le(a, b)),
31407            3 => i32x4::ZERO,
31408            4 => simd_and(k1, simd_ne(a, b)),
31409            5 => simd_and(k1, simd_ge(a, b)),
31410            6 => simd_and(k1, simd_gt(a, b)),
31411            _ => k1,
31412        };
31413        simd_bitmask(r)
31414    }
31415}
31416
31417/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
31418///
31419/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi32_mask&expand=1029)
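///
/// A minimal sketch contrasting with the unsigned `epu32` variant (illustrative
/// only; same feature assumptions as the other examples):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Under a signed comparison, -1 *is* less than 1.
/// let a = _mm512_set1_epi32(-1);
/// let b = _mm512_set1_epi32(1);
/// let k = _mm512_cmplt_epi32_mask(a, b);
/// assert_eq!(k, 0xffff);
/// ```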
31420#[inline]
31421#[target_feature(enable = "avx512f")]
31422#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31423#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31424pub fn _mm512_cmplt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31425    unsafe { simd_bitmask::<i32x16, _>(simd_lt(a.as_i32x16(), b.as_i32x16())) }
31426}
31427
31428/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31429///
31430/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi32_mask&expand=1031)
31431#[inline]
31432#[target_feature(enable = "avx512f")]
31433#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31434#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31435pub fn _mm512_mask_cmplt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31436    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
31437}
31438
31439/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
31440///
31441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi32_mask&expand=1027)
31442#[inline]
31443#[target_feature(enable = "avx512f,avx512vl")]
31444#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31445#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31446pub fn _mm256_cmplt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31447    unsafe { simd_bitmask::<i32x8, _>(simd_lt(a.as_i32x8(), b.as_i32x8())) }
31448}
31449
31450/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31451///
31452/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi32_mask&expand=1028)
31453#[inline]
31454#[target_feature(enable = "avx512f,avx512vl")]
31455#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31456#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31457pub fn _mm256_mask_cmplt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31458    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
31459}
31460
31461/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
31462///
31463/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi32_mask&expand=1025)
31464#[inline]
31465#[target_feature(enable = "avx512f,avx512vl")]
31466#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31467#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31468pub fn _mm_cmplt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31469    unsafe { simd_bitmask::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4())) }
31470}
31471
31472/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31473///
31474/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi32_mask&expand=1026)
31475#[inline]
31476#[target_feature(enable = "avx512f,avx512vl")]
31477#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31478#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31479pub fn _mm_mask_cmplt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31480    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
31481}
31482
31483/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
31484///
31485/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi32_mask&expand=905)
31486#[inline]
31487#[target_feature(enable = "avx512f")]
31488#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31489#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31490pub fn _mm512_cmpgt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31491    unsafe { simd_bitmask::<i32x16, _>(simd_gt(a.as_i32x16(), b.as_i32x16())) }
31492}
31493
31494/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31495///
31496/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi32_mask&expand=906)
31497#[inline]
31498#[target_feature(enable = "avx512f")]
31499#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31500#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31501pub fn _mm512_mask_cmpgt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31502    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
31503}
31504
31505/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
31506///
31507/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi32_mask&expand=903)
31508#[inline]
31509#[target_feature(enable = "avx512f,avx512vl")]
31510#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31511#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31512pub fn _mm256_cmpgt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31513    unsafe { simd_bitmask::<i32x8, _>(simd_gt(a.as_i32x8(), b.as_i32x8())) }
31514}
31515
31516/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31517///
31518/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi32_mask&expand=904)
31519#[inline]
31520#[target_feature(enable = "avx512f,avx512vl")]
31521#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31522#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31523pub fn _mm256_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31524    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
31525}
31526
31527/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
31528///
31529/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi32_mask&expand=901)
31530#[inline]
31531#[target_feature(enable = "avx512f,avx512vl")]
31532#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31533#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31534pub fn _mm_cmpgt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31535    unsafe { simd_bitmask::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4())) }
31536}
31537
31538/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31539///
31540/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi32_mask&expand=902)
31541#[inline]
31542#[target_feature(enable = "avx512f,avx512vl")]
31543#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31544#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31545pub fn _mm_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31546    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
31547}
31548
31549/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31550///
31551/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi32_mask&expand=971)
31552#[inline]
31553#[target_feature(enable = "avx512f")]
31554#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31555#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31556pub fn _mm512_cmple_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31557    unsafe { simd_bitmask::<i32x16, _>(simd_le(a.as_i32x16(), b.as_i32x16())) }
31558}
31559
31560/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31561///
31562/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi32_mask&expand=972)
31563#[inline]
31564#[target_feature(enable = "avx512f")]
31565#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31566#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31567pub fn _mm512_mask_cmple_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31568    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
31569}
31570
31571/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31572///
31573/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi32_mask&expand=969)
31574#[inline]
31575#[target_feature(enable = "avx512f,avx512vl")]
31576#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31577#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31578pub fn _mm256_cmple_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31579    unsafe { simd_bitmask::<i32x8, _>(simd_le(a.as_i32x8(), b.as_i32x8())) }
31580}
31581
31582/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31583///
31584/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi32_mask&expand=970)
31585#[inline]
31586#[target_feature(enable = "avx512f,avx512vl")]
31587#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31588#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31589pub fn _mm256_mask_cmple_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31590    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
31591}
31592
31593/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31594///
31595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi32_mask&expand=967)
31596#[inline]
31597#[target_feature(enable = "avx512f,avx512vl")]
31598#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31599#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31600pub fn _mm_cmple_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31601    unsafe { simd_bitmask::<i32x4, _>(simd_le(a.as_i32x4(), b.as_i32x4())) }
31602}
31603
31604/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31605///
31606/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi32_mask&expand=968)
31607#[inline]
31608#[target_feature(enable = "avx512f,avx512vl")]
31609#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31610#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31611pub fn _mm_mask_cmple_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31612    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
31613}
31614
31615/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31616///
31617/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi32_mask&expand=849)
31618#[inline]
31619#[target_feature(enable = "avx512f")]
31620#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31621#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31622pub fn _mm512_cmpge_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31623    unsafe { simd_bitmask::<i32x16, _>(simd_ge(a.as_i32x16(), b.as_i32x16())) }
31624}
31625
31626/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31627///
31628/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi32_mask&expand=850)
31629#[inline]
31630#[target_feature(enable = "avx512f")]
31631#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31632#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31633pub fn _mm512_mask_cmpge_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31634    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
31635}
31636
31637/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31638///
31639/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi32_mask&expand=847)
31640#[inline]
31641#[target_feature(enable = "avx512f,avx512vl")]
31642#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31643#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31644pub fn _mm256_cmpge_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31645    unsafe { simd_bitmask::<i32x8, _>(simd_ge(a.as_i32x8(), b.as_i32x8())) }
31646}
31647
31648/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31649///
31650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi32_mask&expand=848)
31651#[inline]
31652#[target_feature(enable = "avx512f,avx512vl")]
31653#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31654#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31655pub fn _mm256_mask_cmpge_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31656    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
31657}
31658
31659/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31660///
31661/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi32_mask&expand=845)
31662#[inline]
31663#[target_feature(enable = "avx512f,avx512vl")]
31664#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31665#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31666pub fn _mm_cmpge_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31667    unsafe { simd_bitmask::<i32x4, _>(simd_ge(a.as_i32x4(), b.as_i32x4())) }
31668}
31669
31670/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31671///
31672/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi32_mask&expand=846)
31673#[inline]
31674#[target_feature(enable = "avx512f,avx512vl")]
31675#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31676#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31677pub fn _mm_mask_cmpge_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31678    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
31679}
31680
31681/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
31682///
31683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi32_mask&expand=779)
31684#[inline]
31685#[target_feature(enable = "avx512f")]
31686#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31687#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31688pub fn _mm512_cmpeq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31689    unsafe { simd_bitmask::<i32x16, _>(simd_eq(a.as_i32x16(), b.as_i32x16())) }
31690}
31691
31692/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31693///
31694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi32_mask&expand=780)
31695#[inline]
31696#[target_feature(enable = "avx512f")]
31697#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31698#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31699pub fn _mm512_mask_cmpeq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31700    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
31701}
31702
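// Illustrative sketch (added for exposition; not part of upstream stdarch): shows
// how the equality mask returned above can be used to count matching lanes, and
// how the masked variant restricts reporting to lanes enabled in `k1`. The
// function name is hypothetical and the sketch is only compiled for tests.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cmpeq_epi32_count_matches() {
    let a = _mm512_set1_epi32(5);
    let b = _mm512_set1_epi32(5);
    // All 16 lanes compare equal, so the mask has 16 bits set.
    let k = _mm512_cmpeq_epi32_mask(a, b);
    assert_eq!(k.count_ones(), 16);
    // With a zeromask of 0x00FF only the low 8 lanes may report a match.
    let k_masked = _mm512_mask_cmpeq_epi32_mask(0x00FF, a, b);
    assert_eq!(k_masked, 0x00FF);
}
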
31703/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
31704///
31705/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi32_mask&expand=777)
31706#[inline]
31707#[target_feature(enable = "avx512f,avx512vl")]
31708#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31709#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31710pub fn _mm256_cmpeq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31711    unsafe { simd_bitmask::<i32x8, _>(simd_eq(a.as_i32x8(), b.as_i32x8())) }
31712}
31713
31714/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31715///
31716/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi32_mask&expand=778)
31717#[inline]
31718#[target_feature(enable = "avx512f,avx512vl")]
31719#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31720#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31721pub fn _mm256_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31722    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
31723}
31724
31725/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
31726///
31727/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi32_mask&expand=775)
31728#[inline]
31729#[target_feature(enable = "avx512f,avx512vl")]
31730#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31731#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31732pub fn _mm_cmpeq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31733    unsafe { simd_bitmask::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4())) }
31734}
31735
31736/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31737///
31738/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi32_mask&expand=776)
31739#[inline]
31740#[target_feature(enable = "avx512f,avx512vl")]
31741#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31742#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31743pub fn _mm_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31744    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
31745}
31746
31747/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
31748///
31749/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi32_mask&expand=1088)
31750#[inline]
31751#[target_feature(enable = "avx512f")]
31752#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31753#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31754pub fn _mm512_cmpneq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31755    unsafe { simd_bitmask::<i32x16, _>(simd_ne(a.as_i32x16(), b.as_i32x16())) }
31756}
31757
31758/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31759///
31760/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi32_mask&expand=1089)
31761#[inline]
31762#[target_feature(enable = "avx512f")]
31763#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31764#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31765pub fn _mm512_mask_cmpneq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31766    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
31767}
31768
31769/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
31770///
31771/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi32_mask&expand=1086)
31772#[inline]
31773#[target_feature(enable = "avx512f,avx512vl")]
31774#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31775#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31776pub fn _mm256_cmpneq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31777    unsafe { simd_bitmask::<i32x8, _>(simd_ne(a.as_i32x8(), b.as_i32x8())) }
31778}
31779
31780/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31781///
31782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi32_mask&expand=1087)
31783#[inline]
31784#[target_feature(enable = "avx512f,avx512vl")]
31785#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31786#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31787pub fn _mm256_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31788    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
31789}
31790
31791/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
31792///
31793/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi32_mask&expand=1084)
31794#[inline]
31795#[target_feature(enable = "avx512f,avx512vl")]
31796#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31797#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31798pub fn _mm_cmpneq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31799    unsafe { simd_bitmask::<i32x4, _>(simd_ne(a.as_i32x4(), b.as_i32x4())) }
31800}
31801
31802/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31803///
31804/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi32_mask&expand=1085)
31805#[inline]
31806#[target_feature(enable = "avx512f,avx512vl")]
31807#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31808#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31809pub fn _mm_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31810    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
31811}
31812
31813/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31814///
31815/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi32_mask&expand=697)
31816#[inline]
31817#[target_feature(enable = "avx512f")]
31818#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31819#[rustc_legacy_const_generics(2)]
31820#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31821pub fn _mm512_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m512i, b: __m512i) -> __mmask16 {
31822    unsafe {
31823        static_assert_uimm_bits!(IMM3, 3);
31824        let a = a.as_i32x16();
31825        let b = b.as_i32x16();
31826        let r = match IMM3 {
31827            0 => simd_eq(a, b),
31828            1 => simd_lt(a, b),
31829            2 => simd_le(a, b),
31830            3 => i32x16::ZERO,
31831            4 => simd_ne(a, b),
31832            5 => simd_ge(a, b),
31833            6 => simd_gt(a, b),
31834            _ => i32x16::splat(-1),
31835        };
31836        simd_bitmask(r)
31837    }
31838}
31839
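// Illustrative sketch (added for exposition; not part of upstream stdarch): shows
// how the 3-bit IMM3 predicate of `_mm512_cmp_epi32_mask` selects a comparison,
// using the `_MM_CMPINT_*` constants defined elsewhere in this module. The
// function name is hypothetical and the sketch is only compiled for tests.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cmp_epi32_predicate_mapping() {
    let a = _mm512_set1_epi32(1);
    let b = _mm512_set1_epi32(2);
    // Predicate 1 (_MM_CMPINT_LT) takes the `simd_lt` arm: every lane of `a`
    // is below the matching lane of `b`, so all 16 mask bits are set.
    assert_eq!(_mm512_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b), 0xFFFF);
    // Predicate 3 (_MM_CMPINT_FALSE) always yields an empty mask, and
    // predicate 7 (_MM_CMPINT_TRUE) always yields a full mask.
    assert_eq!(_mm512_cmp_epi32_mask::<_MM_CMPINT_FALSE>(a, b), 0x0000);
    assert_eq!(_mm512_cmp_epi32_mask::<_MM_CMPINT_TRUE>(a, b), 0xFFFF);
}
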
31840/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31841///
31842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi32_mask&expand=698)
31843#[inline]
31844#[target_feature(enable = "avx512f")]
31845#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31846#[rustc_legacy_const_generics(3)]
31847#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31848pub fn _mm512_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
31849    k1: __mmask16,
31850    a: __m512i,
31851    b: __m512i,
31852) -> __mmask16 {
31853    unsafe {
31854        static_assert_uimm_bits!(IMM3, 3);
31855        let a = a.as_i32x16();
31856        let b = b.as_i32x16();
31857        let k1 = simd_select_bitmask(k1, i32x16::splat(-1), i32x16::ZERO);
31858        let r = match IMM3 {
31859            0 => simd_and(k1, simd_eq(a, b)),
31860            1 => simd_and(k1, simd_lt(a, b)),
31861            2 => simd_and(k1, simd_le(a, b)),
31862            3 => i32x16::ZERO,
31863            4 => simd_and(k1, simd_ne(a, b)),
31864            5 => simd_and(k1, simd_ge(a, b)),
31865            6 => simd_and(k1, simd_gt(a, b)),
31866            _ => k1,
31867        };
31868        simd_bitmask(r)
31869    }
31870}
31871
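// Illustrative sketch (added for exposition; not part of upstream stdarch): shows
// how the zeromask `k1` of `_mm512_mask_cmp_epi32_mask` clears result bits for
// lanes whose `k1` bit is not set, even when the comparison itself succeeds. The
// function name is hypothetical and the sketch is only compiled for tests.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_mask_cmp_epi32_zeromask() {
    let a = _mm512_set1_epi32(1);
    let b = _mm512_set1_epi32(2);
    // Unmasked, every lane satisfies a < b.
    assert_eq!(_mm512_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b), 0xFFFF);
    // With zeromask k1, lanes whose k1 bit is clear are forced to 0 in the
    // result even though the comparison would report a match.
    assert_eq!(
        _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(0x0F0F, a, b),
        0x0F0F
    );
}
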
31872/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31873///
31874/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi32_mask&expand=695)
31875#[inline]
31876#[target_feature(enable = "avx512f,avx512vl")]
31877#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31878#[rustc_legacy_const_generics(2)]
31879#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31880pub fn _mm256_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m256i, b: __m256i) -> __mmask8 {
31881    unsafe {
31882        static_assert_uimm_bits!(IMM3, 3);
31883        let a = a.as_i32x8();
31884        let b = b.as_i32x8();
31885        let r = match IMM3 {
31886            0 => simd_eq(a, b),
31887            1 => simd_lt(a, b),
31888            2 => simd_le(a, b),
31889            3 => i32x8::ZERO,
31890            4 => simd_ne(a, b),
31891            5 => simd_ge(a, b),
31892            6 => simd_gt(a, b),
31893            _ => i32x8::splat(-1),
31894        };
31895        simd_bitmask(r)
31896    }
31897}
31898
31899/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31900///
31901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi32_mask&expand=696)
31902#[inline]
31903#[target_feature(enable = "avx512f,avx512vl")]
31904#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31905#[rustc_legacy_const_generics(3)]
31906#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31907pub fn _mm256_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
31908    k1: __mmask8,
31909    a: __m256i,
31910    b: __m256i,
31911) -> __mmask8 {
31912    unsafe {
31913        static_assert_uimm_bits!(IMM3, 3);
31914        let a = a.as_i32x8();
31915        let b = b.as_i32x8();
31916        let k1 = simd_select_bitmask(k1, i32x8::splat(-1), i32x8::ZERO);
31917        let r = match IMM3 {
31918            0 => simd_and(k1, simd_eq(a, b)),
31919            1 => simd_and(k1, simd_lt(a, b)),
31920            2 => simd_and(k1, simd_le(a, b)),
31921            3 => i32x8::ZERO,
31922            4 => simd_and(k1, simd_ne(a, b)),
31923            5 => simd_and(k1, simd_ge(a, b)),
31924            6 => simd_and(k1, simd_gt(a, b)),
31925            _ => k1,
31926        };
31927        simd_bitmask(r)
31928    }
31929}
31930
31931/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31932///
31933/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi32_mask&expand=693)
31934#[inline]
31935#[target_feature(enable = "avx512f,avx512vl")]
31936#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31937#[rustc_legacy_const_generics(2)]
31938#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31939pub fn _mm_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
31940    unsafe {
31941        static_assert_uimm_bits!(IMM3, 3);
31942        let a = a.as_i32x4();
31943        let b = b.as_i32x4();
31944        let r = match IMM3 {
31945            0 => simd_eq(a, b),
31946            1 => simd_lt(a, b),
31947            2 => simd_le(a, b),
31948            3 => i32x4::ZERO,
31949            4 => simd_ne(a, b),
31950            5 => simd_ge(a, b),
31951            6 => simd_gt(a, b),
31952            _ => i32x4::splat(-1),
31953        };
31954        simd_bitmask(r)
31955    }
31956}
31957
31958/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31959///
31960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi32_mask&expand=694)
31961#[inline]
31962#[target_feature(enable = "avx512f,avx512vl")]
31963#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31964#[rustc_legacy_const_generics(3)]
31965#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31966pub fn _mm_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
31967    k1: __mmask8,
31968    a: __m128i,
31969    b: __m128i,
31970) -> __mmask8 {
31971    unsafe {
31972        static_assert_uimm_bits!(IMM3, 3);
31973        let a = a.as_i32x4();
31974        let b = b.as_i32x4();
31975        let k1 = simd_select_bitmask(k1, i32x4::splat(-1), i32x4::ZERO);
31976        let r = match IMM3 {
31977            0 => simd_and(k1, simd_eq(a, b)),
31978            1 => simd_and(k1, simd_lt(a, b)),
31979            2 => simd_and(k1, simd_le(a, b)),
31980            3 => i32x4::ZERO,
31981            4 => simd_and(k1, simd_ne(a, b)),
31982            5 => simd_and(k1, simd_ge(a, b)),
31983            6 => simd_and(k1, simd_gt(a, b)),
31984            _ => k1,
31985        };
31986        simd_bitmask(r)
31987    }
31988}
31989
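// Illustrative sketch (added for exposition; not part of upstream stdarch): the
// 128-bit comparisons above operate on four 32-bit lanes, so only the low 4 bits
// of the returned `__mmask8` are meaningful. The function name is hypothetical
// and the sketch is only compiled for tests.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn example_cmp_epi32_mask_128bit_lane_count() {
    let a = _mm512_castsi512_si128(_mm512_set1_epi32(1));
    let b = _mm512_castsi512_si128(_mm512_set1_epi32(2));
    // Even an "all lanes match" comparison only sets the low 4 bits.
    assert_eq!(_mm_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b), 0b0000_1111);
    // The upper 4 bits of a supplied zeromask are likewise ignored.
    assert_eq!(_mm_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(0xFF, a, b), 0b0000_1111);
}
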
31990/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
31991///
31992/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu64_mask&expand=1062)
31993#[inline]
31994#[target_feature(enable = "avx512f")]
31995#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31996#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
31997pub fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
31998    unsafe { simd_bitmask::<__m512i, _>(simd_lt(a.as_u64x8(), b.as_u64x8())) }
31999}
32000
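// Illustrative sketch (added for exposition; not part of upstream stdarch): the
// `epu64` comparisons are unsigned, so an all-ones lane is u64::MAX here but -1
// for the signed `epi64` comparisons defined further below. The function name is
// hypothetical and the sketch is only compiled for tests.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cmplt_epu64_vs_epi64() {
    let zero = _mm512_setzero_si512();
    let all_ones = _mm512_set1_epi64(-1);
    // Interpreted as unsigned lanes, the all-ones pattern is u64::MAX,
    // so 0 < all_ones in every one of the 8 lanes.
    assert_eq!(_mm512_cmplt_epu64_mask(zero, all_ones), 0xFF);
    // The signed variant sees the same bits as -1, so the relation flips.
    assert_eq!(_mm512_cmplt_epi64_mask(all_ones, zero), 0xFF);
}
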
32001/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32002///
32003/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu64_mask&expand=1063)
32004#[inline]
32005#[target_feature(enable = "avx512f")]
32006#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32007#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32008pub fn _mm512_mask_cmplt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32009    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
32010}
32011
32012/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
32013///
32014/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu64_mask&expand=1060)
32015#[inline]
32016#[target_feature(enable = "avx512f,avx512vl")]
32017#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32018#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32019pub fn _mm256_cmplt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32020    unsafe { simd_bitmask::<__m256i, _>(simd_lt(a.as_u64x4(), b.as_u64x4())) }
32021}
32022
32023/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32024///
32025/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu64_mask&expand=1061)
32026#[inline]
32027#[target_feature(enable = "avx512f,avx512vl")]
32028#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32029#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32030pub fn _mm256_mask_cmplt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32031    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
32032}
32033
32034/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
32035///
32036/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu64_mask&expand=1058)
32037#[inline]
32038#[target_feature(enable = "avx512f,avx512vl")]
32039#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32040#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32041pub fn _mm_cmplt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32042    unsafe { simd_bitmask::<__m128i, _>(simd_lt(a.as_u64x2(), b.as_u64x2())) }
32043}
32044
32045/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32046///
32047/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu64_mask&expand=1059)
32048#[inline]
32049#[target_feature(enable = "avx512f,avx512vl")]
32050#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32051#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32052pub fn _mm_mask_cmplt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32053    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
32054}
32055
32056/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32057///
32058/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu64_mask&expand=939)
32059#[inline]
32060#[target_feature(enable = "avx512f")]
32061#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32062#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32063pub fn _mm512_cmpgt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32064    unsafe { simd_bitmask::<__m512i, _>(simd_gt(a.as_u64x8(), b.as_u64x8())) }
32065}
32066
32067/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32068///
32069/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu64_mask&expand=940)
32070#[inline]
32071#[target_feature(enable = "avx512f")]
32072#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32073#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32074pub fn _mm512_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32075    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32076}
32077
32078/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32079///
32080/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu64_mask&expand=937)
32081#[inline]
32082#[target_feature(enable = "avx512f,avx512vl")]
32083#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32084#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32085pub fn _mm256_cmpgt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32086    unsafe { simd_bitmask::<__m256i, _>(simd_gt(a.as_u64x4(), b.as_u64x4())) }
32087}
32088
32089/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32090///
32091/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu64_mask&expand=938)
32092#[inline]
32093#[target_feature(enable = "avx512f,avx512vl")]
32094#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32095#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32096pub fn _mm256_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32097    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32098}
32099
32100/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32101///
32102/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu64_mask&expand=935)
32103#[inline]
32104#[target_feature(enable = "avx512f,avx512vl")]
32105#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32106#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32107pub fn _mm_cmpgt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32108    unsafe { simd_bitmask::<__m128i, _>(simd_gt(a.as_u64x2(), b.as_u64x2())) }
32109}
32110
32111/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32112///
32113/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu64_mask&expand=936)
32114#[inline]
32115#[target_feature(enable = "avx512f,avx512vl")]
32116#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32117#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32118pub fn _mm_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32119    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32120}
32121
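// Illustrative sketch (added for exposition; not part of upstream stdarch):
// greater-than is expressed as "not less-than-or-equal" (_MM_CMPINT_NLE), which
// is what the masked wrappers above pass to the generic compare defined later in
// this file. The function name is hypothetical and only compiled for tests.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cmpgt_epu64_is_cmp_with_nle_predicate() {
    let a = _mm512_set1_epi64(10);
    let b = _mm512_set1_epi64(3);
    let fixed = _mm512_cmpgt_epu64_mask(a, b);
    // The generic form with the _MM_CMPINT_NLE predicate produces the same mask.
    let generic = _mm512_cmp_epu64_mask::<_MM_CMPINT_NLE>(a, b);
    assert_eq!(fixed, 0xFF);
    assert_eq!(fixed, generic);
}
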
32122/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32123///
32124/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu64_mask&expand=1001)
32125#[inline]
32126#[target_feature(enable = "avx512f")]
32127#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32128#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32129pub fn _mm512_cmple_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32130    unsafe { simd_bitmask::<__m512i, _>(simd_le(a.as_u64x8(), b.as_u64x8())) }
32131}
32132
32133/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32134///
32135/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu64_mask&expand=1002)
32136#[inline]
32137#[target_feature(enable = "avx512f")]
32138#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32139#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32140pub fn _mm512_mask_cmple_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32141    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
32142}
32143
32144/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32145///
32146/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu64_mask&expand=999)
32147#[inline]
32148#[target_feature(enable = "avx512f,avx512vl")]
32149#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32150#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32151pub fn _mm256_cmple_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32152    unsafe { simd_bitmask::<__m256i, _>(simd_le(a.as_u64x4(), b.as_u64x4())) }
32153}
32154
32155/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32156///
32157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu64_mask&expand=1000)
32158#[inline]
32159#[target_feature(enable = "avx512f,avx512vl")]
32160#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32161#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32162pub fn _mm256_mask_cmple_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32163    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
32164}
32165
32166/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32167///
32168/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu64_mask&expand=997)
32169#[inline]
32170#[target_feature(enable = "avx512f,avx512vl")]
32171#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32172#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32173pub fn _mm_cmple_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32174    unsafe { simd_bitmask::<__m128i, _>(simd_le(a.as_u64x2(), b.as_u64x2())) }
32175}
32176
32177/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32178///
32179/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu64_mask&expand=998)
32180#[inline]
32181#[target_feature(enable = "avx512f,avx512vl")]
32182#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32183#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32184pub fn _mm_mask_cmple_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32185    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
32186}
32187
32188/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32189///
32190/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu64_mask&expand=879)
32191#[inline]
32192#[target_feature(enable = "avx512f")]
32193#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32194#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32195pub fn _mm512_cmpge_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32196    unsafe { simd_bitmask::<__m512i, _>(simd_ge(a.as_u64x8(), b.as_u64x8())) }
32197}
32198
32199/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32200///
32201/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu64_mask&expand=880)
32202#[inline]
32203#[target_feature(enable = "avx512f")]
32204#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32205#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32206pub fn _mm512_mask_cmpge_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32207    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32208}
32209
32210/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32211///
32212/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu64_mask&expand=877)
32213#[inline]
32214#[target_feature(enable = "avx512f,avx512vl")]
32215#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32216#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32217pub fn _mm256_cmpge_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32218    unsafe { simd_bitmask::<__m256i, _>(simd_ge(a.as_u64x4(), b.as_u64x4())) }
32219}
32220
32221/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32222///
32223/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu64_mask&expand=878)
32224#[inline]
32225#[target_feature(enable = "avx512f,avx512vl")]
32226#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32227#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32228pub fn _mm256_mask_cmpge_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32229    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32230}
32231
32232/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32233///
32234/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu64_mask&expand=875)
32235#[inline]
32236#[target_feature(enable = "avx512f,avx512vl")]
32237#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32238#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32239pub fn _mm_cmpge_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32240    unsafe { simd_bitmask::<__m128i, _>(simd_ge(a.as_u64x2(), b.as_u64x2())) }
32241}
32242
32243/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32244///
32245/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu64_mask&expand=876)
32246#[inline]
32247#[target_feature(enable = "avx512f,avx512vl")]
32248#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32249#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32250pub fn _mm_mask_cmpge_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32251    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32252}
32253
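// Illustrative sketch (added for exposition; not part of upstream stdarch): with
// eight 64-bit lanes every bit of the `__mmask8` is a lane bit, so the
// greater-than-or-equal mask is exactly the bitwise complement of the less-than
// mask. The function name is hypothetical and only compiled for tests.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cmpge_epu64_complements_cmplt() {
    let a = _mm512_set1_epi64(4);
    let b = _mm512_set1_epi64(4);
    let ge = _mm512_cmpge_epu64_mask(a, b);
    let lt = _mm512_cmplt_epu64_mask(a, b);
    // Equal lanes satisfy ge but not lt, and the two masks never share a bit.
    assert_eq!(ge, !lt);
    assert_eq!(ge, 0xFF);
}
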
32254/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
32255///
32256/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu64_mask&expand=813)
32257#[inline]
32258#[target_feature(enable = "avx512f")]
32259#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32260#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32261pub fn _mm512_cmpeq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32262    unsafe { simd_bitmask::<__m512i, _>(simd_eq(a.as_u64x8(), b.as_u64x8())) }
32263}
32264
32265/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32266///
32267/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu64_mask&expand=814)
32268#[inline]
32269#[target_feature(enable = "avx512f")]
32270#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32271#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32272pub fn _mm512_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32273    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32274}
32275
32276/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
32277///
32278/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu64_mask&expand=811)
32279#[inline]
32280#[target_feature(enable = "avx512f,avx512vl")]
32281#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32282#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32283pub fn _mm256_cmpeq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32284    unsafe { simd_bitmask::<__m256i, _>(simd_eq(a.as_u64x4(), b.as_u64x4())) }
32285}
32286
32287/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32288///
32289/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu64_mask&expand=812)
32290#[inline]
32291#[target_feature(enable = "avx512f,avx512vl")]
32292#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32293#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32294pub fn _mm256_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32295    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32296}
32297
32298/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
32299///
32300/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu64_mask&expand=809)
32301#[inline]
32302#[target_feature(enable = "avx512f,avx512vl")]
32303#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32304#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32305pub fn _mm_cmpeq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32306    unsafe { simd_bitmask::<__m128i, _>(simd_eq(a.as_u64x2(), b.as_u64x2())) }
32307}
32308
32309/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32310///
32311/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu64_mask&expand=810)
32312#[inline]
32313#[target_feature(enable = "avx512f,avx512vl")]
32314#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32315#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32316pub fn _mm_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32317    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32318}
32319
32320/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32321///
32322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu64_mask&expand=1118)
32323#[inline]
32324#[target_feature(enable = "avx512f")]
32325#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32326#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32327pub fn _mm512_cmpneq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32328    unsafe { simd_bitmask::<__m512i, _>(simd_ne(a.as_u64x8(), b.as_u64x8())) }
32329}
32330
32331/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32332///
32333/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu64_mask&expand=1119)
32334#[inline]
32335#[target_feature(enable = "avx512f")]
32336#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32337#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32338pub fn _mm512_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32339    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
32340}
32341
32342/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32343///
32344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu64_mask&expand=1116)
32345#[inline]
32346#[target_feature(enable = "avx512f,avx512vl")]
32347#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32348#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32349pub fn _mm256_cmpneq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32350    unsafe { simd_bitmask::<__m256i, _>(simd_ne(a.as_u64x4(), b.as_u64x4())) }
32351}
32352
32353/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32354///
32355/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu64_mask&expand=1117)
32356#[inline]
32357#[target_feature(enable = "avx512f,avx512vl")]
32358#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32359#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32360pub fn _mm256_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32361    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
32362}
32363
32364/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32365///
32366/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu64_mask&expand=1114)
32367#[inline]
32368#[target_feature(enable = "avx512f,avx512vl")]
32369#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32370#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32371pub fn _mm_cmpneq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32372    unsafe { simd_bitmask::<__m128i, _>(simd_ne(a.as_u64x2(), b.as_u64x2())) }
32373}
32374
32375/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32376///
32377/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu64_mask&expand=1115)
32378#[inline]
32379#[target_feature(enable = "avx512f,avx512vl")]
32380#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32381#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32382pub fn _mm_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32383    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
32384}
32385
32386/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
32387///
32388/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu64_mask&expand=727)
32389#[inline]
32390#[target_feature(enable = "avx512f")]
32391#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32392#[rustc_legacy_const_generics(2)]
32393#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32394pub fn _mm512_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m512i, b: __m512i) -> __mmask8 {
32395    unsafe {
32396        static_assert_uimm_bits!(IMM3, 3);
32397        let a = a.as_u64x8();
32398        let b = b.as_u64x8();
32399        let r = match IMM3 {
32400            0 => simd_eq(a, b),
32401            1 => simd_lt(a, b),
32402            2 => simd_le(a, b),
32403            3 => i64x8::ZERO,
32404            4 => simd_ne(a, b),
32405            5 => simd_ge(a, b),
32406            6 => simd_gt(a, b),
32407            _ => i64x8::splat(-1),
32408        };
32409        simd_bitmask(r)
32410    }
32411}
32412
32413/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32414///
32415/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu64_mask&expand=728)
32416#[inline]
32417#[target_feature(enable = "avx512f")]
32418#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32419#[rustc_legacy_const_generics(3)]
32420#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32421pub fn _mm512_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
32422    k1: __mmask8,
32423    a: __m512i,
32424    b: __m512i,
32425) -> __mmask8 {
32426    unsafe {
32427        static_assert_uimm_bits!(IMM3, 3);
32428        let a = a.as_u64x8();
32429        let b = b.as_u64x8();
32430        let k1 = simd_select_bitmask(k1, i64x8::splat(-1), i64x8::ZERO);
32431        let r = match IMM3 {
32432            0 => simd_and(k1, simd_eq(a, b)),
32433            1 => simd_and(k1, simd_lt(a, b)),
32434            2 => simd_and(k1, simd_le(a, b)),
32435            3 => i64x8::ZERO,
32436            4 => simd_and(k1, simd_ne(a, b)),
32437            5 => simd_and(k1, simd_ge(a, b)),
32438            6 => simd_and(k1, simd_gt(a, b)),
32439            _ => k1,
32440        };
32441        simd_bitmask(r)
32442    }
32443}
32444
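// Illustrative sketch (added for exposition; not part of upstream stdarch): shows
// the constant predicates of the masked generic compare above: _MM_CMPINT_TRUE
// matches every lane so the zeromask passes through unchanged, while
// _MM_CMPINT_FALSE matches nothing. The function name is hypothetical and the
// sketch is only compiled for tests.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_mask_cmp_epu64_constant_predicates() {
    let a = _mm512_set1_epi64(7);
    let b = _mm512_set1_epi64(9);
    // Predicate 7 matches every lane, so the result is just the zeromask.
    assert_eq!(
        _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_TRUE>(0b1010_1010, a, b),
        0b1010_1010
    );
    // Predicate 3 matches nothing regardless of the zeromask.
    assert_eq!(_mm512_mask_cmp_epu64_mask::<_MM_CMPINT_FALSE>(0b1010_1010, a, b), 0);
}
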
32445/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
32446///
32447/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu64_mask&expand=725)
32448#[inline]
32449#[target_feature(enable = "avx512f,avx512vl")]
32450#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32451#[rustc_legacy_const_generics(2)]
32452#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32453pub fn _mm256_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m256i, b: __m256i) -> __mmask8 {
32454    unsafe {
32455        static_assert_uimm_bits!(IMM3, 3);
32456        let a = a.as_u64x4();
32457        let b = b.as_u64x4();
32458        let r = match IMM3 {
32459            0 => simd_eq(a, b),
32460            1 => simd_lt(a, b),
32461            2 => simd_le(a, b),
32462            3 => i64x4::ZERO,
32463            4 => simd_ne(a, b),
32464            5 => simd_ge(a, b),
32465            6 => simd_gt(a, b),
32466            _ => i64x4::splat(-1),
32467        };
32468        simd_bitmask(r)
32469    }
32470}
32471
32472/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32473///
32474/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu64_mask&expand=726)
32475#[inline]
32476#[target_feature(enable = "avx512f,avx512vl")]
32477#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32478#[rustc_legacy_const_generics(3)]
32479#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32480pub fn _mm256_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
32481    k1: __mmask8,
32482    a: __m256i,
32483    b: __m256i,
32484) -> __mmask8 {
32485    unsafe {
32486        static_assert_uimm_bits!(IMM3, 3);
32487        let a = a.as_u64x4();
32488        let b = b.as_u64x4();
32489        let k1 = simd_select_bitmask(k1, i64x4::splat(-1), i64x4::ZERO);
32490        let r = match IMM3 {
32491            0 => simd_and(k1, simd_eq(a, b)),
32492            1 => simd_and(k1, simd_lt(a, b)),
32493            2 => simd_and(k1, simd_le(a, b)),
32494            3 => i64x4::ZERO,
32495            4 => simd_and(k1, simd_ne(a, b)),
32496            5 => simd_and(k1, simd_ge(a, b)),
32497            6 => simd_and(k1, simd_gt(a, b)),
32498            _ => k1,
32499        };
32500        simd_bitmask(r)
32501    }
32502}
32503
32504/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
32505///
32506/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu64_mask&expand=723)
32507#[inline]
32508#[target_feature(enable = "avx512f,avx512vl")]
32509#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32510#[rustc_legacy_const_generics(2)]
32511#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32512pub fn _mm_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
32513    unsafe {
32514        static_assert_uimm_bits!(IMM3, 3);
32515        let a = a.as_u64x2();
32516        let b = b.as_u64x2();
32517        let r = match IMM3 {
32518            0 => simd_eq(a, b),
32519            1 => simd_lt(a, b),
32520            2 => simd_le(a, b),
32521            3 => i64x2::ZERO,
32522            4 => simd_ne(a, b),
32523            5 => simd_ge(a, b),
32524            6 => simd_gt(a, b),
32525            _ => i64x2::splat(-1),
32526        };
32527        simd_bitmask(r)
32528    }
32529}
32530
32531/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32532///
32533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu64_mask&expand=724)
32534#[inline]
32535#[target_feature(enable = "avx512f,avx512vl")]
32536#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32537#[rustc_legacy_const_generics(3)]
32538#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32539pub fn _mm_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
32540    k1: __mmask8,
32541    a: __m128i,
32542    b: __m128i,
32543) -> __mmask8 {
32544    unsafe {
32545        static_assert_uimm_bits!(IMM3, 3);
32546        let a = a.as_u64x2();
32547        let b = b.as_u64x2();
32548        let k1 = simd_select_bitmask(k1, i64x2::splat(-1), i64x2::ZERO);
32549        let r = match IMM3 {
32550            0 => simd_and(k1, simd_eq(a, b)),
32551            1 => simd_and(k1, simd_lt(a, b)),
32552            2 => simd_and(k1, simd_le(a, b)),
32553            3 => i64x2::ZERO,
32554            4 => simd_and(k1, simd_ne(a, b)),
32555            5 => simd_and(k1, simd_ge(a, b)),
32556            6 => simd_and(k1, simd_gt(a, b)),
32557            _ => k1,
32558        };
32559        simd_bitmask(r)
32560    }
32561}
32562
32563/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
32564///
32565/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi64_mask&expand=1037)
32566#[inline]
32567#[target_feature(enable = "avx512f")]
32568#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32569#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32570pub fn _mm512_cmplt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32571    unsafe { simd_bitmask::<__m512i, _>(simd_lt(a.as_i64x8(), b.as_i64x8())) }
32572}
32573
32574/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32575///
32576/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi64_mask&expand=1038)
32577#[inline]
32578#[target_feature(enable = "avx512f")]
32579#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32580#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32581pub fn _mm512_mask_cmplt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32582    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
32583}
32584
32585/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
32586///
32587/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi64_mask&expand=1035)
32588#[inline]
32589#[target_feature(enable = "avx512f,avx512vl")]
32590#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32591#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32592pub fn _mm256_cmplt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32593    unsafe { simd_bitmask::<__m256i, _>(simd_lt(a.as_i64x4(), b.as_i64x4())) }
32594}
32595
32596/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32597///
32598/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi64_mask&expand=1036)
32599#[inline]
32600#[target_feature(enable = "avx512f,avx512vl")]
32601#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32602#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32603pub fn _mm256_mask_cmplt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32604    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
32605}
32606
32607/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
32608///
32609/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi64_mask&expand=1033)
32610#[inline]
32611#[target_feature(enable = "avx512f,avx512vl")]
32612#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32613#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32614pub fn _mm_cmplt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32615    unsafe { simd_bitmask::<__m128i, _>(simd_lt(a.as_i64x2(), b.as_i64x2())) }
32616}
32617
32618/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32619///
32620/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi64_mask&expand=1034)
32621#[inline]
32622#[target_feature(enable = "avx512f,avx512vl")]
32623#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32624#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32625pub fn _mm_mask_cmplt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32626    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
32627}
32628
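// Illustrative sketch (added for exposition; not part of upstream stdarch): the
// `epi64` comparisons are signed, and a 128-bit vector holds only two 64-bit
// lanes, so just the low 2 bits of the `__mmask8` are meaningful. The function
// name is hypothetical and the sketch is only compiled for tests.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn example_cmplt_epi64_signed_128bit() {
    let neg = _mm512_castsi512_si128(_mm512_set1_epi64(-5));
    let pos = _mm512_castsi512_si128(_mm512_set1_epi64(5));
    // Both lanes hold -5 < 5, so the low 2 bits of the mask are set.
    assert_eq!(_mm_cmplt_epi64_mask(neg, pos), 0b0000_0011);
    // Restricting with zeromask 0b01 keeps only lane 0's result.
    assert_eq!(_mm_mask_cmplt_epi64_mask(0b01, neg, pos), 0b01);
}
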
32629/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32630///
32631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi64_mask&expand=913)
32632#[inline]
32633#[target_feature(enable = "avx512f")]
32634#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32635#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32636pub fn _mm512_cmpgt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32637    unsafe { simd_bitmask::<__m512i, _>(simd_gt(a.as_i64x8(), b.as_i64x8())) }
32638}
32639
32640/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32641///
32642/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi64_mask&expand=914)
32643#[inline]
32644#[target_feature(enable = "avx512f")]
32645#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32646#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32647pub fn _mm512_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32648    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32649}
32650
32651/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32652///
32653/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi64_mask&expand=911)
32654#[inline]
32655#[target_feature(enable = "avx512f,avx512vl")]
32656#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32657#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32658pub fn _mm256_cmpgt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32659    unsafe { simd_bitmask::<__m256i, _>(simd_gt(a.as_i64x4(), b.as_i64x4())) }
32660}
32661
32662/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32663///
32664/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi64_mask&expand=912)
32665#[inline]
32666#[target_feature(enable = "avx512f,avx512vl")]
32667#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32668#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32669pub fn _mm256_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32670    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32671}
32672
32673/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32674///
32675/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi64_mask&expand=909)
32676#[inline]
32677#[target_feature(enable = "avx512f,avx512vl")]
32678#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32679#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32680pub fn _mm_cmpgt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32681    unsafe { simd_bitmask::<__m128i, _>(simd_gt(a.as_i64x2(), b.as_i64x2())) }
32682}
32683
32684/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32685///
32686/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi64_mask&expand=910)
32687#[inline]
32688#[target_feature(enable = "avx512f,avx512vl")]
32689#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32690#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32691pub fn _mm_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32692    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32693}
32694
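// Hedged sketch: the masked comparisons AND the raw comparison result with
// `k1`, so a lane whose `k1` bit is clear can never set a bit in the output.
// The helper below is hypothetical and only illustrates that interaction.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_mask_cmpgt_epi64_mask() -> __mmask8 {
    let a = _mm512_set1_epi64(5);
    let b = _mm512_set1_epi64(3);
    // Every lane satisfies a > b, so the result collapses to k1 itself
    // (expected: 0b1010_1010).
    _mm512_mask_cmpgt_epi64_mask(0b1010_1010, a, b)
}
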
32695/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32696///
32697/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi64_mask&expand=977)
32698#[inline]
32699#[target_feature(enable = "avx512f")]
32700#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32701#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32702pub fn _mm512_cmple_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32703    unsafe { simd_bitmask::<__m512i, _>(simd_le(a.as_i64x8(), b.as_i64x8())) }
32704}
32705
32706/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32707///
32708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi64_mask&expand=978)
32709#[inline]
32710#[target_feature(enable = "avx512f")]
32711#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32712#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32713pub fn _mm512_mask_cmple_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32714    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
32715}
32716
32717/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32718///
32719/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi64_mask&expand=975)
32720#[inline]
32721#[target_feature(enable = "avx512f,avx512vl")]
32722#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32723#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32724pub fn _mm256_cmple_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32725    unsafe { simd_bitmask::<__m256i, _>(simd_le(a.as_i64x4(), b.as_i64x4())) }
32726}
32727
32728/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32729///
32730/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi64_mask&expand=976)
32731#[inline]
32732#[target_feature(enable = "avx512f,avx512vl")]
32733#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32734#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32735pub fn _mm256_mask_cmple_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32736    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
32737}
32738
32739/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32740///
32741/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi64_mask&expand=973)
32742#[inline]
32743#[target_feature(enable = "avx512f,avx512vl")]
32744#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32745#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32746pub fn _mm_cmple_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32747    unsafe { simd_bitmask::<__m128i, _>(simd_le(a.as_i64x2(), b.as_i64x2())) }
32748}
32749
32750/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32751///
32752/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi64_mask&expand=974)
32753#[inline]
32754#[target_feature(enable = "avx512f,avx512vl")]
32755#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32756#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32757pub fn _mm_mask_cmple_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32758    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
32759}
32760
32761/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32762///
32763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi64_mask&expand=855)
32764#[inline]
32765#[target_feature(enable = "avx512f")]
32766#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32767#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32768pub fn _mm512_cmpge_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32769    unsafe { simd_bitmask::<__m512i, _>(simd_ge(a.as_i64x8(), b.as_i64x8())) }
32770}
32771
32772/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32773///
32774/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi64_mask&expand=856)
32775#[inline]
32776#[target_feature(enable = "avx512f")]
32777#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32778#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32779pub fn _mm512_mask_cmpge_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32780    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32781}
32782
32783/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32784///
32785/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi64_mask&expand=853)
32786#[inline]
32787#[target_feature(enable = "avx512f,avx512vl")]
32788#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32789#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32790pub fn _mm256_cmpge_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32791    unsafe { simd_bitmask::<__m256i, _>(simd_ge(a.as_i64x4(), b.as_i64x4())) }
32792}
32793
32794/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32795///
32796/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi64_mask&expand=854)
32797#[inline]
32798#[target_feature(enable = "avx512f,avx512vl")]
32799#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32800#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32801pub fn _mm256_mask_cmpge_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32802    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32803}
32804
32805/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32806///
32807/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi64_mask&expand=851)
32808#[inline]
32809#[target_feature(enable = "avx512f,avx512vl")]
32810#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32811#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32812pub fn _mm_cmpge_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32813    unsafe { simd_bitmask::<__m128i, _>(simd_ge(a.as_i64x2(), b.as_i64x2())) }
32814}
32815
32816/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32817///
32818/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi64_mask&expand=852)
32819#[inline]
32820#[target_feature(enable = "avx512f,avx512vl")]
32821#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32822#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32823pub fn _mm_mask_cmpge_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32824    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32825}
32826
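// Hedged sketch: `_mm512_cmpge_epi64_mask` corresponds to the `_MM_CMPINT_NLT`
// ("not less than") predicate of the generic compare, so the two calls below
// are expected to agree for any inputs. The helper name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cmpge_vs_generic(a: __m512i, b: __m512i) -> (__mmask8, __mmask8) {
    (
        _mm512_cmpge_epi64_mask(a, b),
        _mm512_cmp_epi64_mask::<_MM_CMPINT_NLT>(a, b),
    )
}
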
32827/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
32828///
32829/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi64_mask&expand=787)
32830#[inline]
32831#[target_feature(enable = "avx512f")]
32832#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32833#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32834pub fn _mm512_cmpeq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32835    unsafe { simd_bitmask::<__m512i, _>(simd_eq(a.as_i64x8(), b.as_i64x8())) }
32836}
32837
32838/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32839///
32840/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi64_mask&expand=788)
32841#[inline]
32842#[target_feature(enable = "avx512f")]
32843#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32844#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32845pub fn _mm512_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32846    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32847}
32848
32849/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
32850///
32851/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi64_mask&expand=785)
32852#[inline]
32853#[target_feature(enable = "avx512f,avx512vl")]
32854#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32855#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32856pub fn _mm256_cmpeq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32857    unsafe { simd_bitmask::<__m256i, _>(simd_eq(a.as_i64x4(), b.as_i64x4())) }
32858}
32859
32860/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32861///
32862/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi64_mask&expand=786)
32863#[inline]
32864#[target_feature(enable = "avx512f,avx512vl")]
32865#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32866#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32867pub fn _mm256_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32868    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32869}
32870
32871/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
32872///
32873/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi64_mask&expand=783)
32874#[inline]
32875#[target_feature(enable = "avx512f,avx512vl")]
32876#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32877#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32878pub fn _mm_cmpeq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32879    unsafe { simd_bitmask::<__m128i, _>(simd_eq(a.as_i64x2(), b.as_i64x2())) }
32880}
32881
32882/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32883///
32884/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi64_mask&expand=784)
32885#[inline]
32886#[target_feature(enable = "avx512f,avx512vl")]
32887#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32888#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32889pub fn _mm_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32890    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32891}
32892
32893/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32894///
32895/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi64_mask&expand=1094)
32896#[inline]
32897#[target_feature(enable = "avx512f")]
32898#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32899#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32900pub fn _mm512_cmpneq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32901    unsafe { simd_bitmask::<__m512i, _>(simd_ne(a.as_i64x8(), b.as_i64x8())) }
32902}
32903
32904/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32905///
32906/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi64_mask&expand=1095)
32907#[inline]
32908#[target_feature(enable = "avx512f")]
32909#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32910#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32911pub fn _mm512_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32912    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
32913}
32914
32915/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32916///
32917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi64_mask&expand=1092)
32918#[inline]
32919#[target_feature(enable = "avx512f,avx512vl")]
32920#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32921#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32922pub fn _mm256_cmpneq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32923    unsafe { simd_bitmask::<__m256i, _>(simd_ne(a.as_i64x4(), b.as_i64x4())) }
32924}
32925
32926/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32927///
32928/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi64_mask&expand=1093)
32929#[inline]
32930#[target_feature(enable = "avx512f,avx512vl")]
32931#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32932#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32933pub fn _mm256_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32934    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
32935}
32936
32937/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32938///
32939/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi64_mask&expand=1090)
32940#[inline]
32941#[target_feature(enable = "avx512f,avx512vl")]
32942#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32943#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32944pub fn _mm_cmpneq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32945    unsafe { simd_bitmask::<__m128i, _>(simd_ne(a.as_i64x2(), b.as_i64x2())) }
32946}
32947
32948/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32949///
32950/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi64_mask&expand=1091)
32951#[inline]
32952#[target_feature(enable = "avx512f,avx512vl")]
32953#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32954#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32955pub fn _mm_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32956    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
32957}
32958
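// Hedged sketch: with eight 64-bit lanes the full `__mmask8` is defined by the
// comparison, so for the same inputs the not-equal mask is expected to be the
// bitwise complement of the equal mask. Hypothetical helper for illustration.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_eq_neq_complement(a: __m512i, b: __m512i) -> bool {
    let eq = _mm512_cmpeq_epi64_mask(a, b);
    let ne = _mm512_cmpneq_epi64_mask(a, b);
    ne == !eq
}
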
32959/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
32960///
32961/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi64_mask&expand=703)
32962#[inline]
32963#[target_feature(enable = "avx512f")]
32964#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32965#[rustc_legacy_const_generics(2)]
32966#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32967pub fn _mm512_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m512i, b: __m512i) -> __mmask8 {
32968    unsafe {
32969        static_assert_uimm_bits!(IMM3, 3);
32970        let a = a.as_i64x8();
32971        let b = b.as_i64x8();
32972        let r = match IMM3 {
32973            0 => simd_eq(a, b),
32974            1 => simd_lt(a, b),
32975            2 => simd_le(a, b),
32976            3 => i64x8::ZERO,
32977            4 => simd_ne(a, b),
32978            5 => simd_ge(a, b),
32979            6 => simd_gt(a, b),
32980            _ => i64x8::splat(-1),
32981        };
32982        simd_bitmask(r)
32983    }
32984}
32985
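// Hedged sketch of the const-generic predicate: `IMM3` selects one of the
// eight comparison arms in the match above. With `_MM_CMPINT_LE`, the lanes
// holding 0..=2 compare <= 2, so the expected mask is 0b0000_0111. The helper
// name and expected value are illustrative assumptions.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cmp_epi64_mask_le() -> __mmask8 {
    let a = _mm512_set_epi64(7, 6, 5, 4, 3, 2, 1, 0);
    let b = _mm512_set1_epi64(2);
    _mm512_cmp_epi64_mask::<_MM_CMPINT_LE>(a, b)
}
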
32986/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32987///
32988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi64_mask&expand=704)
32989#[inline]
32990#[target_feature(enable = "avx512f")]
32991#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32992#[rustc_legacy_const_generics(3)]
32993#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32994pub fn _mm512_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
32995    k1: __mmask8,
32996    a: __m512i,
32997    b: __m512i,
32998) -> __mmask8 {
32999    unsafe {
33000        static_assert_uimm_bits!(IMM3, 3);
33001        let a = a.as_i64x8();
33002        let b = b.as_i64x8();
33003        let k1 = simd_select_bitmask(k1, i64x8::splat(-1), i64x8::ZERO);
33004        let r = match IMM3 {
33005            0 => simd_and(k1, simd_eq(a, b)),
33006            1 => simd_and(k1, simd_lt(a, b)),
33007            2 => simd_and(k1, simd_le(a, b)),
33008            3 => i64x8::ZERO,
33009            4 => simd_and(k1, simd_ne(a, b)),
33010            5 => simd_and(k1, simd_ge(a, b)),
33011            6 => simd_and(k1, simd_gt(a, b)),
33012            _ => k1,
33013        };
33014        simd_bitmask(r)
33015    }
33016}
33017
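// Hedged sketch: in the masked form the per-lane result is ANDed with `k1`
// before the bitmask is taken, so passing `k1 = 0` yields 0 regardless of the
// predicate or the inputs. Hypothetical helper, shown only for that gating.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_mask_cmp_epi64_mask_gated(a: __m512i, b: __m512i) -> __mmask8 {
    // Expected to be 0 for any a and b, because no lane is selected by k1.
    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(0, a, b)
}
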
33018/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
33019///
33020/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi64_mask&expand=701)
33021#[inline]
33022#[target_feature(enable = "avx512f,avx512vl")]
33023#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33024#[rustc_legacy_const_generics(2)]
33025#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33026pub fn _mm256_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m256i, b: __m256i) -> __mmask8 {
33027    unsafe {
33028        static_assert_uimm_bits!(IMM3, 3);
33029        let a = a.as_i64x4();
33030        let b = b.as_i64x4();
33031        let r = match IMM3 {
33032            0 => simd_eq(a, b),
33033            1 => simd_lt(a, b),
33034            2 => simd_le(a, b),
33035            3 => i64x4::ZERO,
33036            4 => simd_ne(a, b),
33037            5 => simd_ge(a, b),
33038            6 => simd_gt(a, b),
33039            _ => i64x4::splat(-1),
33040        };
33041        simd_bitmask(r)
33042    }
33043}
33044
33045/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33046///
33047/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi64_mask&expand=702)
33048#[inline]
33049#[target_feature(enable = "avx512f,avx512vl")]
33050#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33051#[rustc_legacy_const_generics(3)]
33052#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33053pub fn _mm256_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
33054    k1: __mmask8,
33055    a: __m256i,
33056    b: __m256i,
33057) -> __mmask8 {
33058    unsafe {
33059        static_assert_uimm_bits!(IMM3, 3);
33060        let a = a.as_i64x4();
33061        let b = b.as_i64x4();
33062        let k1 = simd_select_bitmask(k1, i64x4::splat(-1), i64x4::ZERO);
33063        let r = match IMM3 {
33064            0 => simd_and(k1, simd_eq(a, b)),
33065            1 => simd_and(k1, simd_lt(a, b)),
33066            2 => simd_and(k1, simd_le(a, b)),
33067            3 => i64x4::ZERO,
33068            4 => simd_and(k1, simd_ne(a, b)),
33069            5 => simd_and(k1, simd_ge(a, b)),
33070            6 => simd_and(k1, simd_gt(a, b)),
33071            _ => k1,
33072        };
33073        simd_bitmask(r)
33074    }
33075}
33076
33077/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
33078///
33079/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi64_mask&expand=699)
33080#[inline]
33081#[target_feature(enable = "avx512f,avx512vl")]
33082#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33083#[rustc_legacy_const_generics(2)]
33084#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33085pub fn _mm_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
33086    unsafe {
33087        static_assert_uimm_bits!(IMM3, 3);
33088        let a = a.as_i64x2();
33089        let b = b.as_i64x2();
33090        let r = match IMM3 {
33091            0 => simd_eq(a, b),
33092            1 => simd_lt(a, b),
33093            2 => simd_le(a, b),
33094            3 => i64x2::ZERO,
33095            4 => simd_ne(a, b),
33096            5 => simd_ge(a, b),
33097            6 => simd_gt(a, b),
33098            _ => i64x2::splat(-1),
33099        };
33100        simd_bitmask(r)
33101    }
33102}
33103
33104/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33105///
33106/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi64_mask&expand=700)
33107#[inline]
33108#[target_feature(enable = "avx512f,avx512vl")]
33109#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33110#[rustc_legacy_const_generics(3)]
33111#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33112pub fn _mm_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
33113    k1: __mmask8,
33114    a: __m128i,
33115    b: __m128i,
33116) -> __mmask8 {
33117    unsafe {
33118        static_assert_uimm_bits!(IMM3, 3);
33119        let a = a.as_i64x2();
33120        let b = b.as_i64x2();
33121        let k1 = simd_select_bitmask(k1, i64x2::splat(-1), i64x2::ZERO);
33122        let r = match IMM3 {
33123            0 => simd_and(k1, simd_eq(a, b)),
33124            1 => simd_and(k1, simd_lt(a, b)),
33125            2 => simd_and(k1, simd_le(a, b)),
33126            3 => i64x2::ZERO,
33127            4 => simd_and(k1, simd_ne(a, b)),
33128            5 => simd_and(k1, simd_ge(a, b)),
33129            6 => simd_and(k1, simd_gt(a, b)),
33130            _ => k1,
33131        };
33132        simd_bitmask(r)
33133    }
33134}
33135
33136/// Reduce the packed 32-bit integers in a by addition. Returns the sum of all elements in a.
33137///
33138/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_epi32&expand=4556)
33139#[inline]
33140#[target_feature(enable = "avx512f")]
33141#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33142pub fn _mm512_reduce_add_epi32(a: __m512i) -> i32 {
33143    unsafe { simd_reduce_add_unordered(a.as_i32x16()) }
33144}
33145
33146/// Reduce the packed 32-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
33147///
33148/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_epi32&expand=4555)
33149#[inline]
33150#[target_feature(enable = "avx512f")]
33151#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33152pub fn _mm512_mask_reduce_add_epi32(k: __mmask16, a: __m512i) -> i32 {
33153    unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i32x16(), i32x16::ZERO)) }
33154}
33155
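// Hedged sketch: the masked reduction substitutes 0 (the additive identity)
// for inactive lanes, so with every lane equal to 1 the result is simply the
// number of set bits in `k`. Helper name and expected values are assumptions.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_reduce_add_epi32() -> (i32, i32) {
    let a = _mm512_set1_epi32(1);
    // Full reduction: 16 lanes of 1 -> expected 16.
    let full = _mm512_reduce_add_epi32(a);
    // Masked reduction with 4 active lanes -> expected 4.
    let masked = _mm512_mask_reduce_add_epi32(0b0000_0000_0000_1111, a);
    (full, masked)
}
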
33156/// Reduce the packed 64-bit integers in a by addition. Returns the sum of all elements in a.
33157///
33158/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_epi64&expand=4558)
33159#[inline]
33160#[target_feature(enable = "avx512f")]
33161#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33162pub fn _mm512_reduce_add_epi64(a: __m512i) -> i64 {
33163    unsafe { simd_reduce_add_unordered(a.as_i64x8()) }
33164}
33165
33166/// Reduce the packed 64-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
33167///
33168/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_epi64&expand=4557)
33169#[inline]
33170#[target_feature(enable = "avx512f")]
33171#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33172pub fn _mm512_mask_reduce_add_epi64(k: __mmask8, a: __m512i) -> i64 {
33173    unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i64x8(), i64x8::ZERO)) }
33174}
33175
33176/// Reduce the packed single-precision (32-bit) floating-point elements in a by addition. Returns the sum of all elements in a.
33177///
33178/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_ps&expand=4562)
33179#[inline]
33180#[target_feature(enable = "avx512f")]
33181#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33182pub fn _mm512_reduce_add_ps(a: __m512) -> f32 {
33183    unsafe {
33184        // we have to use `simd_shuffle` here because `_mm512_extractf32x8_ps` is in AVX512DQ
33185        let a = _mm256_add_ps(
33186            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
33187            simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
33188        );
33189        let a = _mm_add_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
33190        let a = _mm_add_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
33191        simd_extract::<_, f32>(a, 0) + simd_extract::<_, f32>(a, 1)
33192    }
33193}
33194
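// Hedged sketch: the float reduction above halves the vector three times
// (512 -> 256 -> 128 -> 64 bits) and finishes with a scalar add, giving an
// unordered sum of all 16 lanes. The helper and expected value below are
// illustrative assumptions.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_reduce_add_ps() -> f32 {
    // 16 lanes of 0.5 -> expected sum 8.0 (exact in f32).
    _mm512_reduce_add_ps(_mm512_set1_ps(0.5))
}
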
33195/// Reduce the packed single-precision (32-bit) floating-point elements in a by addition using mask k. Returns the sum of all active elements in a.
33196///
33197/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_ps&expand=4561)
33198#[inline]
33199#[target_feature(enable = "avx512f")]
33200#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33201pub fn _mm512_mask_reduce_add_ps(k: __mmask16, a: __m512) -> f32 {
33202    unsafe { _mm512_reduce_add_ps(simd_select_bitmask(k, a, _mm512_setzero_ps())) }
33203}
33204
33205/// Reduce the packed double-precision (64-bit) floating-point elements in a by addition. Returns the sum of all elements in a.
33206///
33207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_pd&expand=4560)
33208#[inline]
33209#[target_feature(enable = "avx512f")]
33210#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33211pub fn _mm512_reduce_add_pd(a: __m512d) -> f64 {
33212    unsafe {
33213        let a = _mm256_add_pd(
33214            _mm512_extractf64x4_pd::<0>(a),
33215            _mm512_extractf64x4_pd::<1>(a),
33216        );
33217        let a = _mm_add_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
33218        simd_extract::<_, f64>(a, 0) + simd_extract::<_, f64>(a, 1)
33219    }
33220}
33221
33222/// Reduce the packed double-precision (64-bit) floating-point elements in a by addition using mask k. Returns the sum of all active elements in a.
33223///
33224/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_pd&expand=4559)
33225#[inline]
33226#[target_feature(enable = "avx512f")]
33227#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33228pub fn _mm512_mask_reduce_add_pd(k: __mmask8, a: __m512d) -> f64 {
33229    unsafe { _mm512_reduce_add_pd(simd_select_bitmask(k, a, _mm512_setzero_pd())) }
33230}
33231
33232/// Reduce the packed 32-bit integers in a by multiplication. Returns the product of all elements in a.
33233///
33234/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_epi32&expand=4600)
33235#[inline]
33236#[target_feature(enable = "avx512f")]
33237#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33238pub fn _mm512_reduce_mul_epi32(a: __m512i) -> i32 {
33239    unsafe { simd_reduce_mul_unordered(a.as_i32x16()) }
33240}
33241
33242/// Reduce the packed 32-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
33243///
33244/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_epi32&expand=4599)
33245#[inline]
33246#[target_feature(enable = "avx512f")]
33247#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33248pub fn _mm512_mask_reduce_mul_epi32(k: __mmask16, a: __m512i) -> i32 {
33249    unsafe {
33250        simd_reduce_mul_unordered(simd_select_bitmask(
33251            k,
33252            a.as_i32x16(),
33253            _mm512_set1_epi32(1).as_i32x16(),
33254        ))
33255    }
33256}
33257
33258/// Reduce the packed 64-bit integers in a by multiplication. Returns the product of all elements in a.
33259///
33260/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_epi64&expand=4602)
33261#[inline]
33262#[target_feature(enable = "avx512f")]
33263#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33264pub fn _mm512_reduce_mul_epi64(a: __m512i) -> i64 {
33265    unsafe { simd_reduce_mul_unordered(a.as_i64x8()) }
33266}
33267
33268/// Reduce the packed 64-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
33269///
33270/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_epi64&expand=4601)
33271#[inline]
33272#[target_feature(enable = "avx512f")]
33273#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33274pub fn _mm512_mask_reduce_mul_epi64(k: __mmask8, a: __m512i) -> i64 {
33275    unsafe {
33276        simd_reduce_mul_unordered(simd_select_bitmask(
33277            k,
33278            a.as_i64x8(),
33279            _mm512_set1_epi64(1).as_i64x8(),
33280        ))
33281    }
33282}
33283
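// Hedged sketch: the masked multiply reduction substitutes 1 (the
// multiplicative identity) for inactive lanes, so masking lanes out leaves
// only the product of the active ones. Helper name and values are assumptions.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_mask_reduce_mul_epi64() -> i64 {
    let a = _mm512_set1_epi64(3);
    // Two active lanes of 3 -> expected product 9; inactive lanes contribute 1.
    _mm512_mask_reduce_mul_epi64(0b0000_0011, a)
}
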
33284/// Reduce the packed single-precision (32-bit) floating-point elements in a by multiplication. Returns the product of all elements in a.
33285///
33286/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_ps&expand=4606)
33287#[inline]
33288#[target_feature(enable = "avx512f")]
33289#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33290pub fn _mm512_reduce_mul_ps(a: __m512) -> f32 {
33291    unsafe {
33292        // we have to use `simd_shuffle` here because `_mm512_extractf32x8_ps` is in AVX512DQ
33293        let a = _mm256_mul_ps(
33294            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
33295            simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
33296        );
33297        let a = _mm_mul_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
33298        let a = _mm_mul_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
33299        simd_extract::<_, f32>(a, 0) * simd_extract::<_, f32>(a, 1)
33300    }
33301}
33302
33303/// Reduce the packed single-precision (32-bit) floating-point elements in a by multiplication using mask k. Returns the product of all active elements in a.
33304///
33305/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_ps&expand=4605)
33306#[inline]
33307#[target_feature(enable = "avx512f")]
33308#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33309pub fn _mm512_mask_reduce_mul_ps(k: __mmask16, a: __m512) -> f32 {
33310    unsafe { _mm512_reduce_mul_ps(simd_select_bitmask(k, a, _mm512_set1_ps(1.))) }
33311}
33312
33313/// Reduce the packed double-precision (64-bit) floating-point elements in a by multiplication. Returns the product of all elements in a.
33314///
33315/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_pd&expand=4604)
33316#[inline]
33317#[target_feature(enable = "avx512f")]
33318#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33319pub fn _mm512_reduce_mul_pd(a: __m512d) -> f64 {
33320    unsafe {
33321        let a = _mm256_mul_pd(
33322            _mm512_extractf64x4_pd::<0>(a),
33323            _mm512_extractf64x4_pd::<1>(a),
33324        );
33325        let a = _mm_mul_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
33326        simd_extract::<_, f64>(a, 0) * simd_extract::<_, f64>(a, 1)
33327    }
33328}
33329
33330/// Reduce the packed double-precision (64-bit) floating-point elements in a by multiplication using mask k. Returns the product of all active elements in a.
33331///
33332/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_pd&expand=4603)
33333#[inline]
33334#[target_feature(enable = "avx512f")]
33335#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33336pub fn _mm512_mask_reduce_mul_pd(k: __mmask8, a: __m512d) -> f64 {
33337    unsafe { _mm512_reduce_mul_pd(simd_select_bitmask(k, a, _mm512_set1_pd(1.))) }
33338}
33339
33340/// Reduce the packed signed 32-bit integers in a by maximum. Returns the maximum of all elements in a.
33341///
33342/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epi32&expand=4576)
33343#[inline]
33344#[target_feature(enable = "avx512f")]
33345#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33346pub fn _mm512_reduce_max_epi32(a: __m512i) -> i32 {
33347    unsafe { simd_reduce_max(a.as_i32x16()) }
33348}
33349
33350/// Reduce the packed signed 32-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
33351///
33352/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epi32&expand=4575)
33353#[inline]
33354#[target_feature(enable = "avx512f")]
33355#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33356pub fn _mm512_mask_reduce_max_epi32(k: __mmask16, a: __m512i) -> i32 {
33357    unsafe {
33358        simd_reduce_max(simd_select_bitmask(
33359            k,
33360            a.as_i32x16(),
33361            i32x16::splat(i32::MIN),
33362        ))
33363    }
33364}
33365
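// Hedged sketch: inactive lanes are replaced with `i32::MIN` (the identity of
// `max`), so they can never win the reduction; with an all-zero mask the
// result is therefore `i32::MIN` itself. Hypothetical helper for illustration.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_mask_reduce_max_epi32() -> (i32, i32) {
    let a = _mm512_set1_epi32(7);
    (
        _mm512_mask_reduce_max_epi32(0b0000_0000_0000_0001, a), // expected: 7
        _mm512_mask_reduce_max_epi32(0, a),                     // expected: i32::MIN
    )
}
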
33366/// Reduce the packed signed 64-bit integers in a by maximum. Returns the maximum of all elements in a.
33367///
33368/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epi64&expand=4578)
33369#[inline]
33370#[target_feature(enable = "avx512f")]
33371#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33372pub fn _mm512_reduce_max_epi64(a: __m512i) -> i64 {
33373    unsafe { simd_reduce_max(a.as_i64x8()) }
33374}
33375
33376/// Reduce the packed signed 64-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
33377///
33378/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epi64&expand=4577)
33379#[inline]
33380#[target_feature(enable = "avx512f")]
33381#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33382pub fn _mm512_mask_reduce_max_epi64(k: __mmask8, a: __m512i) -> i64 {
33383    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(i64::MIN))) }
33384}
33385
33386/// Reduce the packed unsigned 32-bit integers in a by maximum. Returns the maximum of all elements in a.
33387///
33388/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epu32&expand=4580)
33389#[inline]
33390#[target_feature(enable = "avx512f")]
33391#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33392pub fn _mm512_reduce_max_epu32(a: __m512i) -> u32 {
33393    unsafe { simd_reduce_max(a.as_u32x16()) }
33394}
33395
33396/// Reduce the packed unsigned 32-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
33397///
33398/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epu32&expand=4579)
33399#[inline]
33400#[target_feature(enable = "avx512f")]
33401#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33402pub fn _mm512_mask_reduce_max_epu32(k: __mmask16, a: __m512i) -> u32 {
33403    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u32x16(), u32x16::ZERO)) }
33404}
33405
33406/// Reduce the packed unsigned 64-bit integers in a by maximum. Returns the maximum of all elements in a.
33407///
33408/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epu64&expand=4582)
33409#[inline]
33410#[target_feature(enable = "avx512f")]
33411#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33412pub fn _mm512_reduce_max_epu64(a: __m512i) -> u64 {
33413    unsafe { simd_reduce_max(a.as_u64x8()) }
33414}
33415
33416/// Reduce the packed unsigned 64-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
33417///
33418/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epu64&expand=4581)
33419#[inline]
33420#[target_feature(enable = "avx512f")]
33421#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33422pub fn _mm512_mask_reduce_max_epu64(k: __mmask8, a: __m512i) -> u64 {
33423    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u64x8(), u64x8::ZERO)) }
33424}
33425
33426/// Reduce the packed single-precision (32-bit) floating-point elements in a by maximum. Returns the maximum of all elements in a.
33427///
33428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_ps&expand=4586)
33429#[inline]
33430#[target_feature(enable = "avx512f")]
33431#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33432pub fn _mm512_reduce_max_ps(a: __m512) -> f32 {
33433    unsafe {
33434        let a = _mm256_max_ps(
33435            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
33436            simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
33437        );
33438        let a = _mm_max_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
33439        let a = _mm_max_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
33440        _mm_cvtss_f32(_mm_max_ss(a, _mm_movehdup_ps(a)))
33441    }
33442}
33443
33444/// Reduce the packed single-precision (32-bit) floating-point elements in a by maximum using mask k. Returns the maximum of all active elements in a.
33445///
33446/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_ps&expand=4585)
33447#[inline]
33448#[target_feature(enable = "avx512f")]
33449#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33450pub fn _mm512_mask_reduce_max_ps(k: __mmask16, a: __m512) -> f32 {
33451    _mm512_reduce_max_ps(_mm512_mask_mov_ps(_mm512_set1_ps(f32::MIN), k, a))
33452}
33453
33454/// Reduce the packed double-precision (64-bit) floating-point elements in a by maximum. Returns the maximum of all elements in a.
33455///
33456/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_pd&expand=4584)
33457#[inline]
33458#[target_feature(enable = "avx512f")]
33459#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33460pub fn _mm512_reduce_max_pd(a: __m512d) -> f64 {
33461    unsafe {
33462        let a = _mm256_max_pd(
33463            _mm512_extractf64x4_pd::<0>(a),
33464            _mm512_extractf64x4_pd::<1>(a),
33465        );
33466        let a = _mm_max_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
33467        _mm_cvtsd_f64(_mm_max_sd(a, simd_shuffle!(a, a, [1, 0])))
33468    }
33469}
33470
33471/// Reduce the packed double-precision (64-bit) floating-point elements in a by maximum using mask k. Returns the maximum of all active elements in a.
33472///
33473/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_pd&expand=4583)
33474#[inline]
33475#[target_feature(enable = "avx512f")]
33476#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33477pub fn _mm512_mask_reduce_max_pd(k: __mmask8, a: __m512d) -> f64 {
33478    _mm512_reduce_max_pd(_mm512_mask_mov_pd(_mm512_set1_pd(f64::MIN), k, a))
33479}
33480
33481/// Reduce the packed signed 32-bit integers in a by minimum. Returns the minimum of all elements in a.
33482///
33483/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epi32&expand=4588)
33484#[inline]
33485#[target_feature(enable = "avx512f")]
33486#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33487pub fn _mm512_reduce_min_epi32(a: __m512i) -> i32 {
33488    unsafe { simd_reduce_min(a.as_i32x16()) }
33489}
33490
33491/// Reduce the packed signed 32-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
33492///
33493/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epi32&expand=4587)
33494#[inline]
33495#[target_feature(enable = "avx512f")]
33496#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33497pub fn _mm512_mask_reduce_min_epi32(k: __mmask16, a: __m512i) -> i32 {
33498    unsafe {
33499        simd_reduce_min(simd_select_bitmask(
33500            k,
33501            a.as_i32x16(),
33502            i32x16::splat(i32::MAX),
33503        ))
33504    }
33505}
33506
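// Hedged sketch: the mirror image of the masked maximum; inactive lanes become
// `i32::MAX`, the identity of `min`, so only the selected lanes compete.
// Helper name and expected value are illustrative assumptions.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_mask_reduce_min_epi32() -> i32 {
    // `_mm512_set_epi32` lists lanes from e15 down to e0, so lane i holds i + 1.
    let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
    // Only lanes 4..=7 (holding 5, 6, 7, 8) are active, so the expected minimum is 5.
    _mm512_mask_reduce_min_epi32(0b0000_0000_1111_0000, a)
}
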
33507/// Reduce the packed signed 64-bit integers in a by minimum. Returns the minimum of all elements in a.
33508///
33509/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epi64&expand=4590)
33510#[inline]
33511#[target_feature(enable = "avx512f")]
33512#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33513pub fn _mm512_reduce_min_epi64(a: __m512i) -> i64 {
33514    unsafe { simd_reduce_min(a.as_i64x8()) }
33515}
33516
33517/// Reduce the packed signed 64-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
33518///
33519/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epi64&expand=4589)
33520#[inline]
33521#[target_feature(enable = "avx512f")]
33522#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33523pub fn _mm512_mask_reduce_min_epi64(k: __mmask8, a: __m512i) -> i64 {
33524    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(i64::MAX))) }
33525}
33526
33527/// Reduce the packed unsigned 32-bit integers in a by minimum. Returns the minimum of all elements in a.
33528///
33529/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epu32&expand=4592)
33530#[inline]
33531#[target_feature(enable = "avx512f")]
33532#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33533pub fn _mm512_reduce_min_epu32(a: __m512i) -> u32 {
33534    unsafe { simd_reduce_min(a.as_u32x16()) }
33535}
33536
33537/// Reduce the packed unsigned 32-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
33538///
33539/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epu32&expand=4591)
33540#[inline]
33541#[target_feature(enable = "avx512f")]
33542#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33543pub fn _mm512_mask_reduce_min_epu32(k: __mmask16, a: __m512i) -> u32 {
33544    unsafe {
33545        simd_reduce_min(simd_select_bitmask(
33546            k,
33547            a.as_u32x16(),
33548            u32x16::splat(u32::MAX),
33549        ))
33550    }
33551}
33552
33553/// Reduce the packed unsigned 64-bit integers in a by minimum. Returns the minimum of all elements in a.
33554///
33555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epu64&expand=4594)
33556#[inline]
33557#[target_feature(enable = "avx512f")]
33558#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33559pub fn _mm512_reduce_min_epu64(a: __m512i) -> u64 {
33560    unsafe { simd_reduce_min(a.as_u64x8()) }
33561}
33562
33563/// Reduce the packed unsigned 64-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
33564///
33565/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epu64&expand=4589)
33566#[inline]
33567#[target_feature(enable = "avx512f")]
33568#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33569pub fn _mm512_mask_reduce_min_epu64(k: __mmask8, a: __m512i) -> u64 {
33570    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u64x8(), u64x8::splat(u64::MAX))) }
33571}
33572
33573/// Reduce the packed single-precision (32-bit) floating-point elements in a by minimum. Returns the minimum of all elements in a.
33574///
33575/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_ps&expand=4598)
33576#[inline]
33577#[target_feature(enable = "avx512f")]
33578#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33579pub fn _mm512_reduce_min_ps(a: __m512) -> f32 {
33580    unsafe {
33581        let a = _mm256_min_ps(
33582            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
33583            simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
33584        );
33585        let a = _mm_min_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
33586        let a = _mm_min_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
33587        _mm_cvtss_f32(_mm_min_ss(a, _mm_movehdup_ps(a)))
33588    }
33589}
33590
33591/// Reduce the packed single-precision (32-bit) floating-point elements in a by minimum using mask k. Returns the minimum of all active elements in a.
33592///
33593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_ps&expand=4597)
33594#[inline]
33595#[target_feature(enable = "avx512f")]
33596#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33597pub fn _mm512_mask_reduce_min_ps(k: __mmask16, a: __m512) -> f32 {
33598    _mm512_reduce_min_ps(_mm512_mask_mov_ps(_mm512_set1_ps(f32::MAX), k, a))
33599}
33600
33601/// Reduce the packed double-precision (64-bit) floating-point elements in a by minimum. Returns the minimum of all elements in a.
33602///
33603/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_pd&expand=4596)
33604#[inline]
33605#[target_feature(enable = "avx512f")]
33606#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33607pub fn _mm512_reduce_min_pd(a: __m512d) -> f64 {
33608    unsafe {
33609        let a = _mm256_min_pd(
33610            _mm512_extractf64x4_pd::<0>(a),
33611            _mm512_extractf64x4_pd::<1>(a),
33612        );
33613        let a = _mm_min_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
33614        _mm_cvtsd_f64(_mm_min_sd(a, simd_shuffle!(a, a, [1, 0])))
33615    }
33616}
33617
33618/// Reduce the packed double-precision (64-bit) floating-point elements in a by minimum using mask k. Returns the minimum of all active elements in a.
33619///
33620/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_pd&expand=4595)
33621#[inline]
33622#[target_feature(enable = "avx512f")]
33623#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33624pub fn _mm512_mask_reduce_min_pd(k: __mmask8, a: __m512d) -> f64 {
33625    _mm512_reduce_min_pd(_mm512_mask_mov_pd(_mm512_set1_pd(f64::MAX), k, a))
33626}
33627
33628/// Reduce the packed 32-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
33629///
33630/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_and_epi32&expand=4564)
33631#[inline]
33632#[target_feature(enable = "avx512f")]
33633#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33634pub fn _mm512_reduce_and_epi32(a: __m512i) -> i32 {
33635    unsafe { simd_reduce_and(a.as_i32x16()) }
33636}
33637
33638/// Reduce the packed 32-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
33639///
33640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_and_epi32&expand=4563)
33641#[inline]
33642#[target_feature(enable = "avx512f")]
33643#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33644pub fn _mm512_mask_reduce_and_epi32(k: __mmask16, a: __m512i) -> i32 {
33645    unsafe { simd_reduce_and(simd_select_bitmask(k, a.as_i32x16(), i32x16::splat(-1))) }
33646}
33647
33648/// Reduce the packed 64-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
33649///
33650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_and_epi64&expand=4566)
33651#[inline]
33652#[target_feature(enable = "avx512f")]
33653#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33654pub fn _mm512_reduce_and_epi64(a: __m512i) -> i64 {
33655    unsafe { simd_reduce_and(a.as_i64x8()) }
33656}
33657
33658/// Reduce the packed 64-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
33659///
33660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_and_epi64&expand=4557)
33661#[inline]
33662#[target_feature(enable = "avx512f")]
33663#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33664pub fn _mm512_mask_reduce_and_epi64(k: __mmask8, a: __m512i) -> i64 {
33665    unsafe { simd_reduce_and(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(-1))) }
33666}
33667
33668/// Reduce the packed 32-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
33669///
33670/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_or_epi32&expand=4608)
33671#[inline]
33672#[target_feature(enable = "avx512f")]
33673#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33674pub fn _mm512_reduce_or_epi32(a: __m512i) -> i32 {
33675    unsafe { simd_reduce_or(a.as_i32x16()) }
33676}
33677
33678/// Reduce the packed 32-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
33679///
33680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_or_epi32&expand=4607)
33681#[inline]
33682#[target_feature(enable = "avx512f")]
33683#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33684pub fn _mm512_mask_reduce_or_epi32(k: __mmask16, a: __m512i) -> i32 {
33685    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i32x16(), i32x16::ZERO)) }
33686}
33687
33688/// Reduce the packed 64-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
33689///
33690/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_or_epi64&expand=4610)
33691#[inline]
33692#[target_feature(enable = "avx512f")]
33693#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33694pub fn _mm512_reduce_or_epi64(a: __m512i) -> i64 {
33695    unsafe { simd_reduce_or(a.as_i64x8()) }
33696}
33697
33698/// Reduce the packed 64-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
33699///
33700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_or_epi64&expand=4609)
33701#[inline]
33702#[target_feature(enable = "avx512f")]
33703#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33704pub fn _mm512_mask_reduce_or_epi64(k: __mmask8, a: __m512i) -> i64 {
33705    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i64x8(), i64x8::ZERO)) }
33706}
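
// Illustrative sketch (hypothetical test-only helper, not part of this module's
// API): the masked reductions substitute the operation's identity element for
// inactive lanes (-1 for AND, 0 for OR) before folding, so clear mask bits can
// never change the result.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn mask_reduce_identity_sketch() {
    let a = _mm512_set1_epi32(0b1010);
    // Only the low four lanes are active; the remaining lanes behave as the
    // identity of the respective operation.
    assert_eq!(_mm512_mask_reduce_and_epi32(0b1111, a), 0b1010);
    assert_eq!(_mm512_mask_reduce_or_epi32(0b1111, a), 0b1010);
}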
33707
33708/// Returns vector of type `__m512d` with indeterminate elements.
33709/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
33710/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
33711/// In practice, this is typically equivalent to [`mem::zeroed`].
33712///
33713/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_pd)
33714#[inline]
33715#[target_feature(enable = "avx512f")]
33716#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33717// This intrinsic has no corresponding instruction.
33718pub fn _mm512_undefined_pd() -> __m512d {
33719    unsafe { const { mem::zeroed() } }
33720}
33721
33722/// Returns vector of type `__m512` with indeterminate elements.
33723/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
33724/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
33725/// In practice, this is typically equivalent to [`mem::zeroed`].
33726///
33727/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_ps)
33728#[inline]
33729#[target_feature(enable = "avx512f")]
33730#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33731// This intrinsic has no corresponding instruction.
33732pub fn _mm512_undefined_ps() -> __m512 {
33733    unsafe { const { mem::zeroed() } }
33734}
33735
33736/// Returns vector of type `__m512i` with indeterminate elements.
33737/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
33738/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
33739/// In practice, this is typically equivalent to [`mem::zeroed`].
33740///
33741/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_epi32&expand=5995)
33742#[inline]
33743#[target_feature(enable = "avx512f")]
33744#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33745// This intrinsic has no corresponding instruction.
33746pub fn _mm512_undefined_epi32() -> __m512i {
33747    unsafe { const { mem::zeroed() } }
33748}
33749
33750/// Returns vector of type `__m512` with indeterminate elements.
33751/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
33752/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
33753/// In practice, this is typically equivalent to [`mem::zeroed`].
33754///
33755/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined&expand=5994)
33756#[inline]
33757#[target_feature(enable = "avx512f")]
33758#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33759// This intrinsic has no corresponding instruction.
33760pub fn _mm512_undefined() -> __m512 {
33761    unsafe { const { mem::zeroed() } }
33762}
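
// Illustrative sketch (hypothetical test-only helper, not part of this module's
// API): a typical use of the `_mm512_undefined*` family is a scratch vector
// whose initial contents are irrelevant because every lane is overwritten
// before it is read.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn undefined_scratch_sketch() {
    let scratch = _mm512_undefined_epi32();
    // All 16 mask bits are set, so every lane of `scratch` is replaced.
    let v = _mm512_mask_mov_epi32(scratch, 0xFFFF, _mm512_set1_epi32(1));
    assert_eq!(_mm512_reduce_add_epi32(v), 16);
}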
33763
33764/// Load 512-bits (composed of 16 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
33765///
33766/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi32&expand=3377)
33767#[inline]
33768#[target_feature(enable = "avx512f")]
33769#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33770#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
33771pub unsafe fn _mm512_loadu_epi32(mem_addr: *const i32) -> __m512i {
33772    ptr::read_unaligned(mem_addr as *const __m512i)
33773}
33774
33775/// Load 256-bits (composed of 8 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
33776///
33777/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi32&expand=3374)
33778#[inline]
33779#[target_feature(enable = "avx512f,avx512vl")]
33780#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33781#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
33782pub unsafe fn _mm256_loadu_epi32(mem_addr: *const i32) -> __m256i {
33783    ptr::read_unaligned(mem_addr as *const __m256i)
33784}
33785
33786/// Load 128-bits (composed of 4 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
33787///
33788/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi32&expand=3371)
33789#[inline]
33790#[target_feature(enable = "avx512f,avx512vl")]
33791#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33792#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
33793pub unsafe fn _mm_loadu_epi32(mem_addr: *const i32) -> __m128i {
33794    ptr::read_unaligned(mem_addr as *const __m128i)
33795}
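
// Illustrative sketch (hypothetical test-only helper, not part of this module's
// API): the `loadu` forms place no alignment requirement on `mem_addr`, so a
// read starting at an arbitrary element offset is valid.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn loadu_epi32_sketch() {
    let data = [7i32; 17];
    // Deliberately skip the first element; the pointer is only 4-byte aligned.
    let v = _mm512_loadu_epi32(data.as_ptr().add(1));
    assert_eq!(_mm512_reduce_add_epi32(v), 7 * 16);
}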
33796
33797/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33798///
33799/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_storeu_epi16&expand=1460)
33800#[inline]
33801#[target_feature(enable = "avx512f")]
33802#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33803#[cfg_attr(test, assert_instr(vpmovdw))]
33804pub unsafe fn _mm512_mask_cvtepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask16, a: __m512i) {
33805    vpmovdwmem(mem_addr.cast(), a.as_i32x16(), k);
33806}
33807
33808/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33809///
33810/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_storeu_epi16&expand=1462)
33811#[inline]
33812#[target_feature(enable = "avx512f,avx512vl")]
33813#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33814#[cfg_attr(test, assert_instr(vpmovdw))]
33815pub unsafe fn _mm256_mask_cvtepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
33816    vpmovdwmem256(mem_addr.cast(), a.as_i32x8(), k);
33817}
33818
33819/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33820///
33821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_storeu_epi16&expand=1461)
33822#[inline]
33823#[target_feature(enable = "avx512f,avx512vl")]
33824#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33825#[cfg_attr(test, assert_instr(vpmovdw))]
33826pub unsafe fn _mm_mask_cvtepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
33827    vpmovdwmem128(mem_addr.cast(), a.as_i32x4(), k);
33828}
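
// Illustrative sketch (hypothetical test-only helper, not part of this module's
// API): lane j of `a` is truncated to 16 bits and written to element j of the
// destination only when bit j of the mask is set; all other elements of the
// buffer keep their previous contents.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn mask_cvtepi32_storeu_epi16_sketch() {
    let a = _mm512_set1_epi32(0x0001_0002);
    let mut buf = [-1i16; 16];
    _mm512_mask_cvtepi32_storeu_epi16(buf.as_mut_ptr(), 0b11, a);
    assert_eq!(buf[0], 0x0002); // truncated, upper 16 bits dropped
    assert_eq!(buf[1], 0x0002);
    assert_eq!(buf[2], -1); // mask bit clear: memory untouched
}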
33829
33830/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33831///
33832/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_storeu_epi16&expand=1833)
33833#[inline]
33834#[target_feature(enable = "avx512f")]
33835#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33836#[cfg_attr(test, assert_instr(vpmovsdw))]
33837pub unsafe fn _mm512_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask16, a: __m512i) {
33838    vpmovsdwmem(mem_addr.cast(), a.as_i32x16(), k);
33839}
33840
33841/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33842///
33843/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_storeu_epi16&expand=1832)
33844#[inline]
33845#[target_feature(enable = "avx512f,avx512vl")]
33846#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33847#[cfg_attr(test, assert_instr(vpmovsdw))]
33848pub unsafe fn _mm256_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
33849    vpmovsdwmem256(mem_addr.cast(), a.as_i32x8(), k);
33850}
33851
33852/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33853///
33854/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_storeu_epi16&expand=1831)
33855#[inline]
33856#[target_feature(enable = "avx512f,avx512vl")]
33857#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33858#[cfg_attr(test, assert_instr(vpmovsdw))]
33859pub unsafe fn _mm_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
33860    vpmovsdwmem128(mem_addr.cast(), a.as_i32x4(), k);
33861}
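
// Illustrative sketch (hypothetical test-only helper, not part of this module's
// API): unlike the truncating variant above, the saturating form clamps
// out-of-range values to the i16 range instead of dropping their upper bits.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn mask_cvtsepi32_storeu_epi16_sketch() {
    let a = _mm512_set1_epi32(100_000); // does not fit in an i16
    let mut buf = [0i16; 16];
    _mm512_mask_cvtsepi32_storeu_epi16(buf.as_mut_ptr(), 0xFFFF, a);
    assert_eq!(buf[0], i16::MAX); // saturated rather than truncated
}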
33862
33863/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33864///
33865/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_storeu_epi16&expand=2068)
33866#[inline]
33867#[target_feature(enable = "avx512f")]
33868#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33869#[cfg_attr(test, assert_instr(vpmovusdw))]
33870pub unsafe fn _mm512_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask16, a: __m512i) {
33871    vpmovusdwmem(mem_addr.cast(), a.as_i32x16(), k);
33872}
33873
33874/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33875///
33876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_storeu_epi16&expand=2067)
33877#[inline]
33878#[target_feature(enable = "avx512f,avx512vl")]
33879#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33880#[cfg_attr(test, assert_instr(vpmovusdw))]
33881pub unsafe fn _mm256_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
33882    vpmovusdwmem256(mem_addr.cast(), a.as_i32x8(), k);
33883}
33884
33885/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33886///
33887/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_storeu_epi16&expand=2066)
33888#[inline]
33889#[target_feature(enable = "avx512f,avx512vl")]
33890#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33891#[cfg_attr(test, assert_instr(vpmovusdw))]
33892pub unsafe fn _mm_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
33893    vpmovusdwmem128(mem_addr.cast(), a.as_i32x4(), k);
33894}
33895
33896/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33897///
33898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_storeu_epi8&expand=1463)
33899#[inline]
33900#[target_feature(enable = "avx512f")]
33901#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33902#[cfg_attr(test, assert_instr(vpmovdb))]
33903pub unsafe fn _mm512_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
33904    vpmovdbmem(mem_addr, a.as_i32x16(), k);
33905}
33906
33907/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33908///
33909/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_storeu_epi8&expand=1462)
33910#[inline]
33911#[target_feature(enable = "avx512f,avx512vl")]
33912#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33913#[cfg_attr(test, assert_instr(vpmovdb))]
33914pub unsafe fn _mm256_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
33915    vpmovdbmem256(mem_addr, a.as_i32x8(), k);
33916}
33917
33918/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33919///
33920/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_storeu_epi8&expand=1461)
33921#[inline]
33922#[target_feature(enable = "avx512f,avx512vl")]
33923#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33924#[cfg_attr(test, assert_instr(vpmovdb))]
33925pub unsafe fn _mm_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
33926    vpmovdbmem128(mem_addr, a.as_i32x4(), k);
33927}
33928
33929/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33930///
33931/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_storeu_epi8&expand=1836)
33932#[inline]
33933#[target_feature(enable = "avx512f")]
33934#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33935#[cfg_attr(test, assert_instr(vpmovsdb))]
33936pub unsafe fn _mm512_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
33937    vpmovsdbmem(mem_addr, a.as_i32x16(), k);
33938}
33939
33940/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33941///
33942/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_storeu_epi8&expand=1835)
33943#[inline]
33944#[target_feature(enable = "avx512f,avx512vl")]
33945#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33946#[cfg_attr(test, assert_instr(vpmovsdb))]
33947pub unsafe fn _mm256_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
33948    vpmovsdbmem256(mem_addr, a.as_i32x8(), k);
33949}
33950
33951/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33952///
33953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_storeu_epi8&expand=1834)
33954#[inline]
33955#[target_feature(enable = "avx512f,avx512vl")]
33956#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33957#[cfg_attr(test, assert_instr(vpmovsdb))]
33958pub unsafe fn _mm_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
33959    vpmovsdbmem128(mem_addr, a.as_i32x4(), k);
33960}
33961
33962/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33963///
33964/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_storeu_epi8&expand=2071)
33965#[inline]
33966#[target_feature(enable = "avx512f")]
33967#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33968#[cfg_attr(test, assert_instr(vpmovusdb))]
33969pub unsafe fn _mm512_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
33970    vpmovusdbmem(mem_addr, a.as_i32x16(), k);
33971}
33972
33973/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33974///
33975/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_storeu_epi8&expand=2070)
33976#[inline]
33977#[target_feature(enable = "avx512f,avx512vl")]
33978#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33979#[cfg_attr(test, assert_instr(vpmovusdb))]
33980pub unsafe fn _mm256_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
33981    vpmovusdbmem256(mem_addr, a.as_i32x8(), k);
33982}
33983
33984/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33985///
33986/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_storeu_epi8&expand=2069)
33987#[inline]
33988#[target_feature(enable = "avx512f,avx512vl")]
33989#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33990#[cfg_attr(test, assert_instr(vpmovusdb))]
33991pub unsafe fn _mm_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
33992    vpmovusdbmem128(mem_addr, a.as_i32x4(), k);
33993}
33994
33995/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33996///
33997/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi16&expand=1513)
33998#[inline]
33999#[target_feature(enable = "avx512f")]
34000#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34001#[cfg_attr(test, assert_instr(vpmovqw))]
34002pub unsafe fn _mm512_mask_cvtepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m512i) {
34003    vpmovqwmem(mem_addr.cast(), a.as_i64x8(), k);
34004}
34005
34006/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34007///
34008/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi16&expand=1512)
34009#[inline]
34010#[target_feature(enable = "avx512f,avx512vl")]
34011#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34012#[cfg_attr(test, assert_instr(vpmovqw))]
34013pub unsafe fn _mm256_mask_cvtepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
34014    vpmovqwmem256(mem_addr.cast(), a.as_i64x4(), k);
34015}
34016
34017/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34018///
34019/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi16&expand=1511)
34020#[inline]
34021#[target_feature(enable = "avx512f,avx512vl")]
34022#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34023#[cfg_attr(test, assert_instr(vpmovqw))]
34024pub unsafe fn _mm_mask_cvtepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
34025    vpmovqwmem128(mem_addr.cast(), a.as_i64x2(), k);
34026}
34027
34028/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34029///
34030/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi16&expand=1866)
34031#[inline]
34032#[target_feature(enable = "avx512f")]
34033#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34034#[cfg_attr(test, assert_instr(vpmovsqw))]
34035pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m512i) {
34036    vpmovsqwmem(mem_addr.cast(), a.as_i64x8(), k);
34037}
34038
34039/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34040///
34041/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi16&expand=1865)
34042#[inline]
34043#[target_feature(enable = "avx512f,avx512vl")]
34044#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34045#[cfg_attr(test, assert_instr(vpmovsqw))]
34046pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
34047    vpmovsqwmem256(mem_addr.cast(), a.as_i64x4(), k);
34048}
34049
34050/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34051///
34052/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi16&expand=1864)
34053#[inline]
34054#[target_feature(enable = "avx512f,avx512vl")]
34055#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34056#[cfg_attr(test, assert_instr(vpmovsqw))]
34057pub unsafe fn _mm_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
34058    vpmovsqwmem128(mem_addr.cast(), a.as_i64x2(), k);
34059}
34060
34061/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34062///
34063/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi16&expand=2101)
34064#[inline]
34065#[target_feature(enable = "avx512f")]
34066#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34067#[cfg_attr(test, assert_instr(vpmovusqw))]
34068pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m512i) {
34069    vpmovusqwmem(mem_addr.cast(), a.as_i64x8(), k);
34070}
34071
34072/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34073///
34074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi16&expand=2100)
34075#[inline]
34076#[target_feature(enable = "avx512f,avx512vl")]
34077#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34078#[cfg_attr(test, assert_instr(vpmovusqw))]
34079pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
34080    vpmovusqwmem256(mem_addr.cast(), a.as_i64x4(), k);
34081}
34082
34083/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34084///
34085/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi16&expand=2099)
34086#[inline]
34087#[target_feature(enable = "avx512f,avx512vl")]
34088#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34089#[cfg_attr(test, assert_instr(vpmovusqw))]
34090pub unsafe fn _mm_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
34091    vpmovusqwmem128(mem_addr.cast(), a.as_i64x2(), k);
34092}
34093
34094/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34095///
34096/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi8&expand=1519)
34097#[inline]
34098#[target_feature(enable = "avx512f")]
34099#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34100#[cfg_attr(test, assert_instr(vpmovqb))]
34101pub unsafe fn _mm512_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
34102    vpmovqbmem(mem_addr, a.as_i64x8(), k);
34103}
34104
34105/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34106///
34107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi8&expand=1518)
34108#[inline]
34109#[target_feature(enable = "avx512f,avx512vl")]
34110#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34111#[cfg_attr(test, assert_instr(vpmovqb))]
34112pub unsafe fn _mm256_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
34113    vpmovqbmem256(mem_addr, a.as_i64x4(), k);
34114}
34115
34116/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34117///
34118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi8&expand=1517)
34119#[inline]
34120#[target_feature(enable = "avx512f,avx512vl")]
34121#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34122#[cfg_attr(test, assert_instr(vpmovqb))]
34123pub unsafe fn _mm_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
34124    vpmovqbmem128(mem_addr, a.as_i64x2(), k);
34125}
34126
34127/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34128///
34129/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi8&expand=1872)
34130#[inline]
34131#[target_feature(enable = "avx512f")]
34132#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34133#[cfg_attr(test, assert_instr(vpmovsqb))]
34134pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
34135    vpmovsqbmem(mem_addr, a.as_i64x8(), k);
34136}
34137
34138/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34139///
34140/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi8&expand=1871)
34141#[inline]
34142#[target_feature(enable = "avx512f,avx512vl")]
34143#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34144#[cfg_attr(test, assert_instr(vpmovsqb))]
34145pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
34146    vpmovsqbmem256(mem_addr, a.as_i64x4(), k);
34147}
34148
34149/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34150///
34151/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi8&expand=1870)
34152#[inline]
34153#[target_feature(enable = "avx512f,avx512vl")]
34154#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34155#[cfg_attr(test, assert_instr(vpmovsqb))]
34156pub unsafe fn _mm_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
34157    vpmovsqbmem128(mem_addr, a.as_i64x2(), k);
34158}
34159
34160/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34161///
34162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi8&expand=2107)
34163#[inline]
34164#[target_feature(enable = "avx512f")]
34165#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34166#[cfg_attr(test, assert_instr(vpmovusqb))]
34167pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
34168    vpmovusqbmem(mem_addr, a.as_i64x8(), k);
34169}
34170
34171/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34172///
34173/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi8&expand=2106)
34174#[inline]
34175#[target_feature(enable = "avx512f,avx512vl")]
34176#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34177#[cfg_attr(test, assert_instr(vpmovusqb))]
34178pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
34179    vpmovusqbmem256(mem_addr, a.as_i64x4(), k);
34180}
34181
34182/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34183///
34184/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi8&expand=2105)
34185#[inline]
34186#[target_feature(enable = "avx512f,avx512vl")]
34187#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34188#[cfg_attr(test, assert_instr(vpmovusqb))]
34189pub unsafe fn _mm_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
34190    vpmovusqbmem128(mem_addr, a.as_i64x2(), k);
34191}
34192
34193/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34194///
34195/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi32&expand=1516)
34196#[inline]
34197#[target_feature(enable = "avx512f")]
34198#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34199#[cfg_attr(test, assert_instr(vpmovqd))]
34200pub unsafe fn _mm512_mask_cvtepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m512i) {
34201    vpmovqdmem(mem_addr.cast(), a.as_i64x8(), k);
34202}
34203
34204/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34205///
34206/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi32&expand=1515)
34207#[inline]
34208#[target_feature(enable = "avx512f,avx512vl")]
34209#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34210#[cfg_attr(test, assert_instr(vpmovqd))]
34211pub unsafe fn _mm256_mask_cvtepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m256i) {
34212    vpmovqdmem256(mem_addr.cast(), a.as_i64x4(), k);
34213}
34214
34215/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34216///
34217/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi32&expand=1514)
34218#[inline]
34219#[target_feature(enable = "avx512f,avx512vl")]
34220#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34221#[cfg_attr(test, assert_instr(vpmovqd))]
34222pub unsafe fn _mm_mask_cvtepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m128i) {
34223    vpmovqdmem128(mem_addr.cast(), a.as_i64x2(), k);
34224}
34225
34226/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34227///
34228/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi32&expand=1869)
34229#[inline]
34230#[target_feature(enable = "avx512f")]
34231#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34232#[cfg_attr(test, assert_instr(vpmovsqd))]
34233pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m512i) {
34234    vpmovsqdmem(mem_addr.cast(), a.as_i64x8(), k);
34235}
34236
34237/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34238///
34239/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi32&expand=1868)
34240#[inline]
34241#[target_feature(enable = "avx512f,avx512vl")]
34242#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34243#[cfg_attr(test, assert_instr(vpmovsqd))]
34244pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m256i) {
34245    vpmovsqdmem256(mem_addr.cast(), a.as_i64x4(), k);
34246}
34247
34248/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34249///
34250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi32&expand=1867)
34251#[inline]
34252#[target_feature(enable = "avx512f,avx512vl")]
34253#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34254#[cfg_attr(test, assert_instr(vpmovsqd))]
34255pub unsafe fn _mm_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m128i) {
34256    vpmovsqdmem128(mem_addr.cast(), a.as_i64x2(), k);
34257}
34258
34259/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34260///
34261/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi32&expand=2104)
34262#[inline]
34263#[target_feature(enable = "avx512f")]
34264#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34265#[cfg_attr(test, assert_instr(vpmovusqd))]
34266pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m512i) {
34267    vpmovusqdmem(mem_addr.cast(), a.as_i64x8(), k);
34268}
34269
34270/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34271///
34272/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi32&expand=2103)
34273#[inline]
34274#[target_feature(enable = "avx512f,avx512vl")]
34275#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34276#[cfg_attr(test, assert_instr(vpmovusqd))]
34277pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m256i) {
34278    vpmovusqdmem256(mem_addr.cast(), a.as_i64x4(), k);
34279}
34280
34281/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34282///
34283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi32&expand=2102)
34284#[inline]
34285#[target_feature(enable = "avx512f,avx512vl")]
34286#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34287#[cfg_attr(test, assert_instr(vpmovusqd))]
34288pub unsafe fn _mm_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m128i) {
34289    vpmovusqdmem128(mem_addr.cast(), a.as_i64x2(), k);
34290}
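
// Illustrative sketch (hypothetical test-only helper, not part of this module's
// API): with unsigned saturation, a 64-bit value above u32::MAX clamps to
// u32::MAX, which reads back as -1 through the i32 destination pointer.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn mask_cvtusepi64_storeu_epi32_sketch() {
    let a = _mm512_set1_epi64(0x1_0000_0000); // u32::MAX + 1
    let mut buf = [0i32; 8];
    _mm512_mask_cvtusepi64_storeu_epi32(buf.as_mut_ptr(), 0xFF, a);
    assert_eq!(buf[0], -1); // 0xFFFF_FFFF, i.e. u32::MAX
}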
34291
34292/// Store 512-bits (composed of 16 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
34293///
34294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi32&expand=5628)
34295#[inline]
34296#[target_feature(enable = "avx512f")]
34297#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34298#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
34299pub unsafe fn _mm512_storeu_epi32(mem_addr: *mut i32, a: __m512i) {
34300    ptr::write_unaligned(mem_addr as *mut __m512i, a);
34301}
34302
34303/// Store 256-bits (composed of 8 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
34304///
34305/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi32&expand=5626)
34306#[inline]
34307#[target_feature(enable = "avx512f,avx512vl")]
34308#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34309#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
34310pub unsafe fn _mm256_storeu_epi32(mem_addr: *mut i32, a: __m256i) {
34311    ptr::write_unaligned(mem_addr as *mut __m256i, a);
34312}
34313
34314/// Store 128-bits (composed of 4 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
34315///
34316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi32&expand=5624)
34317#[inline]
34318#[target_feature(enable = "avx512f,avx512vl")]
34319#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34320#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
34321pub unsafe fn _mm_storeu_epi32(mem_addr: *mut i32, a: __m128i) {
34322    ptr::write_unaligned(mem_addr as *mut __m128i, a);
34323}
34324
34325/// Load 512-bits (composed of 8 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
34326///
34327/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi64&expand=3386)
34328#[inline]
34329#[target_feature(enable = "avx512f")]
34330#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34331#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
34332pub unsafe fn _mm512_loadu_epi64(mem_addr: *const i64) -> __m512i {
34333    ptr::read_unaligned(mem_addr as *const __m512i)
34334}
34335
34336/// Load 256-bits (composed of 4 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
34337///
34338/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi64&expand=3383)
34339#[inline]
34340#[target_feature(enable = "avx512f,avx512vl")]
34341#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34342#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
34343pub unsafe fn _mm256_loadu_epi64(mem_addr: *const i64) -> __m256i {
34344    ptr::read_unaligned(mem_addr as *const __m256i)
34345}
34346
34347/// Load 128-bits (composed of 2 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
34348///
34349/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi64&expand=3380)
34350#[inline]
34351#[target_feature(enable = "avx512f,avx512vl")]
34352#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34353#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
34354pub unsafe fn _mm_loadu_epi64(mem_addr: *const i64) -> __m128i {
34355    ptr::read_unaligned(mem_addr as *const __m128i)
34356}
34357
34358/// Store 512-bits (composed of 8 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
34359///
34360/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi64&expand=5634)
34361#[inline]
34362#[target_feature(enable = "avx512f")]
34363#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34364#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
34365pub unsafe fn _mm512_storeu_epi64(mem_addr: *mut i64, a: __m512i) {
34366    ptr::write_unaligned(mem_addr as *mut __m512i, a);
34367}
34368
34369/// Store 256-bits (composed of 4 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
34370///
34371/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi64&expand=5632)
34372#[inline]
34373#[target_feature(enable = "avx512f,avx512vl")]
34374#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34375#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
34376pub unsafe fn _mm256_storeu_epi64(mem_addr: *mut i64, a: __m256i) {
34377    ptr::write_unaligned(mem_addr as *mut __m256i, a);
34378}
34379
34380/// Store 128-bits (composed of 2 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
34381///
34382/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi64&expand=5630)
34383#[inline]
34384#[target_feature(enable = "avx512f,avx512vl")]
34385#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34386#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
34387pub unsafe fn _mm_storeu_epi64(mem_addr: *mut i64, a: __m128i) {
34388    ptr::write_unaligned(mem_addr as *mut __m128i, a);
34389}
34390
34391/// Load 512-bits of integer data from memory into dst. mem_addr does not need to be aligned on any particular boundary.
34392///
34393/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_si512&expand=3420)
34394#[inline]
34395#[target_feature(enable = "avx512f")]
34396#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34397#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
34398pub unsafe fn _mm512_loadu_si512(mem_addr: *const __m512i) -> __m512i {
34399    ptr::read_unaligned(mem_addr)
34400}
34401
34402/// Store 512-bits of integer data from a into memory. mem_addr does not need to be aligned on any particular boundary.
34403///
34404/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_si512&expand=5657)
34405#[inline]
34406#[target_feature(enable = "avx512f")]
34407#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34408#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
34409pub unsafe fn _mm512_storeu_si512(mem_addr: *mut __m512i, a: __m512i) {
34410    ptr::write_unaligned(mem_addr, a);
34411}
34412
34413/// Loads 512-bits (composed of 8 packed double-precision (64-bit)
34414/// floating-point elements) from memory into result.
34415/// `mem_addr` does not need to be aligned on any particular boundary.
34416///
34417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_pd)
34418#[inline]
34419#[target_feature(enable = "avx512f")]
34420#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34421#[cfg_attr(test, assert_instr(vmovups))]
34422pub unsafe fn _mm512_loadu_pd(mem_addr: *const f64) -> __m512d {
34423    ptr::read_unaligned(mem_addr as *const __m512d)
34424}
34425
34426/// Stores 512-bits (composed of 8 packed double-precision (64-bit)
34427/// floating-point elements) from `a` into memory.
34428/// `mem_addr` does not need to be aligned on any particular boundary.
34429///
34430/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_pd)
34431#[inline]
34432#[target_feature(enable = "avx512f")]
34433#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34434#[cfg_attr(test, assert_instr(vmovups))]
34435pub unsafe fn _mm512_storeu_pd(mem_addr: *mut f64, a: __m512d) {
34436    ptr::write_unaligned(mem_addr as *mut __m512d, a);
34437}
34438
34439/// Loads 512-bits (composed of 16 packed single-precision (32-bit)
34440/// floating-point elements) from memory into result.
34441/// `mem_addr` does not need to be aligned on any particular boundary.
34442///
34443/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_ps)
34444#[inline]
34445#[target_feature(enable = "avx512f")]
34446#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34447#[cfg_attr(test, assert_instr(vmovups))]
34448pub unsafe fn _mm512_loadu_ps(mem_addr: *const f32) -> __m512 {
34449    ptr::read_unaligned(mem_addr as *const __m512)
34450}
34451
34452/// Stores 512-bits (composed of 16 packed single-precision (32-bit)
34453/// floating-point elements) from `a` into memory.
34454/// `mem_addr` does not need to be aligned on any particular boundary.
34455///
34456/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_ps)
34457#[inline]
34458#[target_feature(enable = "avx512f")]
34459#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34460#[cfg_attr(test, assert_instr(vmovups))]
34461pub unsafe fn _mm512_storeu_ps(mem_addr: *mut f32, a: __m512) {
34462    ptr::write_unaligned(mem_addr as *mut __m512, a);
34463}
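
// Illustrative sketch (hypothetical test-only helper, not part of this module's
// API): the unaligned float load/store pair can round-trip a plain array with
// no alignment guarantees on either pointer.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn loadu_storeu_ps_sketch() {
    let src = [1.5f32; 16];
    let mut dst = [0.0f32; 16];
    let v = _mm512_loadu_ps(src.as_ptr());
    _mm512_storeu_ps(dst.as_mut_ptr(), v);
    assert_eq!(dst, src);
}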
34464
34465/// Load 512-bits of integer data from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34466///
34467/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_si512&expand=3345)
34468#[inline]
34469#[target_feature(enable = "avx512f")]
34470#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34471#[cfg_attr(
34472    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34473    assert_instr(vmovaps)
34474)] //should be vmovdqa32
34475pub unsafe fn _mm512_load_si512(mem_addr: *const __m512i) -> __m512i {
34476    ptr::read(mem_addr)
34477}
34478
34479/// Store 512-bits of integer data from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34480///
34481/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_si512&expand=5598)
34482#[inline]
34483#[target_feature(enable = "avx512f")]
34484#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34485#[cfg_attr(
34486    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34487    assert_instr(vmovaps)
34488)] //should be vmovdqa32
34489pub unsafe fn _mm512_store_si512(mem_addr: *mut __m512i, a: __m512i) {
34490    ptr::write(mem_addr, a);
34491}
34492
34493/// Load 512-bits (composed of 16 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34494///
34495/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_epi32&expand=3304)
34496#[inline]
34497#[target_feature(enable = "avx512f")]
34498#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34499#[cfg_attr(
34500    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34501    assert_instr(vmovaps)
34502)] //should be vmovdqa32
34503pub unsafe fn _mm512_load_epi32(mem_addr: *const i32) -> __m512i {
34504    ptr::read(mem_addr as *const __m512i)
34505}
34506
34507/// Load 256-bits (composed of 8 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
34508///
34509/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_epi32&expand=3301)
34510#[inline]
34511#[target_feature(enable = "avx512f,avx512vl")]
34512#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34513#[cfg_attr(
34514    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34515    assert_instr(vmovaps)
34516)] //should be vmovdqa32
34517pub unsafe fn _mm256_load_epi32(mem_addr: *const i32) -> __m256i {
34518    ptr::read(mem_addr as *const __m256i)
34519}
34520
34521/// Load 128-bits (composed of 4 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
34522///
34523/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_epi32&expand=3298)
34524#[inline]
34525#[target_feature(enable = "avx512f,avx512vl")]
34526#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34527#[cfg_attr(
34528    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34529    assert_instr(vmovaps)
34530)] //should be vmovdqa32
34531pub unsafe fn _mm_load_epi32(mem_addr: *const i32) -> __m128i {
34532    ptr::read(mem_addr as *const __m128i)
34533}
34534
34535/// Store 512-bits (composed of 16 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34536///
34537/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_epi32&expand=5569)
34538#[inline]
34539#[target_feature(enable = "avx512f")]
34540#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34541#[cfg_attr(
34542    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34543    assert_instr(vmovaps)
34544)] //should be vmovdqa32
34545pub unsafe fn _mm512_store_epi32(mem_addr: *mut i32, a: __m512i) {
34546    ptr::write(mem_addr as *mut __m512i, a);
34547}
34548
34549/// Store 256-bits (composed of 8 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
34550///
34551/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_epi32&expand=5567)
34552#[inline]
34553#[target_feature(enable = "avx512f,avx512vl")]
34554#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34555#[cfg_attr(
34556    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34557    assert_instr(vmovaps)
34558)] //should be vmovdqa32
34559pub unsafe fn _mm256_store_epi32(mem_addr: *mut i32, a: __m256i) {
34560    ptr::write(mem_addr as *mut __m256i, a);
34561}
34562
34563/// Store 128-bits (composed of 4 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
34564///
34565/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_epi32&expand=5565)
34566#[inline]
34567#[target_feature(enable = "avx512f,avx512vl")]
34568#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34569#[cfg_attr(
34570    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34571    assert_instr(vmovaps)
34572)] //should be vmovdqa32
34573pub unsafe fn _mm_store_epi32(mem_addr: *mut i32, a: __m128i) {
34574    ptr::write(mem_addr as *mut __m128i, a);
34575}
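
// Illustrative sketch (hypothetical test-only helper, not part of this module's
// API): the aligned forms require 64-byte alignment; wrapping the buffer in a
// `#[repr(align(64))]` newtype is one way to guarantee that for a stack array.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn aligned_load_store_epi32_sketch() {
    #[repr(align(64))]
    struct Aligned([i32; 16]);
    let src = Aligned([3; 16]);
    let mut dst = Aligned([0; 16]);
    let v = _mm512_load_epi32(src.0.as_ptr());
    _mm512_store_epi32(dst.0.as_mut_ptr(), v);
    assert_eq!(dst.0, src.0);
}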
34576
34577/// Load 512-bits (composed of 8 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34578///
34579/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_epi64&expand=3313)
34580#[inline]
34581#[target_feature(enable = "avx512f")]
34582#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34583#[cfg_attr(
34584    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34585    assert_instr(vmovaps)
34586)] //should be vmovdqa64
34587pub unsafe fn _mm512_load_epi64(mem_addr: *const i64) -> __m512i {
34588    ptr::read(mem_addr as *const __m512i)
34589}
34590
34591/// Load 256-bits (composed of 4 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
34592///
34593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_epi64&expand=3310)
34594#[inline]
34595#[target_feature(enable = "avx512f,avx512vl")]
34596#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34597#[cfg_attr(
34598    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34599    assert_instr(vmovaps)
34600)] //should be vmovdqa64
34601pub unsafe fn _mm256_load_epi64(mem_addr: *const i64) -> __m256i {
34602    ptr::read(mem_addr as *const __m256i)
34603}
34604
34605/// Load 128-bits (composed of 2 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
34606///
34607/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_epi64&expand=3307)
34608#[inline]
34609#[target_feature(enable = "avx512f,avx512vl")]
34610#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34611#[cfg_attr(
34612    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34613    assert_instr(vmovaps)
34614)] //should be vmovdqa64
34615pub unsafe fn _mm_load_epi64(mem_addr: *const i64) -> __m128i {
34616    ptr::read(mem_addr as *const __m128i)
34617}
34618
34619/// Store 512-bits (composed of 8 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34620///
34621/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_epi64&expand=5575)
34622#[inline]
34623#[target_feature(enable = "avx512f")]
34624#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34625#[cfg_attr(
34626    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34627    assert_instr(vmovaps)
34628)] //should be vmovdqa64
34629pub unsafe fn _mm512_store_epi64(mem_addr: *mut i64, a: __m512i) {
34630    ptr::write(mem_addr as *mut __m512i, a);
34631}
34632
34633/// Store 256-bits (composed of 4 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
34634///
34635/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_epi64&expand=5573)
34636#[inline]
34637#[target_feature(enable = "avx512f,avx512vl")]
34638#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34639#[cfg_attr(
34640    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34641    assert_instr(vmovaps)
34642)] //should be vmovdqa64
34643pub unsafe fn _mm256_store_epi64(mem_addr: *mut i64, a: __m256i) {
34644    ptr::write(mem_addr as *mut __m256i, a);
34645}
34646
34647/// Store 128-bits (composed of 2 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
34648///
34649/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_epi64&expand=5571)
34650#[inline]
34651#[target_feature(enable = "avx512f,avx512vl")]
34652#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34653#[cfg_attr(
34654    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34655    assert_instr(vmovaps)
34656)] //should be vmovdqa64
34657pub unsafe fn _mm_store_epi64(mem_addr: *mut i64, a: __m128i) {
34658    ptr::write(mem_addr as *mut __m128i, a);
34659}
34660
34661/// Load 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34662///
34663/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_ps&expand=3336)
34664#[inline]
34665#[target_feature(enable = "avx512f")]
34666#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34667#[cfg_attr(
34668    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34669    assert_instr(vmovaps)
34670)]
34671pub unsafe fn _mm512_load_ps(mem_addr: *const f32) -> __m512 {
34672    ptr::read(mem_addr as *const __m512)
34673}
34674
/// Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34676///
34677/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_ps&expand=5592)
34678#[inline]
34679#[target_feature(enable = "avx512f")]
34680#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34681#[cfg_attr(
34682    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34683    assert_instr(vmovaps)
34684)]
34685pub unsafe fn _mm512_store_ps(mem_addr: *mut f32, a: __m512) {
34686    ptr::write(mem_addr as *mut __m512, a);
34687}
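
// Illustrative sketch of an aligned single-precision store followed by a reload.
// The helper name and the aligned wrapper are hypothetical.
#[cfg(test)]
#[allow(dead_code)]
fn example_store_load_ps_aligned() -> __m512 {
    #[repr(align(64))]
    struct Aligned64([f32; 16]);

    let mut buf = Aligned64([0.0; 16]);
    // Safety (assumed): the caller has verified AVX-512F support at runtime.
    unsafe {
        let v = _mm512_set1_ps(1.5);
        // Aligned store: `buf` now holds sixteen copies of 1.5.
        _mm512_store_ps(buf.0.as_mut_ptr(), v);
        // Aligned reload of the same values.
        _mm512_load_ps(buf.0.as_ptr())
    }
}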
34688
34689/// Load 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34690///
34691/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_pd&expand=3326)
34692#[inline]
34693#[target_feature(enable = "avx512f")]
34694#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34695#[cfg_attr(
34696    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34697    assert_instr(vmovaps)
34698)] //should be vmovapd
34699pub unsafe fn _mm512_load_pd(mem_addr: *const f64) -> __m512d {
34700    ptr::read(mem_addr as *const __m512d)
34701}
34702
34703/// Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34704///
34705/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_pd&expand=5585)
34706#[inline]
34707#[target_feature(enable = "avx512f")]
34708#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34709#[cfg_attr(
34710    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34711    assert_instr(vmovaps)
34712)] //should be vmovapd
34713pub unsafe fn _mm512_store_pd(mem_addr: *mut f64, a: __m512d) {
34714    ptr::write(mem_addr as *mut __m512d, a);
34715}
34716
34717/// Load packed 32-bit integers from memory into dst using writemask k
34718/// (elements are copied from src when the corresponding mask bit is not set).
34719/// mem_addr does not need to be aligned on any particular boundary.
34720///
34721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi32)
34722#[inline]
34723#[target_feature(enable = "avx512f")]
34724#[cfg_attr(test, assert_instr(vmovdqu32))]
34725#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34726pub unsafe fn _mm512_mask_loadu_epi32(src: __m512i, k: __mmask16, mem_addr: *const i32) -> __m512i {
34727    transmute(loaddqu32_512(mem_addr, src.as_i32x16(), k))
34728}
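
// Illustrative sketch of a write-masked, unaligned 32-bit load. The helper name is
// hypothetical; lanes whose mask bit is clear keep the corresponding value from `src`.
#[cfg(test)]
#[allow(dead_code)]
fn example_mask_loadu_epi32() -> __m512i {
    let data: [i32; 16] = [7; 16];
    // Safety (assumed): the caller has verified AVX-512F support at runtime.
    unsafe {
        let src = _mm512_set1_epi32(-1);
        // Mask 0x00FF: the low eight lanes are loaded from `data` (7),
        // the high eight lanes are copied from `src` (-1).
        _mm512_mask_loadu_epi32(src, 0x00FF, data.as_ptr())
    }
}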
34729
34730/// Load packed 32-bit integers from memory into dst using zeromask k
34731/// (elements are zeroed out when the corresponding mask bit is not set).
34732/// mem_addr does not need to be aligned on any particular boundary.
34733///
34734/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi32)
34735#[inline]
34736#[target_feature(enable = "avx512f")]
34737#[cfg_attr(test, assert_instr(vmovdqu32))]
34738#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34739pub unsafe fn _mm512_maskz_loadu_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
34740    _mm512_mask_loadu_epi32(_mm512_setzero_si512(), k, mem_addr)
34741}
34742
34743/// Load packed 64-bit integers from memory into dst using writemask k
34744/// (elements are copied from src when the corresponding mask bit is not set).
34745/// mem_addr does not need to be aligned on any particular boundary.
34746///
34747/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi64)
34748#[inline]
34749#[target_feature(enable = "avx512f")]
34750#[cfg_attr(test, assert_instr(vmovdqu64))]
34751#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34752pub unsafe fn _mm512_mask_loadu_epi64(src: __m512i, k: __mmask8, mem_addr: *const i64) -> __m512i {
34753    transmute(loaddqu64_512(mem_addr, src.as_i64x8(), k))
34754}
34755
34756/// Load packed 64-bit integers from memory into dst using zeromask k
34757/// (elements are zeroed out when the corresponding mask bit is not set).
34758/// mem_addr does not need to be aligned on any particular boundary.
34759///
34760/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi64)
34761#[inline]
34762#[target_feature(enable = "avx512f")]
34763#[cfg_attr(test, assert_instr(vmovdqu64))]
34764#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34765pub unsafe fn _mm512_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
34766    _mm512_mask_loadu_epi64(_mm512_setzero_si512(), k, mem_addr)
34767}
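
// Illustrative sketch of a zero-masked, unaligned 64-bit load (hypothetical helper
// name): lanes whose mask bit is clear are zeroed instead of taken from a source vector.
#[cfg(test)]
#[allow(dead_code)]
fn example_maskz_loadu_epi64() -> __m512i {
    let data: [i64; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
    // Safety (assumed): the caller has verified AVX-512F support at runtime.
    // Mask 0b0000_1111: lanes 0..=3 come from memory, lanes 4..=7 become 0.
    unsafe { _mm512_maskz_loadu_epi64(0b0000_1111, data.as_ptr()) }
}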
34768
34769/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
34770/// (elements are copied from src when the corresponding mask bit is not set).
34771/// mem_addr does not need to be aligned on any particular boundary.
34772///
34773/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_ps)
34774#[inline]
34775#[target_feature(enable = "avx512f")]
34776#[cfg_attr(test, assert_instr(vmovups))]
34777#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34778pub unsafe fn _mm512_mask_loadu_ps(src: __m512, k: __mmask16, mem_addr: *const f32) -> __m512 {
34779    transmute(loadups_512(mem_addr, src.as_f32x16(), k))
34780}
34781
34782/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
34783/// (elements are zeroed out when the corresponding mask bit is not set).
34784/// mem_addr does not need to be aligned on any particular boundary.
34785///
34786/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_ps)
34787#[inline]
34788#[target_feature(enable = "avx512f")]
34789#[cfg_attr(test, assert_instr(vmovups))]
34790#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34791pub unsafe fn _mm512_maskz_loadu_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
34792    _mm512_mask_loadu_ps(_mm512_setzero_ps(), k, mem_addr)
34793}
34794
34795/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
34796/// (elements are copied from src when the corresponding mask bit is not set).
34797/// mem_addr does not need to be aligned on any particular boundary.
34798///
34799/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_pd)
34800#[inline]
34801#[target_feature(enable = "avx512f")]
34802#[cfg_attr(test, assert_instr(vmovupd))]
34803#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34804pub unsafe fn _mm512_mask_loadu_pd(src: __m512d, k: __mmask8, mem_addr: *const f64) -> __m512d {
34805    transmute(loadupd_512(mem_addr, src.as_f64x8(), k))
34806}
34807
34808/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
34809/// (elements are zeroed out when the corresponding mask bit is not set).
34810/// mem_addr does not need to be aligned on any particular boundary.
34811///
34812/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_pd)
34813#[inline]
34814#[target_feature(enable = "avx512f")]
34815#[cfg_attr(test, assert_instr(vmovupd))]
34816#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34817pub unsafe fn _mm512_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
34818    _mm512_mask_loadu_pd(_mm512_setzero_pd(), k, mem_addr)
34819}
34820
34821/// Load packed 32-bit integers from memory into dst using writemask k
34822/// (elements are copied from src when the corresponding mask bit is not set).
34823/// mem_addr does not need to be aligned on any particular boundary.
34824///
34825/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi32)
34826#[inline]
34827#[target_feature(enable = "avx512f,avx512vl")]
34828#[cfg_attr(test, assert_instr(vmovdqu32))]
34829#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34830pub unsafe fn _mm256_mask_loadu_epi32(src: __m256i, k: __mmask8, mem_addr: *const i32) -> __m256i {
34831    transmute(loaddqu32_256(mem_addr, src.as_i32x8(), k))
34832}
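
// Illustrative sketch of the 256-bit (AVX-512VL) write-masked load; the helper name is
// hypothetical. The masking behaviour matches the 512-bit form, just with eight lanes.
#[cfg(test)]
#[allow(dead_code)]
fn example_mm256_mask_loadu_epi32() -> __m256i {
    let data: [i32; 8] = [10, 11, 12, 13, 14, 15, 16, 17];
    // Safety (assumed): the caller has verified AVX-512F and AVX-512VL support at runtime.
    unsafe {
        let src = _mm256_setzero_si256();
        // Mask 0b0000_0101: lanes 0 and 2 are loaded from `data`; all other lanes stay 0.
        _mm256_mask_loadu_epi32(src, 0b0000_0101, data.as_ptr())
    }
}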
34833
34834/// Load packed 32-bit integers from memory into dst using zeromask k
34835/// (elements are zeroed out when the corresponding mask bit is not set).
34836/// mem_addr does not need to be aligned on any particular boundary.
34837///
34838/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi32)
34839#[inline]
34840#[target_feature(enable = "avx512f,avx512vl")]
34841#[cfg_attr(test, assert_instr(vmovdqu32))]
34842#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34843pub unsafe fn _mm256_maskz_loadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
34844    _mm256_mask_loadu_epi32(_mm256_setzero_si256(), k, mem_addr)
34845}
34846
34847/// Load packed 64-bit integers from memory into dst using writemask k
34848/// (elements are copied from src when the corresponding mask bit is not set).
34849/// mem_addr does not need to be aligned on any particular boundary.
34850///
34851/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi64)
34852#[inline]
34853#[target_feature(enable = "avx512f,avx512vl")]
34854#[cfg_attr(test, assert_instr(vmovdqu64))]
34855#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34856pub unsafe fn _mm256_mask_loadu_epi64(src: __m256i, k: __mmask8, mem_addr: *const i64) -> __m256i {
34857    transmute(loaddqu64_256(mem_addr, src.as_i64x4(), k))
34858}
34859
34860/// Load packed 64-bit integers from memory into dst using zeromask k
34861/// (elements are zeroed out when the corresponding mask bit is not set).
34862/// mem_addr does not need to be aligned on any particular boundary.
34863///
34864/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi64)
34865#[inline]
34866#[target_feature(enable = "avx512f,avx512vl")]
34867#[cfg_attr(test, assert_instr(vmovdqu64))]
34868#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34869pub unsafe fn _mm256_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
34870    _mm256_mask_loadu_epi64(_mm256_setzero_si256(), k, mem_addr)
34871}
34872
34873/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
34874/// (elements are copied from src when the corresponding mask bit is not set).
34875/// mem_addr does not need to be aligned on any particular boundary.
34876///
34877/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_ps)
34878#[inline]
34879#[target_feature(enable = "avx512f,avx512vl")]
34880#[cfg_attr(test, assert_instr(vmovups))]
34881#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34882pub unsafe fn _mm256_mask_loadu_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
34883    transmute(loadups_256(mem_addr, src.as_f32x8(), k))
34884}
34885
34886/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
34887/// (elements are zeroed out when the corresponding mask bit is not set).
34888/// mem_addr does not need to be aligned on any particular boundary.
34889///
34890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_ps)
34891#[inline]
34892#[target_feature(enable = "avx512f,avx512vl")]
34893#[cfg_attr(test, assert_instr(vmovups))]
34894#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34895pub unsafe fn _mm256_maskz_loadu_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
34896    _mm256_mask_loadu_ps(_mm256_setzero_ps(), k, mem_addr)
34897}
34898
34899/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
34900/// (elements are copied from src when the corresponding mask bit is not set).
34901/// mem_addr does not need to be aligned on any particular boundary.
34902///
34903/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_pd)
34904#[inline]
34905#[target_feature(enable = "avx512f,avx512vl")]
34906#[cfg_attr(test, assert_instr(vmovupd))]
34907#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34908pub unsafe fn _mm256_mask_loadu_pd(src: __m256d, k: __mmask8, mem_addr: *const f64) -> __m256d {
34909    transmute(loadupd_256(mem_addr, src.as_f64x4(), k))
34910}
34911
34912/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
34913/// (elements are zeroed out when the corresponding mask bit is not set).
34914/// mem_addr does not need to be aligned on any particular boundary.
34915///
34916/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_pd)
34917#[inline]
34918#[target_feature(enable = "avx512f,avx512vl")]
34919#[cfg_attr(test, assert_instr(vmovupd))]
34920#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34921pub unsafe fn _mm256_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
34922    _mm256_mask_loadu_pd(_mm256_setzero_pd(), k, mem_addr)
34923}
34924
34925/// Load packed 32-bit integers from memory into dst using writemask k
34926/// (elements are copied from src when the corresponding mask bit is not set).
34927/// mem_addr does not need to be aligned on any particular boundary.
34928///
34929/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi32)
34930#[inline]
34931#[target_feature(enable = "avx512f,avx512vl")]
34932#[cfg_attr(test, assert_instr(vmovdqu32))]
34933#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34934pub unsafe fn _mm_mask_loadu_epi32(src: __m128i, k: __mmask8, mem_addr: *const i32) -> __m128i {
34935    transmute(loaddqu32_128(mem_addr, src.as_i32x4(), k))
34936}
34937
34938/// Load packed 32-bit integers from memory into dst using zeromask k
34939/// (elements are zeroed out when the corresponding mask bit is not set).
34940/// mem_addr does not need to be aligned on any particular boundary.
34941///
34942/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi32)
34943#[inline]
34944#[target_feature(enable = "avx512f,avx512vl")]
34945#[cfg_attr(test, assert_instr(vmovdqu32))]
34946#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34947pub unsafe fn _mm_maskz_loadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
34948    _mm_mask_loadu_epi32(_mm_setzero_si128(), k, mem_addr)
34949}
34950
34951/// Load packed 64-bit integers from memory into dst using writemask k
34952/// (elements are copied from src when the corresponding mask bit is not set).
34953/// mem_addr does not need to be aligned on any particular boundary.
34954///
34955/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi64)
34956#[inline]
34957#[target_feature(enable = "avx512f,avx512vl")]
34958#[cfg_attr(test, assert_instr(vmovdqu64))]
34959#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34960pub unsafe fn _mm_mask_loadu_epi64(src: __m128i, k: __mmask8, mem_addr: *const i64) -> __m128i {
34961    transmute(loaddqu64_128(mem_addr, src.as_i64x2(), k))
34962}
34963
34964/// Load packed 64-bit integers from memory into dst using zeromask k
34965/// (elements are zeroed out when the corresponding mask bit is not set).
34966/// mem_addr does not need to be aligned on any particular boundary.
34967///
34968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi64)
34969#[inline]
34970#[target_feature(enable = "avx512f,avx512vl")]
34971#[cfg_attr(test, assert_instr(vmovdqu64))]
34972#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34973pub unsafe fn _mm_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
34974    _mm_mask_loadu_epi64(_mm_setzero_si128(), k, mem_addr)
34975}
34976
34977/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
34978/// (elements are copied from src when the corresponding mask bit is not set).
34979/// mem_addr does not need to be aligned on any particular boundary.
34980///
34981/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_ps)
34982#[inline]
34983#[target_feature(enable = "avx512f,avx512vl")]
34984#[cfg_attr(test, assert_instr(vmovups))]
34985#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34986pub unsafe fn _mm_mask_loadu_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
34987    transmute(loadups_128(mem_addr, src.as_f32x4(), k))
34988}
34989
34990/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
34991/// (elements are zeroed out when the corresponding mask bit is not set).
34992/// mem_addr does not need to be aligned on any particular boundary.
34993///
34994/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_ps)
34995#[inline]
34996#[target_feature(enable = "avx512f,avx512vl")]
34997#[cfg_attr(test, assert_instr(vmovups))]
34998#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34999pub unsafe fn _mm_maskz_loadu_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
35000    _mm_mask_loadu_ps(_mm_setzero_ps(), k, mem_addr)
35001}
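
// Illustrative sketch of the 128-bit zero-masked float load (hypothetical helper name).
#[cfg(test)]
#[allow(dead_code)]
fn example_mm_maskz_loadu_ps() -> __m128 {
    let data: [f32; 4] = [1.0, 2.0, 3.0, 4.0];
    // Safety (assumed): the caller has verified AVX-512F and AVX-512VL support at runtime.
    // Mask 0b0011: lanes 0 and 1 are loaded, lanes 2 and 3 are zeroed.
    unsafe { _mm_maskz_loadu_ps(0b0011, data.as_ptr()) }
}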
35002
35003/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
35004/// (elements are copied from src when the corresponding mask bit is not set).
35005/// mem_addr does not need to be aligned on any particular boundary.
35006///
35007/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_pd)
35008#[inline]
35009#[target_feature(enable = "avx512f,avx512vl")]
35010#[cfg_attr(test, assert_instr(vmovupd))]
35011#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35012pub unsafe fn _mm_mask_loadu_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
35013    transmute(loadupd_128(mem_addr, src.as_f64x2(), k))
35014}
35015
35016/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
35017/// (elements are zeroed out when the corresponding mask bit is not set).
35018/// mem_addr does not need to be aligned on any particular boundary.
35019///
35020/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_pd)
35021#[inline]
35022#[target_feature(enable = "avx512f,avx512vl")]
35023#[cfg_attr(test, assert_instr(vmovupd))]
35024#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35025pub unsafe fn _mm_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
35026    _mm_mask_loadu_pd(_mm_setzero_pd(), k, mem_addr)
35027}
35028
35029/// Load packed 32-bit integers from memory into dst using writemask k
35030/// (elements are copied from src when the corresponding mask bit is not set).
35031/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35032///
35033/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_epi32)
35034#[inline]
35035#[target_feature(enable = "avx512f")]
35036#[cfg_attr(test, assert_instr(vmovdqa32))]
35037#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35038pub unsafe fn _mm512_mask_load_epi32(src: __m512i, k: __mmask16, mem_addr: *const i32) -> __m512i {
35039    transmute(loaddqa32_512(mem_addr, src.as_i32x16(), k))
35040}
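
// Illustrative sketch of the aligned, write-masked 32-bit load. The helper name and the
// aligned wrapper are hypothetical; unlike `_mm512_mask_loadu_epi32`, the pointer must be
// 64-byte aligned.
#[cfg(test)]
#[allow(dead_code)]
fn example_mask_load_epi32_aligned() -> __m512i {
    #[repr(align(64))]
    struct Aligned64([i32; 16]);

    let data = Aligned64([3; 16]);
    // Safety (assumed): the caller has verified AVX-512F support at runtime.
    unsafe {
        let src = _mm512_setzero_si512();
        // Mask 0xAAAA: odd-numbered lanes are loaded from `data`, even-numbered lanes stay 0.
        _mm512_mask_load_epi32(src, 0xAAAA, data.0.as_ptr())
    }
}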
35041
35042/// Load packed 32-bit integers from memory into dst using zeromask k
35043/// (elements are zeroed out when the corresponding mask bit is not set).
35044/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35045///
35046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_epi32)
35047#[inline]
35048#[target_feature(enable = "avx512f")]
35049#[cfg_attr(test, assert_instr(vmovdqa32))]
35050#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35051pub unsafe fn _mm512_maskz_load_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
35052    _mm512_mask_load_epi32(_mm512_setzero_si512(), k, mem_addr)
35053}
35054
35055/// Load packed 64-bit integers from memory into dst using writemask k
35056/// (elements are copied from src when the corresponding mask bit is not set).
35057/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35058///
35059/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_epi64)
35060#[inline]
35061#[target_feature(enable = "avx512f")]
35062#[cfg_attr(test, assert_instr(vmovdqa64))]
35063#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35064pub unsafe fn _mm512_mask_load_epi64(src: __m512i, k: __mmask8, mem_addr: *const i64) -> __m512i {
35065    transmute(loaddqa64_512(mem_addr, src.as_i64x8(), k))
35066}
35067
35068/// Load packed 64-bit integers from memory into dst using zeromask k
35069/// (elements are zeroed out when the corresponding mask bit is not set).
35070/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35071///
35072/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_epi64)
35073#[inline]
35074#[target_feature(enable = "avx512f")]
35075#[cfg_attr(test, assert_instr(vmovdqa64))]
35076#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35077pub unsafe fn _mm512_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
35078    _mm512_mask_load_epi64(_mm512_setzero_si512(), k, mem_addr)
35079}
35080
35081/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
35082/// (elements are copied from src when the corresponding mask bit is not set).
35083/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35084///
35085/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_ps)
35086#[inline]
35087#[target_feature(enable = "avx512f")]
35088#[cfg_attr(test, assert_instr(vmovaps))]
35089#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35090pub unsafe fn _mm512_mask_load_ps(src: __m512, k: __mmask16, mem_addr: *const f32) -> __m512 {
35091    transmute(loadaps_512(mem_addr, src.as_f32x16(), k))
35092}
35093
35094/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
35095/// (elements are zeroed out when the corresponding mask bit is not set).
35096/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35097///
35098/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_ps)
35099#[inline]
35100#[target_feature(enable = "avx512f")]
35101#[cfg_attr(test, assert_instr(vmovaps))]
35102#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35103pub unsafe fn _mm512_maskz_load_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
35104    _mm512_mask_load_ps(_mm512_setzero_ps(), k, mem_addr)
35105}
35106
35107/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
35108/// (elements are copied from src when the corresponding mask bit is not set).
35109/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35110///
35111/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_pd)
35112#[inline]
35113#[target_feature(enable = "avx512f")]
35114#[cfg_attr(test, assert_instr(vmovapd))]
35115#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35116pub unsafe fn _mm512_mask_load_pd(src: __m512d, k: __mmask8, mem_addr: *const f64) -> __m512d {
35117    transmute(loadapd_512(mem_addr, src.as_f64x8(), k))
35118}
35119
35120/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
35121/// (elements are zeroed out when the corresponding mask bit is not set).
35122/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35123///
35124/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_pd)
35125#[inline]
35126#[target_feature(enable = "avx512f")]
35127#[cfg_attr(test, assert_instr(vmovapd))]
35128#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35129pub unsafe fn _mm512_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
35130    _mm512_mask_load_pd(_mm512_setzero_pd(), k, mem_addr)
35131}
35132
35133/// Load packed 32-bit integers from memory into dst using writemask k
35134/// (elements are copied from src when the corresponding mask bit is not set).
35135/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35136///
35137/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_epi32)
35138#[inline]
35139#[target_feature(enable = "avx512f,avx512vl")]
35140#[cfg_attr(test, assert_instr(vmovdqa32))]
35141#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35142pub unsafe fn _mm256_mask_load_epi32(src: __m256i, k: __mmask8, mem_addr: *const i32) -> __m256i {
35143    transmute(loaddqa32_256(mem_addr, src.as_i32x8(), k))
35144}
35145
35146/// Load packed 32-bit integers from memory into dst using zeromask k
35147/// (elements are zeroed out when the corresponding mask bit is not set).
35148/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35149///
35150/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_epi32)
35151#[inline]
35152#[target_feature(enable = "avx512f,avx512vl")]
35153#[cfg_attr(test, assert_instr(vmovdqa32))]
35154#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35155pub unsafe fn _mm256_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
35156    _mm256_mask_load_epi32(_mm256_setzero_si256(), k, mem_addr)
35157}
35158
35159/// Load packed 64-bit integers from memory into dst using writemask k
35160/// (elements are copied from src when the corresponding mask bit is not set).
35161/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35162///
35163/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_epi64)
35164#[inline]
35165#[target_feature(enable = "avx512f,avx512vl")]
35166#[cfg_attr(test, assert_instr(vmovdqa64))]
35167#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35168pub unsafe fn _mm256_mask_load_epi64(src: __m256i, k: __mmask8, mem_addr: *const i64) -> __m256i {
35169    transmute(loaddqa64_256(mem_addr, src.as_i64x4(), k))
35170}
35171
35172/// Load packed 64-bit integers from memory into dst using zeromask k
35173/// (elements are zeroed out when the corresponding mask bit is not set).
35174/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35175///
35176/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_epi64)
35177#[inline]
35178#[target_feature(enable = "avx512f,avx512vl")]
35179#[cfg_attr(test, assert_instr(vmovdqa64))]
35180#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35181pub unsafe fn _mm256_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
35182    _mm256_mask_load_epi64(_mm256_setzero_si256(), k, mem_addr)
35183}
35184
35185/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
35186/// (elements are copied from src when the corresponding mask bit is not set).
35187/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35188///
35189/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_ps)
35190#[inline]
35191#[target_feature(enable = "avx512f,avx512vl")]
35192#[cfg_attr(test, assert_instr(vmovaps))]
35193#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35194pub unsafe fn _mm256_mask_load_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
35195    transmute(loadaps_256(mem_addr, src.as_f32x8(), k))
35196}
35197
35198/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
35199/// (elements are zeroed out when the corresponding mask bit is not set).
35200/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35201///
35202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_ps)
35203#[inline]
35204#[target_feature(enable = "avx512f,avx512vl")]
35205#[cfg_attr(test, assert_instr(vmovaps))]
35206#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35207pub unsafe fn _mm256_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
35208    _mm256_mask_load_ps(_mm256_setzero_ps(), k, mem_addr)
35209}
35210
35211/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
35212/// (elements are copied from src when the corresponding mask bit is not set).
35213/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35214///
35215/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_pd)
35216#[inline]
35217#[target_feature(enable = "avx512f,avx512vl")]
35218#[cfg_attr(test, assert_instr(vmovapd))]
35219#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35220pub unsafe fn _mm256_mask_load_pd(src: __m256d, k: __mmask8, mem_addr: *const f64) -> __m256d {
35221    transmute(loadapd_256(mem_addr, src.as_f64x4(), k))
35222}
35223
35224/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
35225/// (elements are zeroed out when the corresponding mask bit is not set).
35226/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35227///
35228/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_pd)
35229#[inline]
35230#[target_feature(enable = "avx512f,avx512vl")]
35231#[cfg_attr(test, assert_instr(vmovapd))]
35232#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35233pub unsafe fn _mm256_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
35234    _mm256_mask_load_pd(_mm256_setzero_pd(), k, mem_addr)
35235}
35236
35237/// Load packed 32-bit integers from memory into dst using writemask k
35238/// (elements are copied from src when the corresponding mask bit is not set).
35239/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35240///
35241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_epi32)
35242#[inline]
35243#[target_feature(enable = "avx512f,avx512vl")]
35244#[cfg_attr(test, assert_instr(vmovdqa32))]
35245#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35246pub unsafe fn _mm_mask_load_epi32(src: __m128i, k: __mmask8, mem_addr: *const i32) -> __m128i {
35247    transmute(loaddqa32_128(mem_addr, src.as_i32x4(), k))
35248}
35249
35250/// Load packed 32-bit integers from memory into dst using zeromask k
35251/// (elements are zeroed out when the corresponding mask bit is not set).
35252/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35253///
35254/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_epi32)
35255#[inline]
35256#[target_feature(enable = "avx512f,avx512vl")]
35257#[cfg_attr(test, assert_instr(vmovdqa32))]
35258#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35259pub unsafe fn _mm_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
35260    _mm_mask_load_epi32(_mm_setzero_si128(), k, mem_addr)
35261}
35262
35263/// Load packed 64-bit integers from memory into dst using writemask k
35264/// (elements are copied from src when the corresponding mask bit is not set).
35265/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35266///
35267/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_epi64)
35268#[inline]
35269#[target_feature(enable = "avx512f,avx512vl")]
35270#[cfg_attr(test, assert_instr(vmovdqa64))]
35271#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35272pub unsafe fn _mm_mask_load_epi64(src: __m128i, k: __mmask8, mem_addr: *const i64) -> __m128i {
35273    transmute(loaddqa64_128(mem_addr, src.as_i64x2(), k))
35274}
35275
35276/// Load packed 64-bit integers from memory into dst using zeromask k
35277/// (elements are zeroed out when the corresponding mask bit is not set).
35278/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35279///
35280/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_epi64)
35281#[inline]
35282#[target_feature(enable = "avx512f,avx512vl")]
35283#[cfg_attr(test, assert_instr(vmovdqa64))]
35284#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35285pub unsafe fn _mm_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
35286    _mm_mask_load_epi64(_mm_setzero_si128(), k, mem_addr)
35287}
35288
35289/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
35290/// (elements are copied from src when the corresponding mask bit is not set).
35291/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35292///
35293/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_ps)
35294#[inline]
35295#[target_feature(enable = "avx512f,avx512vl")]
35296#[cfg_attr(test, assert_instr(vmovaps))]
35297#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35298pub unsafe fn _mm_mask_load_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
35299    transmute(loadaps_128(mem_addr, src.as_f32x4(), k))
35300}
35301
35302/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
35303/// (elements are zeroed out when the corresponding mask bit is not set).
35304/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35305///
35306/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_ps)
35307#[inline]
35308#[target_feature(enable = "avx512f,avx512vl")]
35309#[cfg_attr(test, assert_instr(vmovaps))]
35310#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35311pub unsafe fn _mm_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
35312    _mm_mask_load_ps(_mm_setzero_ps(), k, mem_addr)
35313}
35314
35315/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
35316/// (elements are copied from src when the corresponding mask bit is not set).
35317/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35318///
35319/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_pd)
35320#[inline]
35321#[target_feature(enable = "avx512f,avx512vl")]
35322#[cfg_attr(test, assert_instr(vmovapd))]
35323#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35324pub unsafe fn _mm_mask_load_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
35325    transmute(loadapd_128(mem_addr, src.as_f64x2(), k))
35326}
35327
35328/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
35329/// (elements are zeroed out when the corresponding mask bit is not set).
35330/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35331///
35332/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_pd)
35333#[inline]
35334#[target_feature(enable = "avx512f,avx512vl")]
35335#[cfg_attr(test, assert_instr(vmovapd))]
35336#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35337pub unsafe fn _mm_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
35338    _mm_mask_load_pd(_mm_setzero_pd(), k, mem_addr)
35339}
35340
35341/// Load a single-precision (32-bit) floating-point element from memory into the lower element of dst
35342/// using writemask k (the element is copied from src when mask bit 0 is not set), and set the upper
35343/// 3 packed elements of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection
35344/// exception may be generated.
35345///
35346/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_ss)
35347#[inline]
35348#[cfg_attr(test, assert_instr(vmovss))]
35349#[target_feature(enable = "avx512f")]
35350#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35351pub unsafe fn _mm_mask_load_ss(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
35352    let mut dst: __m128 = src;
35353    asm!(
35354        vpl!("vmovss {dst}{{{k}}}"),
35355        p = in(reg) mem_addr,
35356        k = in(kreg) k,
35357        dst = inout(xmm_reg) dst,
35358        options(pure, readonly, nostack, preserves_flags),
35359    );
35360    dst
35361}
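
// Illustrative sketch of the scalar masked load above (hypothetical helper name). Only
// element 0 is governed by the mask; elements 1..=3 of the result are always zeroed.
#[cfg(test)]
#[allow(dead_code)]
fn example_mm_mask_load_ss() -> __m128 {
    #[repr(align(16))]
    struct Aligned16([f32; 4]);

    let data = Aligned16([42.0, 0.0, 0.0, 0.0]);
    // Safety (assumed): the caller has verified AVX-512F support at runtime.
    unsafe {
        let src = _mm_set1_ps(-1.0);
        // Mask bit 0 is set, so lane 0 becomes 42.0; lanes 1..=3 are zeroed.
        _mm_mask_load_ss(src, 0b1, data.0.as_ptr())
    }
}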
35362
35363/// Load a single-precision (32-bit) floating-point element from memory into the lower element of dst
35364/// using zeromask k (the element is zeroed out when mask bit 0 is not set), and set the upper 3 packed
35365/// elements of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection
35366/// exception may be generated.
35367///
35368/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_ss)
35369#[inline]
35370#[cfg_attr(test, assert_instr(vmovss))]
35371#[target_feature(enable = "avx512f")]
35372#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35373pub unsafe fn _mm_maskz_load_ss(k: __mmask8, mem_addr: *const f32) -> __m128 {
35374    let mut dst: __m128;
35375    asm!(
35376        vpl!("vmovss {dst}{{{k}}} {{z}}"),
35377        p = in(reg) mem_addr,
35378        k = in(kreg) k,
35379        dst = out(xmm_reg) dst,
35380        options(pure, readonly, nostack, preserves_flags),
35381    );
35382    dst
35383}
35384
35385/// Load a double-precision (64-bit) floating-point element from memory into the lower element of dst
35386/// using writemask k (the element is copied from src when mask bit 0 is not set), and set the upper
35387/// element of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection
35388/// exception may be generated.
35389///
35390/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_sd)
35391#[inline]
35392#[cfg_attr(test, assert_instr(vmovsd))]
35393#[target_feature(enable = "avx512f")]
35394#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35395pub unsafe fn _mm_mask_load_sd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
35396    let mut dst: __m128d = src;
35397    asm!(
35398        vpl!("vmovsd {dst}{{{k}}}"),
35399        p = in(reg) mem_addr,
35400        k = in(kreg) k,
35401        dst = inout(xmm_reg) dst,
35402        options(pure, readonly, nostack, preserves_flags),
35403    );
35404    dst
35405}
35406
35407/// Load a double-precision (64-bit) floating-point element from memory into the lower element of dst
35408/// using zeromask k (the element is zeroed out when mask bit 0 is not set), and set the upper element
35409/// of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection exception
35410/// may be generated.
35411///
35412/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_sd)
35413#[inline]
35414#[cfg_attr(test, assert_instr(vmovsd))]
35415#[target_feature(enable = "avx512f")]
35416#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35417pub unsafe fn _mm_maskz_load_sd(k: __mmask8, mem_addr: *const f64) -> __m128d {
35418    let mut dst: __m128d;
35419    asm!(
35420        vpl!("vmovsd {dst}{{{k}}} {{z}}"),
35421        p = in(reg) mem_addr,
35422        k = in(kreg) k,
35423        dst = out(xmm_reg) dst,
35424        options(pure, readonly, nostack, preserves_flags),
35425    );
35426    dst
35427}
35428
35429/// Store packed 32-bit integers from a into memory using writemask k.
35430/// mem_addr does not need to be aligned on any particular boundary.
35431///
35432/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi32)
35433#[inline]
35434#[target_feature(enable = "avx512f")]
35435#[cfg_attr(test, assert_instr(vmovdqu32))]
35436#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35437pub unsafe fn _mm512_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask16, a: __m512i) {
35438    storedqu32_512(mem_addr, a.as_i32x16(), mask)
35439}
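
// Illustrative sketch of a write-masked, unaligned 32-bit store (hypothetical helper
// name). Only lanes whose mask bit is set are written; the remaining destination
// elements are left untouched.
#[cfg(test)]
#[allow(dead_code)]
fn example_mask_storeu_epi32(out: &mut [i32; 16]) {
    // Safety (assumed): the caller has verified AVX-512F support at runtime.
    unsafe {
        let v = _mm512_set1_epi32(9);
        // Mask 0x000F: only out[0..4] are overwritten with 9.
        _mm512_mask_storeu_epi32(out.as_mut_ptr(), 0x000F, v);
    }
}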
35440
35441/// Store packed 64-bit integers from a into memory using writemask k.
35442/// mem_addr does not need to be aligned on any particular boundary.
35443///
35444/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi64)
35445#[inline]
35446#[target_feature(enable = "avx512f")]
35447#[cfg_attr(test, assert_instr(vmovdqu64))]
35448#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35449pub unsafe fn _mm512_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m512i) {
35450    storedqu64_512(mem_addr, a.as_i64x8(), mask)
35451}
35452
35453/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35454/// mem_addr does not need to be aligned on any particular boundary.
35455///
35456/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_ps)
35457#[inline]
35458#[target_feature(enable = "avx512f")]
35459#[cfg_attr(test, assert_instr(vmovups))]
35460#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35461pub unsafe fn _mm512_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask16, a: __m512) {
35462    storeups_512(mem_addr, a.as_f32x16(), mask)
35463}
35464
35465/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35466/// mem_addr does not need to be aligned on any particular boundary.
35467///
35468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_pd)
35469#[inline]
35470#[target_feature(enable = "avx512f")]
35471#[cfg_attr(test, assert_instr(vmovupd))]
35472#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35473pub unsafe fn _mm512_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m512d) {
35474    storeupd_512(mem_addr, a.as_f64x8(), mask)
35475}
35476
35477/// Store packed 32-bit integers from a into memory using writemask k.
35478/// mem_addr does not need to be aligned on any particular boundary.
35479///
35480/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi32)
35481#[inline]
35482#[target_feature(enable = "avx512f,avx512vl")]
35483#[cfg_attr(test, assert_instr(vmovdqu32))]
35484#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35485pub unsafe fn _mm256_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m256i) {
35486    storedqu32_256(mem_addr, a.as_i32x8(), mask)
35487}
35488
35489/// Store packed 64-bit integers from a into memory using writemask k.
35490/// mem_addr does not need to be aligned on any particular boundary.
35491///
35492/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi64)
35493#[inline]
35494#[target_feature(enable = "avx512f,avx512vl")]
35495#[cfg_attr(test, assert_instr(vmovdqu64))]
35496#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35497pub unsafe fn _mm256_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m256i) {
35498    storedqu64_256(mem_addr, a.as_i64x4(), mask)
35499}
35500
35501/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35502/// mem_addr does not need to be aligned on any particular boundary.
35503///
35504/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_ps)
35505#[inline]
35506#[target_feature(enable = "avx512f,avx512vl")]
35507#[cfg_attr(test, assert_instr(vmovups))]
35508#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35509pub unsafe fn _mm256_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m256) {
35510    storeups_256(mem_addr, a.as_f32x8(), mask)
35511}
35512
35513/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35514/// mem_addr does not need to be aligned on any particular boundary.
35515///
35516/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_pd)
35517#[inline]
35518#[target_feature(enable = "avx512f,avx512vl")]
35519#[cfg_attr(test, assert_instr(vmovupd))]
35520#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35521pub unsafe fn _mm256_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m256d) {
35522    storeupd_256(mem_addr, a.as_f64x4(), mask)
35523}
35524
35525/// Store packed 32-bit integers from a into memory using writemask k.
35526/// mem_addr does not need to be aligned on any particular boundary.
35527///
35528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi32)
35529#[inline]
35530#[target_feature(enable = "avx512f,avx512vl")]
35531#[cfg_attr(test, assert_instr(vmovdqu32))]
35532#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35533pub unsafe fn _mm_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m128i) {
35534    storedqu32_128(mem_addr, a.as_i32x4(), mask)
35535}
35536
35537/// Store packed 64-bit integers from a into memory using writemask k.
35538/// mem_addr does not need to be aligned on any particular boundary.
35539///
35540/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi64)
35541#[inline]
35542#[target_feature(enable = "avx512f,avx512vl")]
35543#[cfg_attr(test, assert_instr(vmovdqu64))]
35544#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35545pub unsafe fn _mm_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m128i) {
35546    storedqu64_128(mem_addr, a.as_i64x2(), mask)
35547}
35548
35549/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35550/// mem_addr does not need to be aligned on any particular boundary.
35551///
35552/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_ps)
35553#[inline]
35554#[target_feature(enable = "avx512f,avx512vl")]
35555#[cfg_attr(test, assert_instr(vmovups))]
35556#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35557pub unsafe fn _mm_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) {
35558    storeups_128(mem_addr, a.as_f32x4(), mask)
35559}
35560
35561/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35562/// mem_addr does not need to be aligned on any particular boundary.
35563///
35564/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_pd)
35565#[inline]
35566#[target_feature(enable = "avx512f,avx512vl")]
35567#[cfg_attr(test, assert_instr(vmovupd))]
35568#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35569pub unsafe fn _mm_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d) {
35570    storeupd_128(mem_addr, a.as_f64x2(), mask)
35571}
35572
35573/// Store packed 32-bit integers from a into memory using writemask k.
35574/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35575///
35576/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_epi32)
35577#[inline]
35578#[target_feature(enable = "avx512f")]
35579#[cfg_attr(test, assert_instr(vmovdqa32))]
35580#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35581pub unsafe fn _mm512_mask_store_epi32(mem_addr: *mut i32, mask: __mmask16, a: __m512i) {
35582    storedqa32_512(mem_addr, a.as_i32x16(), mask)
35583}
35584
35585/// Store packed 64-bit integers from a into memory using writemask k.
35586/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35587///
35588/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_epi64)
35589#[inline]
35590#[target_feature(enable = "avx512f")]
35591#[cfg_attr(test, assert_instr(vmovdqa64))]
35592#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35593pub unsafe fn _mm512_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m512i) {
35594    storedqa64_512(mem_addr, a.as_i64x8(), mask)
35595}
35596
35597/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35598/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35599///
35600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_ps)
35601#[inline]
35602#[target_feature(enable = "avx512f")]
35603#[cfg_attr(test, assert_instr(vmovaps))]
35604#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35605pub unsafe fn _mm512_mask_store_ps(mem_addr: *mut f32, mask: __mmask16, a: __m512) {
35606    storeaps_512(mem_addr, a.as_f32x16(), mask)
35607}
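
// Illustrative sketch of the aligned, write-masked single-precision store. The helper
// name and the aligned wrapper are hypothetical; unlike the `storeu` variant, the
// destination must be 64-byte aligned.
#[cfg(test)]
#[allow(dead_code)]
fn example_mask_store_ps_aligned() -> [f32; 16] {
    #[repr(align(64))]
    struct Aligned64([f32; 16]);

    let mut buf = Aligned64([0.0; 16]);
    // Safety (assumed): the caller has verified AVX-512F support at runtime.
    unsafe {
        let v = _mm512_set1_ps(2.5);
        // Mask 0xFF00: only the upper eight elements of `buf` are written.
        _mm512_mask_store_ps(buf.0.as_mut_ptr(), 0xFF00, v);
    }
    buf.0
}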
35608
35609/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35610/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35611///
35612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_pd)
35613#[inline]
35614#[target_feature(enable = "avx512f")]
35615#[cfg_attr(test, assert_instr(vmovapd))]
35616#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35617pub unsafe fn _mm512_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m512d) {
35618    storeapd_512(mem_addr, a.as_f64x8(), mask)
35619}
35620
35621/// Store packed 32-bit integers from a into memory using writemask k.
35622/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35623///
35624/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_epi32)
35625#[inline]
35626#[target_feature(enable = "avx512f,avx512vl")]
35627#[cfg_attr(test, assert_instr(vmovdqa32))]
35628#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35629pub unsafe fn _mm256_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m256i) {
35630    storedqa32_256(mem_addr, a.as_i32x8(), mask)
35631}
35632
35633/// Store packed 64-bit integers from a into memory using writemask k.
35634/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35635///
35636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_epi64)
35637#[inline]
35638#[target_feature(enable = "avx512f,avx512vl")]
35639#[cfg_attr(test, assert_instr(vmovdqa64))]
35640#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35641pub unsafe fn _mm256_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m256i) {
35642    storedqa64_256(mem_addr, a.as_i64x4(), mask)
35643}
35644
35645/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35646/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35647///
35648/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_ps)
35649#[inline]
35650#[target_feature(enable = "avx512f,avx512vl")]
35651#[cfg_attr(test, assert_instr(vmovaps))]
35652#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35653pub unsafe fn _mm256_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m256) {
35654    storeaps_256(mem_addr, a.as_f32x8(), mask)
35655}
35656
35657/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35658/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35659///
35660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_pd)
35661#[inline]
35662#[target_feature(enable = "avx512f,avx512vl")]
35663#[cfg_attr(test, assert_instr(vmovapd))]
35664#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35665pub unsafe fn _mm256_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m256d) {
35666    storeapd_256(mem_addr, a.as_f64x4(), mask)
35667}
35668
35669/// Store packed 32-bit integers from a into memory using writemask k.
35670/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35671///
35672/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_epi32)
35673#[inline]
35674#[target_feature(enable = "avx512f,avx512vl")]
35675#[cfg_attr(test, assert_instr(vmovdqa32))]
35676#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35677pub unsafe fn _mm_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m128i) {
35678    storedqa32_128(mem_addr, a.as_i32x4(), mask)
35679}
35680
35681/// Store packed 64-bit integers from a into memory using writemask k.
35682/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35683///
35684/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_epi64)
35685#[inline]
35686#[target_feature(enable = "avx512f,avx512vl")]
35687#[cfg_attr(test, assert_instr(vmovdqa64))]
35688#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35689pub unsafe fn _mm_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m128i) {
35690    storedqa64_128(mem_addr, a.as_i64x2(), mask)
35691}
35692
35693/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35694/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35695///
35696/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_ps)
35697#[inline]
35698#[target_feature(enable = "avx512f,avx512vl")]
35699#[cfg_attr(test, assert_instr(vmovaps))]
35700#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35701pub unsafe fn _mm_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) {
35702    storeaps_128(mem_addr, a.as_f32x4(), mask)
35703}
35704
35705/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35706/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35707///
35708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_pd)
35709#[inline]
35710#[target_feature(enable = "avx512f,avx512vl")]
35711#[cfg_attr(test, assert_instr(vmovapd))]
35712#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35713pub unsafe fn _mm_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d) {
35714    storeapd_128(mem_addr, a.as_f64x2(), mask)
35715}
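
// A minimal usage sketch for the masked aligned stores above. The helper name
// is hypothetical, the function is only compiled for tests and never invoked
// automatically, and it assumes an AVX-512F-capable CPU: lanes whose mask bit
// is set are written, every other memory location is left untouched.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn mask_store_ps_usage_sketch() {
    // `_mm512_mask_store_ps` requires a 64-byte aligned destination.
    #[repr(align(64))]
    struct Aligned([f32; 16]);
    let mut buf = Aligned([-1.0; 16]);
    // Only lanes 0 and 2 have their mask bit set, so only they are overwritten.
    _mm512_mask_store_ps(buf.0.as_mut_ptr(), 0b0000_0000_0000_0101, _mm512_set1_ps(7.0));
    assert_eq!(buf.0[0], 7.0);
    assert_eq!(buf.0[1], -1.0);
    assert_eq!(buf.0[2], 7.0);
}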
35716
35717/// Store a single-precision (32-bit) floating-point element from a into memory using writemask k. mem_addr
35718/// must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35719///
35720/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_store_ss)
35721#[inline]
35722#[cfg_attr(test, assert_instr(vmovss))]
35723#[target_feature(enable = "avx512f")]
35724#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35725pub unsafe fn _mm_mask_store_ss(mem_addr: *mut f32, k: __mmask8, a: __m128) {
35726    asm!(
35727        vps!("vmovss", "{{{k}}}, {a}"),
35728        p = in(reg) mem_addr,
35729        k = in(kreg) k,
35730        a = in(xmm_reg) a,
35731        options(nostack, preserves_flags),
35732    );
35733}
35734
35735/// Store a double-precision (64-bit) floating-point element from a into memory using writemask k. mem_addr
35736/// must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35737///
35738/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_store_sd)
35739#[inline]
35740#[cfg_attr(test, assert_instr(vmovsd))]
35741#[target_feature(enable = "avx512f")]
35742#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35743pub unsafe fn _mm_mask_store_sd(mem_addr: *mut f64, k: __mmask8, a: __m128d) {
35744    asm!(
35745        vps!("vmovsd", "{{{k}}}, {a}"),
35746        p = in(reg) mem_addr,
35747        k = in(kreg) k,
35748        a = in(xmm_reg) a,
35749        options(nostack, preserves_flags),
35750    );
35751}
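
// A minimal usage sketch for the scalar masked stores above (hypothetical
// helper, compiled only for tests, assuming an AVX-512F-capable CPU): the low
// element of `a` is stored only when bit 0 of the mask is set.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn mask_store_ss_usage_sketch() {
    // The documentation asks for a 16-byte aligned destination.
    #[repr(align(16))]
    struct Aligned(f32);
    let mut out = Aligned(-1.0);
    // Mask bit 0 clear: nothing is written.
    _mm_mask_store_ss(&mut out.0, 0b0, _mm_set_ss(3.0));
    assert_eq!(out.0, -1.0);
    // Mask bit 0 set: the low element of `a` replaces the old value.
    _mm_mask_store_ss(&mut out.0, 0b1, _mm_set_ss(3.0));
    assert_eq!(out.0, 3.0);
}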
35752
35753/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35754///
35755/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi32)
35756#[inline]
35757#[target_feature(enable = "avx512f")]
35758#[cfg_attr(test, assert_instr(vpexpandd))]
35759#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35760pub unsafe fn _mm512_mask_expandloadu_epi32(
35761    src: __m512i,
35762    k: __mmask16,
35763    mem_addr: *const i32,
35764) -> __m512i {
35765    transmute(expandloadd_512(mem_addr, src.as_i32x16(), k))
35766}
35767
35768/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35769///
35770/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi32)
35771#[inline]
35772#[target_feature(enable = "avx512f")]
35773#[cfg_attr(test, assert_instr(vpexpandd))]
35774#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35775pub unsafe fn _mm512_maskz_expandloadu_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
35776    _mm512_mask_expandloadu_epi32(_mm512_setzero_si512(), k, mem_addr)
35777}
35778
35779/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35780///
35781/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi32)
35782#[inline]
35783#[target_feature(enable = "avx512f,avx512vl")]
35784#[cfg_attr(test, assert_instr(vpexpandd))]
35785#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35786pub unsafe fn _mm256_mask_expandloadu_epi32(
35787    src: __m256i,
35788    k: __mmask8,
35789    mem_addr: *const i32,
35790) -> __m256i {
35791    transmute(expandloadd_256(mem_addr, src.as_i32x8(), k))
35792}
35793
35794/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35795///
35796/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi32)
35797#[inline]
35798#[target_feature(enable = "avx512f,avx512vl")]
35799#[cfg_attr(test, assert_instr(vpexpandd))]
35800#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35801pub unsafe fn _mm256_maskz_expandloadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
35802    _mm256_mask_expandloadu_epi32(_mm256_setzero_si256(), k, mem_addr)
35803}
35804
35805/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35806///
35807/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi32)
35808#[inline]
35809#[target_feature(enable = "avx512f,avx512vl")]
35810#[cfg_attr(test, assert_instr(vpexpandd))]
35811#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35812pub unsafe fn _mm_mask_expandloadu_epi32(
35813    src: __m128i,
35814    k: __mmask8,
35815    mem_addr: *const i32,
35816) -> __m128i {
35817    transmute(expandloadd_128(mem_addr, src.as_i32x4(), k))
35818}
35819
35820/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35821///
35822/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi32)
35823#[inline]
35824#[target_feature(enable = "avx512f,avx512vl")]
35825#[cfg_attr(test, assert_instr(vpexpandd))]
35826#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35827pub unsafe fn _mm_maskz_expandloadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
35828    _mm_mask_expandloadu_epi32(_mm_setzero_si128(), k, mem_addr)
35829}
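
// A minimal sketch of the expand-load semantics (hypothetical helper, compiled
// only for tests, assuming an AVX-512F-capable CPU): contiguous elements are
// read from memory and placed, in order, into the lanes whose mask bit is set;
// with the maskz variants every other lane is zeroed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn maskz_expandloadu_epi32_sketch() {
    let mem: [i32; 3] = [10, 20, 30];
    // Active lanes are 1, 2 and 5; they receive mem[0], mem[1] and mem[2].
    let r = _mm512_maskz_expandloadu_epi32(0b0000_0000_0010_0110, mem.as_ptr());
    let expected = _mm512_setr_epi32(0, 10, 20, 0, 0, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
    assert_eq!(_mm512_cmpeq_epi32_mask(r, expected), 0xffff);
}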
35830
35831/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35832///
35833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi64)
35834#[inline]
35835#[target_feature(enable = "avx512f")]
35836#[cfg_attr(test, assert_instr(vpexpandq))]
35837#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35838pub unsafe fn _mm512_mask_expandloadu_epi64(
35839    src: __m512i,
35840    k: __mmask8,
35841    mem_addr: *const i64,
35842) -> __m512i {
35843    transmute(expandloadq_512(mem_addr, src.as_i64x8(), k))
35844}
35845
35846/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35847///
35848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi64)
35849#[inline]
35850#[target_feature(enable = "avx512f")]
35851#[cfg_attr(test, assert_instr(vpexpandq))]
35852#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35853pub unsafe fn _mm512_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
35854    _mm512_mask_expandloadu_epi64(_mm512_setzero_si512(), k, mem_addr)
35855}
35856
35857/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35858///
35859/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi64)
35860#[inline]
35861#[target_feature(enable = "avx512f,avx512vl")]
35862#[cfg_attr(test, assert_instr(vpexpandq))]
35863#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35864pub unsafe fn _mm256_mask_expandloadu_epi64(
35865    src: __m256i,
35866    k: __mmask8,
35867    mem_addr: *const i64,
35868) -> __m256i {
35869    transmute(expandloadq_256(mem_addr, src.as_i64x4(), k))
35870}
35871
35872/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35873///
35874/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi64)
35875#[inline]
35876#[target_feature(enable = "avx512f,avx512vl")]
35877#[cfg_attr(test, assert_instr(vpexpandq))]
35878#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35879pub unsafe fn _mm256_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
35880    _mm256_mask_expandloadu_epi64(_mm256_setzero_si256(), k, mem_addr)
35881}
35882
35883/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35884///
35885/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi64)
35886#[inline]
35887#[target_feature(enable = "avx512f,avx512vl")]
35888#[cfg_attr(test, assert_instr(vpexpandq))]
35889#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35890pub unsafe fn _mm_mask_expandloadu_epi64(
35891    src: __m128i,
35892    k: __mmask8,
35893    mem_addr: *const i64,
35894) -> __m128i {
35895    transmute(expandloadq_128(mem_addr, src.as_i64x2(), k))
35896}
35897
35898/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35899///
35900/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi64)
35901#[inline]
35902#[target_feature(enable = "avx512f,avx512vl")]
35903#[cfg_attr(test, assert_instr(vpexpandq))]
35904#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35905pub unsafe fn _mm_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
35906    _mm_mask_expandloadu_epi64(_mm_setzero_si128(), k, mem_addr)
35907}
35908
35909/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35910///
35911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_ps)
35912#[inline]
35913#[target_feature(enable = "avx512f")]
35914#[cfg_attr(test, assert_instr(vexpandps))]
35915#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35916pub unsafe fn _mm512_mask_expandloadu_ps(
35917    src: __m512,
35918    k: __mmask16,
35919    mem_addr: *const f32,
35920) -> __m512 {
35921    transmute(expandloadps_512(mem_addr, src.as_f32x16(), k))
35922}
35923
35924/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35925///
35926/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_ps)
35927#[inline]
35928#[target_feature(enable = "avx512f")]
35929#[cfg_attr(test, assert_instr(vexpandps))]
35930#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35931pub unsafe fn _mm512_maskz_expandloadu_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
35932    _mm512_mask_expandloadu_ps(_mm512_setzero_ps(), k, mem_addr)
35933}
35934
35935/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35936///
35937/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_ps)
35938#[inline]
35939#[target_feature(enable = "avx512f,avx512vl")]
35940#[cfg_attr(test, assert_instr(vexpandps))]
35941#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35942pub unsafe fn _mm256_mask_expandloadu_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
35943    transmute(expandloadps_256(mem_addr, src.as_f32x8(), k))
35944}
35945
35946/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35947///
35948/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_ps)
35949#[inline]
35950#[target_feature(enable = "avx512f,avx512vl")]
35951#[cfg_attr(test, assert_instr(vexpandps))]
35952#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35953pub unsafe fn _mm256_maskz_expandloadu_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
35954    _mm256_mask_expandloadu_ps(_mm256_setzero_ps(), k, mem_addr)
35955}
35956
35957/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35958///
35959/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_ps)
35960#[inline]
35961#[target_feature(enable = "avx512f,avx512vl")]
35962#[cfg_attr(test, assert_instr(vexpandps))]
35963#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35964pub unsafe fn _mm_mask_expandloadu_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
35965    transmute(expandloadps_128(mem_addr, src.as_f32x4(), k))
35966}
35967
35968/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35969///
35970/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_ps)
35971#[inline]
35972#[target_feature(enable = "avx512f,avx512vl")]
35973#[cfg_attr(test, assert_instr(vexpandps))]
35974#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35975pub unsafe fn _mm_maskz_expandloadu_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
35976    _mm_mask_expandloadu_ps(_mm_setzero_ps(), k, mem_addr)
35977}
35978
35979/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35980///
35981/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_pd)
35982#[inline]
35983#[target_feature(enable = "avx512f")]
35984#[cfg_attr(test, assert_instr(vexpandpd))]
35985#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35986pub unsafe fn _mm512_mask_expandloadu_pd(
35987    src: __m512d,
35988    k: __mmask8,
35989    mem_addr: *const f64,
35990) -> __m512d {
35991    transmute(expandloadpd_512(mem_addr, src.as_f64x8(), k))
35992}
35993
35994/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35995///
35996/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_pd)
35997#[inline]
35998#[target_feature(enable = "avx512f")]
35999#[cfg_attr(test, assert_instr(vexpandpd))]
36000#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36001pub unsafe fn _mm512_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
36002    _mm512_mask_expandloadu_pd(_mm512_setzero_pd(), k, mem_addr)
36003}
36004
36005/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
36006///
36007/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_pd)
36008#[inline]
36009#[target_feature(enable = "avx512f,avx512vl")]
36010#[cfg_attr(test, assert_instr(vexpandpd))]
36011#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36012pub unsafe fn _mm256_mask_expandloadu_pd(
36013    src: __m256d,
36014    k: __mmask8,
36015    mem_addr: *const f64,
36016) -> __m256d {
36017    transmute(expandloadpd_256(mem_addr, src.as_f64x4(), k))
36018}
36019
36020/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
36021///
36022/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_pd)
36023#[inline]
36024#[target_feature(enable = "avx512f,avx512vl")]
36025#[cfg_attr(test, assert_instr(vexpandpd))]
36026#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36027pub unsafe fn _mm256_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
36028    _mm256_mask_expandloadu_pd(_mm256_setzero_pd(), k, mem_addr)
36029}
36030
36031/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
36032///
36033/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_pd)
36034#[inline]
36035#[target_feature(enable = "avx512f,avx512vl")]
36036#[cfg_attr(test, assert_instr(vexpandpd))]
36037#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36038pub unsafe fn _mm_mask_expandloadu_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
36039    transmute(expandloadpd_128(mem_addr, src.as_f64x2(), k))
36040}
36041
36042/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
36043///
36044/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_pd)
36045#[inline]
36046#[target_feature(enable = "avx512f,avx512vl")]
36047#[cfg_attr(test, assert_instr(vexpandpd))]
36048#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36049pub unsafe fn _mm_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
36050    _mm_mask_expandloadu_pd(_mm_setzero_pd(), k, mem_addr)
36051}
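
// A minimal sketch of the writemask expand-load variants (hypothetical helper,
// compiled only for tests, assuming AVX-512F and AVX-512VL): inactive lanes are
// copied from `src` instead of being zeroed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn mask_expandloadu_pd_sketch() {
    let mem = [42.0f64];
    let src = _mm_set_pd(1.0, 2.0); // lanes are [2.0, 1.0]
    // Only lane 1 is active, so it receives mem[0]; lane 0 keeps the value from `src`.
    let r = _mm_mask_expandloadu_pd(src, 0b10, mem.as_ptr());
    let mut lanes = [0.0f64; 2];
    _mm_storeu_pd(lanes.as_mut_ptr(), r);
    assert_eq!(lanes, [2.0, 42.0]);
}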
36052
36053/// Set packed double-precision (64-bit) floating-point elements in dst with the supplied values in reverse order.
36054///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_pd&expand=5002)
36056#[inline]
36057#[target_feature(enable = "avx512f")]
36058#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36059pub fn _mm512_setr_pd(
36060    e0: f64,
36061    e1: f64,
36062    e2: f64,
36063    e3: f64,
36064    e4: f64,
36065    e5: f64,
36066    e6: f64,
36067    e7: f64,
36068) -> __m512d {
36069    unsafe {
36070        let r = f64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
36071        transmute(r)
36072    }
36073}
36074
36075/// Set packed double-precision (64-bit) floating-point elements in dst with the supplied values.
36076///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_pd&expand=4924)
36078#[inline]
36079#[target_feature(enable = "avx512f")]
36080#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36081pub fn _mm512_set_pd(
36082    e0: f64,
36083    e1: f64,
36084    e2: f64,
36085    e3: f64,
36086    e4: f64,
36087    e5: f64,
36088    e6: f64,
36089    e7: f64,
36090) -> __m512d {
36091    _mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0)
36092}
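
// A minimal sketch of the element ordering (hypothetical helper, compiled only
// for tests, assuming an AVX-512F-capable CPU): `_mm512_setr_pd` takes its
// arguments in memory order (e0 becomes lane 0), while `_mm512_set_pd` takes
// them highest lane first, so the two calls below build the same vector.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn set_vs_setr_pd_sketch() {
    let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
    let b = _mm512_set_pd(7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0);
    let (mut la, mut lb) = ([0.0f64; 8], [0.0f64; 8]);
    _mm512_storeu_pd(la.as_mut_ptr(), a);
    _mm512_storeu_pd(lb.as_mut_ptr(), b);
    assert_eq!(la, [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]);
    assert_eq!(la, lb);
}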
36093
36094/// Move the lower single-precision (32-bit) floating-point element from b to the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36095///
36096/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_move_ss&expand=3832)
36097#[inline]
36098#[target_feature(enable = "avx512f")]
36099#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36100#[cfg_attr(test, assert_instr(vmovss))]
36101pub fn _mm_mask_move_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36102    unsafe {
36103        let extractsrc: f32 = simd_extract!(src, 0);
36104        let mut mov: f32 = extractsrc;
36105        if (k & 0b00000001) != 0 {
36106            mov = simd_extract!(b, 0);
36107        }
36108        simd_insert!(a, 0, mov)
36109    }
36110}
36111
36112/// Move the lower single-precision (32-bit) floating-point element from b to the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36113///
36114/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_move_ss&expand=3833)
36115#[inline]
36116#[target_feature(enable = "avx512f")]
36117#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36118#[cfg_attr(test, assert_instr(vmovss))]
36119pub fn _mm_maskz_move_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36120    unsafe {
36121        let mut mov: f32 = 0.;
36122        if (k & 0b00000001) != 0 {
36123            mov = simd_extract!(b, 0);
36124        }
36125        simd_insert!(a, 0, mov)
36126    }
36127}
36128
36129/// Move the lower double-precision (64-bit) floating-point element from b to the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36130///
36131/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_move_sd&expand=3829)
36132#[inline]
36133#[target_feature(enable = "avx512f")]
36134#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36135#[cfg_attr(test, assert_instr(vmovsd))]
36136pub fn _mm_mask_move_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36137    unsafe {
36138        let extractsrc: f64 = simd_extract!(src, 0);
36139        let mut mov: f64 = extractsrc;
36140        if (k & 0b00000001) != 0 {
36141            mov = simd_extract!(b, 0);
36142        }
36143        simd_insert!(a, 0, mov)
36144    }
36145}
36146
36147/// Move the lower double-precision (64-bit) floating-point element from b to the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36148///
36149/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_move_sd&expand=3830)
36150#[inline]
36151#[target_feature(enable = "avx512f")]
36152#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36153#[cfg_attr(test, assert_instr(vmovsd))]
36154pub fn _mm_maskz_move_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36155    unsafe {
36156        let mut mov: f64 = 0.;
36157        if (k & 0b00000001) != 0 {
36158            mov = simd_extract!(b, 0);
36159        }
36160        simd_insert!(a, 0, mov)
36161    }
36162}
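
// A minimal sketch of the masked scalar moves above (hypothetical helper,
// compiled only for tests, assuming an AVX-512F-capable CPU): lane 0 comes from
// `b` when mask bit 0 is set and from `src` otherwise, while the upper lanes
// always come from `a`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn mask_move_ss_sketch() {
    let src = _mm_set_ss(9.0);
    let a = _mm_setzero_ps();
    let b = _mm_set_ss(5.0);
    assert_eq!(_mm_cvtss_f32(_mm_mask_move_ss(src, 0b0, a, b)), 9.0);
    assert_eq!(_mm_cvtss_f32(_mm_mask_move_ss(src, 0b1, a, b)), 5.0);
}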
36163
36164/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36165///
36166/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_ss&expand=159)
36167#[inline]
36168#[target_feature(enable = "avx512f")]
36169#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36170#[cfg_attr(test, assert_instr(vaddss))]
36171pub fn _mm_mask_add_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36172    unsafe {
36173        let extractsrc: f32 = simd_extract!(src, 0);
36174        let mut add: f32 = extractsrc;
36175        if (k & 0b00000001) != 0 {
36176            let extracta: f32 = simd_extract!(a, 0);
36177            let extractb: f32 = simd_extract!(b, 0);
36178            add = extracta + extractb;
36179        }
36180        simd_insert!(a, 0, add)
36181    }
36182}
36183
36184/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36185///
36186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_add_ss&expand=160)
36187#[inline]
36188#[target_feature(enable = "avx512f")]
36189#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36190#[cfg_attr(test, assert_instr(vaddss))]
36191pub fn _mm_maskz_add_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36192    unsafe {
36193        let mut add: f32 = 0.;
36194        if (k & 0b00000001) != 0 {
36195            let extracta: f32 = simd_extract!(a, 0);
36196            let extractb: f32 = simd_extract!(b, 0);
36197            add = extracta + extractb;
36198        }
36199        simd_insert!(a, 0, add)
36200    }
36201}
36202
36203/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36204///
36205/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_sd&expand=155)
36206#[inline]
36207#[target_feature(enable = "avx512f")]
36208#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36209#[cfg_attr(test, assert_instr(vaddsd))]
36210pub fn _mm_mask_add_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36211    unsafe {
36212        let extractsrc: f64 = simd_extract!(src, 0);
36213        let mut add: f64 = extractsrc;
36214        if (k & 0b00000001) != 0 {
36215            let extracta: f64 = simd_extract!(a, 0);
36216            let extractb: f64 = simd_extract!(b, 0);
36217            add = extracta + extractb;
36218        }
36219        simd_insert!(a, 0, add)
36220    }
36221}
36222
36223/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36224///
36225/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_add_sd&expand=156)
36226#[inline]
36227#[target_feature(enable = "avx512f")]
36228#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36229#[cfg_attr(test, assert_instr(vaddsd))]
36230pub fn _mm_maskz_add_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36231    unsafe {
36232        let mut add: f64 = 0.;
36233        if (k & 0b00000001) != 0 {
36234            let extracta: f64 = simd_extract!(a, 0);
36235            let extractb: f64 = simd_extract!(b, 0);
36236            add = extracta + extractb;
36237        }
36238        simd_insert!(a, 0, add)
36239    }
36240}
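
// A minimal sketch of the masked scalar arithmetic above (hypothetical helper,
// compiled only for tests, assuming an AVX-512F-capable CPU): the writemask
// form falls back to the low element of `src` and the zeromask form falls back
// to 0.0 when mask bit 0 is clear. The sub/mul/div variants below behave the
// same way with their respective operations.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn mask_add_ss_sketch() {
    let a = _mm_set_ss(2.0);
    let b = _mm_set_ss(3.0);
    let src = _mm_set_ss(100.0);
    assert_eq!(_mm_cvtss_f32(_mm_mask_add_ss(src, 0b0, a, b)), 100.0);
    assert_eq!(_mm_cvtss_f32(_mm_mask_add_ss(src, 0b1, a, b)), 5.0);
    assert_eq!(_mm_cvtss_f32(_mm_maskz_add_ss(0b0, a, b)), 0.0);
}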
36241
36242/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36243///
36244/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_ss&expand=5750)
36245#[inline]
36246#[target_feature(enable = "avx512f")]
36247#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36248#[cfg_attr(test, assert_instr(vsubss))]
36249pub fn _mm_mask_sub_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36250    unsafe {
36251        let extractsrc: f32 = simd_extract!(src, 0);
        let mut sub: f32 = extractsrc;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            sub = extracta - extractb;
        }
        simd_insert!(a, 0, sub)
36259    }
36260}
36261
36262/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36263///
36264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_ss&expand=5751)
36265#[inline]
36266#[target_feature(enable = "avx512f")]
36267#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36268#[cfg_attr(test, assert_instr(vsubss))]
36269pub fn _mm_maskz_sub_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36270    unsafe {
        let mut sub: f32 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            sub = extracta - extractb;
        }
        simd_insert!(a, 0, sub)
36278    }
36279}
36280
36281/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36282///
36283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_sd&expand=5746)
36284#[inline]
36285#[target_feature(enable = "avx512f")]
36286#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36287#[cfg_attr(test, assert_instr(vsubsd))]
36288pub fn _mm_mask_sub_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36289    unsafe {
36290        let extractsrc: f64 = simd_extract!(src, 0);
        let mut sub: f64 = extractsrc;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            sub = extracta - extractb;
        }
        simd_insert!(a, 0, sub)
36298    }
36299}
36300
36301/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36302///
36303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_sd&expand=5747)
36304#[inline]
36305#[target_feature(enable = "avx512f")]
36306#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36307#[cfg_attr(test, assert_instr(vsubsd))]
36308pub fn _mm_maskz_sub_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36309    unsafe {
        let mut sub: f64 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            sub = extracta - extractb;
        }
        simd_insert!(a, 0, sub)
36317    }
36318}
36319
36320/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36321///
36322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_ss&expand=3950)
36323#[inline]
36324#[target_feature(enable = "avx512f")]
36325#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36326#[cfg_attr(test, assert_instr(vmulss))]
36327pub fn _mm_mask_mul_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36328    unsafe {
36329        let extractsrc: f32 = simd_extract!(src, 0);
        let mut mul: f32 = extractsrc;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            mul = extracta * extractb;
        }
        simd_insert!(a, 0, mul)
36337    }
36338}
36339
36340/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36341///
36342/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_ss&expand=3951)
36343#[inline]
36344#[target_feature(enable = "avx512f")]
36345#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36346#[cfg_attr(test, assert_instr(vmulss))]
36347pub fn _mm_maskz_mul_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36348    unsafe {
        let mut mul: f32 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            mul = extracta * extractb;
        }
        simd_insert!(a, 0, mul)
36356    }
36357}
36358
36359/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36360///
36361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_sd&expand=3947)
36362#[inline]
36363#[target_feature(enable = "avx512f")]
36364#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36365#[cfg_attr(test, assert_instr(vmulsd))]
36366pub fn _mm_mask_mul_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36367    unsafe {
36368        let extractsrc: f64 = simd_extract!(src, 0);
        let mut mul: f64 = extractsrc;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            mul = extracta * extractb;
        }
        simd_insert!(a, 0, mul)
36376    }
36377}
36378
36379/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36380///
36381/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_sd&expand=3948)
36382#[inline]
36383#[target_feature(enable = "avx512f")]
36384#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36385#[cfg_attr(test, assert_instr(vmulsd))]
36386pub fn _mm_maskz_mul_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36387    unsafe {
        let mut mul: f64 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            mul = extracta * extractb;
        }
        simd_insert!(a, 0, mul)
36395    }
36396}
36397
36398/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36399///
36400/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_ss&expand=2181)
36401#[inline]
36402#[target_feature(enable = "avx512f")]
36403#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36404#[cfg_attr(test, assert_instr(vdivss))]
36405pub fn _mm_mask_div_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36406    unsafe {
36407        let extractsrc: f32 = simd_extract!(src, 0);
        let mut div: f32 = extractsrc;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            div = extracta / extractb;
        }
        simd_insert!(a, 0, div)
36415    }
36416}
36417
36418/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36419///
36420/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_ss&expand=2182)
36421#[inline]
36422#[target_feature(enable = "avx512f")]
36423#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36424#[cfg_attr(test, assert_instr(vdivss))]
36425pub fn _mm_maskz_div_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36426    unsafe {
        let mut div: f32 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            div = extracta / extractb;
        }
        simd_insert!(a, 0, div)
36434    }
36435}
36436
36437/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36438///
36439/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_sd&expand=2178)
36440#[inline]
36441#[target_feature(enable = "avx512f")]
36442#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36443#[cfg_attr(test, assert_instr(vdivsd))]
36444pub fn _mm_mask_div_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36445    unsafe {
36446        let extractsrc: f64 = simd_extract!(src, 0);
        let mut div: f64 = extractsrc;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            div = extracta / extractb;
        }
        simd_insert!(a, 0, div)
36454    }
36455}
36456
36457/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36458///
36459/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_sd&expand=2179)
36460#[inline]
36461#[target_feature(enable = "avx512f")]
36462#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36463#[cfg_attr(test, assert_instr(vdivsd))]
36464pub fn _mm_maskz_div_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36465    unsafe {
        let mut div: f64 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            div = extracta / extractb;
        }
        simd_insert!(a, 0, div)
36473    }
36474}
36475
36476/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36477///
36478/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_max_ss&expand=3672)
36479#[inline]
36480#[target_feature(enable = "avx512f")]
36481#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36482#[cfg_attr(test, assert_instr(vmaxss))]
36483pub fn _mm_mask_max_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36484    unsafe {
36485        transmute(vmaxss(
36486            a.as_f32x4(),
36487            b.as_f32x4(),
36488            src.as_f32x4(),
36489            k,
36490            _MM_FROUND_CUR_DIRECTION,
36491        ))
36492    }
36493}
36494
36495/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36496///
36497/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_max_ss&expand=3673)
36498#[inline]
36499#[target_feature(enable = "avx512f")]
36500#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36501#[cfg_attr(test, assert_instr(vmaxss))]
36502pub fn _mm_maskz_max_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36503    unsafe {
36504        transmute(vmaxss(
36505            a.as_f32x4(),
36506            b.as_f32x4(),
36507            f32x4::ZERO,
36508            k,
36509            _MM_FROUND_CUR_DIRECTION,
36510        ))
36511    }
36512}
36513
36514/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36515///
36516/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_max_sd&expand=3669)
36517#[inline]
36518#[target_feature(enable = "avx512f")]
36519#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36520#[cfg_attr(test, assert_instr(vmaxsd))]
36521pub fn _mm_mask_max_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36522    unsafe {
36523        transmute(vmaxsd(
36524            a.as_f64x2(),
36525            b.as_f64x2(),
36526            src.as_f64x2(),
36527            k,
36528            _MM_FROUND_CUR_DIRECTION,
36529        ))
36530    }
36531}
36532
36533/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36534///
36535/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_max_sd&expand=3670)
36536#[inline]
36537#[target_feature(enable = "avx512f")]
36538#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36539#[cfg_attr(test, assert_instr(vmaxsd))]
36540pub fn _mm_maskz_max_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36541    unsafe {
36542        transmute(vmaxsd(
36543            a.as_f64x2(),
36544            b.as_f64x2(),
36545            f64x2::ZERO,
36546            k,
36547            _MM_FROUND_CUR_DIRECTION,
36548        ))
36549    }
36550}
36551
36552/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36553///
36554/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_ss&expand=3786)
36555#[inline]
36556#[target_feature(enable = "avx512f")]
36557#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36558#[cfg_attr(test, assert_instr(vminss))]
36559pub fn _mm_mask_min_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36560    unsafe {
36561        transmute(vminss(
36562            a.as_f32x4(),
36563            b.as_f32x4(),
36564            src.as_f32x4(),
36565            k,
36566            _MM_FROUND_CUR_DIRECTION,
36567        ))
36568    }
36569}
36570
36571/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36572///
36573/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_ss&expand=3787)
36574#[inline]
36575#[target_feature(enable = "avx512f")]
36576#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36577#[cfg_attr(test, assert_instr(vminss))]
36578pub fn _mm_maskz_min_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36579    unsafe {
36580        transmute(vminss(
36581            a.as_f32x4(),
36582            b.as_f32x4(),
36583            f32x4::ZERO,
36584            k,
36585            _MM_FROUND_CUR_DIRECTION,
36586        ))
36587    }
36588}
36589
36590/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36591///
36592/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_sd&expand=3783)
36593#[inline]
36594#[target_feature(enable = "avx512f")]
36595#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36596#[cfg_attr(test, assert_instr(vminsd))]
36597pub fn _mm_mask_min_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36598    unsafe {
36599        transmute(vminsd(
36600            a.as_f64x2(),
36601            b.as_f64x2(),
36602            src.as_f64x2(),
36603            k,
36604            _MM_FROUND_CUR_DIRECTION,
36605        ))
36606    }
36607}
36608
36609/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36610///
36611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_sd&expand=3784)
36612#[inline]
36613#[target_feature(enable = "avx512f")]
36614#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36615#[cfg_attr(test, assert_instr(vminsd))]
36616pub fn _mm_maskz_min_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36617    unsafe {
36618        transmute(vminsd(
36619            a.as_f64x2(),
36620            b.as_f64x2(),
36621            f64x2::ZERO,
36622            k,
36623            _MM_FROUND_CUR_DIRECTION,
36624        ))
36625    }
36626}
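
// A minimal sketch of the masked scalar max/min comparisons above (hypothetical
// helper, compiled only for tests, assuming an AVX-512F-capable CPU): the low
// lane holds max(a[0], b[0]) when mask bit 0 is set and is zeroed by the maskz
// form otherwise; the min variants behave analogously.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn maskz_max_ss_sketch() {
    let a = _mm_set_ss(1.0);
    let b = _mm_set_ss(4.0);
    assert_eq!(_mm_cvtss_f32(_mm_maskz_max_ss(0b1, a, b)), 4.0);
    assert_eq!(_mm_cvtss_f32(_mm_maskz_max_ss(0b0, a, b)), 0.0);
}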
36627
36628/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36629///
36630/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_ss&expand=5387)
36631#[inline]
36632#[target_feature(enable = "avx512f")]
36633#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36634#[cfg_attr(test, assert_instr(vsqrtss))]
36635pub fn _mm_mask_sqrt_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36636    unsafe { vsqrtss(a, b, src, k, _MM_FROUND_CUR_DIRECTION) }
36637}
36638
36639/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36640///
36641/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_ss&expand=5388)
36642#[inline]
36643#[target_feature(enable = "avx512f")]
36644#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36645#[cfg_attr(test, assert_instr(vsqrtss))]
36646pub fn _mm_maskz_sqrt_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36647    unsafe { vsqrtss(a, b, _mm_setzero_ps(), k, _MM_FROUND_CUR_DIRECTION) }
36648}
36649
36650/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36651///
36652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_sd&expand=5384)
36653#[inline]
36654#[target_feature(enable = "avx512f")]
36655#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36656#[cfg_attr(test, assert_instr(vsqrtsd))]
36657pub fn _mm_mask_sqrt_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36658    unsafe { vsqrtsd(a, b, src, k, _MM_FROUND_CUR_DIRECTION) }
36659}
36660
36661/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36662///
36663/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_sd&expand=5385)
36664#[inline]
36665#[target_feature(enable = "avx512f")]
36666#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36667#[cfg_attr(test, assert_instr(vsqrtsd))]
36668pub fn _mm_maskz_sqrt_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36669    unsafe { vsqrtsd(a, b, _mm_setzero_pd(), k, _MM_FROUND_CUR_DIRECTION) }
36670}
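
// A minimal sketch of the masked scalar square root above (hypothetical helper,
// compiled only for tests, assuming an AVX-512F-capable CPU): the low lane is
// sqrt(b[0]) when mask bit 0 is set and is copied from `src` otherwise.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn mask_sqrt_ss_sketch() {
    let a = _mm_setzero_ps();
    let b = _mm_set_ss(9.0);
    let src = _mm_set_ss(-1.0);
    assert_eq!(_mm_cvtss_f32(_mm_mask_sqrt_ss(src, 0b1, a, b)), 3.0);
    assert_eq!(_mm_cvtss_f32(_mm_mask_sqrt_ss(src, 0b0, a, b)), -1.0);
}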
36671
36672/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36673///
36674/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rsqrt14_ss&expand=4825)
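///
/// # Example
///
/// A hedged sketch with illustrative values only (assumes `avx512f` and the unstable
/// `stdarch_x86_avx512` feature):
///
/// ```ignore
/// let a = _mm_set1_ps(8.0);  // upper three lanes are copied from a
/// let b = _mm_set_ss(4.0);   // the approximate 1/sqrt is computed on the lowest lane of b
/// let r = _mm_rsqrt14_ss(a, b);
/// let lo = _mm_cvtss_f32(r); // approximately 0.5, within a relative error of 2^-14
/// ```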
36675#[inline]
36676#[target_feature(enable = "avx512f")]
36677#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36678#[cfg_attr(test, assert_instr(vrsqrt14ss))]
36679pub fn _mm_rsqrt14_ss(a: __m128, b: __m128) -> __m128 {
36680    unsafe { transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, 0b1)) }
36681}
36682
36683/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36684///
36685/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rsqrt14_ss&expand=4823)
36686#[inline]
36687#[target_feature(enable = "avx512f")]
36688#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36689#[cfg_attr(test, assert_instr(vrsqrt14ss))]
36690pub fn _mm_mask_rsqrt14_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36691    unsafe { transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k)) }
36692}
36693
36694/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36695///
36696/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rsqrt14_ss&expand=4824)
36697#[inline]
36698#[target_feature(enable = "avx512f")]
36699#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36700#[cfg_attr(test, assert_instr(vrsqrt14ss))]
36701pub fn _mm_maskz_rsqrt14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36702    unsafe { transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k)) }
36703}
36704
36705/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36706///
36707/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rsqrt14_sd&expand=4822)
36708#[inline]
36709#[target_feature(enable = "avx512f")]
36710#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36711#[cfg_attr(test, assert_instr(vrsqrt14sd))]
36712pub fn _mm_rsqrt14_sd(a: __m128d, b: __m128d) -> __m128d {
36713    unsafe { transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, 0b1)) }
36714}
36715
36716/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36717///
36718/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rsqrt14_sd&expand=4820)
36719#[inline]
36720#[target_feature(enable = "avx512f")]
36721#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36722#[cfg_attr(test, assert_instr(vrsqrt14sd))]
36723pub fn _mm_mask_rsqrt14_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36724    unsafe { transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k)) }
36725}
36726
36727/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36728///
36729/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rsqrt14_sd&expand=4821)
36730#[inline]
36731#[target_feature(enable = "avx512f")]
36732#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36733#[cfg_attr(test, assert_instr(vrsqrt14sd))]
36734pub fn _mm_maskz_rsqrt14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36735    unsafe { transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k)) }
36736}
36737
36738/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36739///
36740/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rcp14_ss&expand=4508)
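///
/// # Example
///
/// An illustrative sketch (assumes `avx512f` and the unstable `stdarch_x86_avx512` feature):
///
/// ```ignore
/// let a = _mm_set1_ps(1.0);  // upper three lanes are copied from a
/// let b = _mm_set_ss(8.0);   // the approximate reciprocal is computed on the lowest lane of b
/// let r = _mm_rcp14_ss(a, b);
/// let lo = _mm_cvtss_f32(r); // approximately 0.125, within a relative error of 2^-14
/// ```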
36741#[inline]
36742#[target_feature(enable = "avx512f")]
36743#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36744#[cfg_attr(test, assert_instr(vrcp14ss))]
36745pub fn _mm_rcp14_ss(a: __m128, b: __m128) -> __m128 {
36746    unsafe { transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, 0b1)) }
36747}
36748
36749/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36750///
36751/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rcp14_ss&expand=4506)
36752#[inline]
36753#[target_feature(enable = "avx512f")]
36754#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36755#[cfg_attr(test, assert_instr(vrcp14ss))]
36756pub fn _mm_mask_rcp14_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36757    unsafe { transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k)) }
36758}
36759
36760/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36761///
36762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rcp14_ss&expand=4507)
36763#[inline]
36764#[target_feature(enable = "avx512f")]
36765#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36766#[cfg_attr(test, assert_instr(vrcp14ss))]
36767pub fn _mm_maskz_rcp14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36768    unsafe { transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k)) }
36769}
36770
36771/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36772///
36773/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rcp14_sd&expand=4505)
36774#[inline]
36775#[target_feature(enable = "avx512f")]
36776#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36777#[cfg_attr(test, assert_instr(vrcp14sd))]
36778pub fn _mm_rcp14_sd(a: __m128d, b: __m128d) -> __m128d {
36779    unsafe { transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, 0b1)) }
36780}
36781
36782/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36783///
36784/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rcp14_sd&expand=4503)
36785#[inline]
36786#[target_feature(enable = "avx512f")]
36787#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36788#[cfg_attr(test, assert_instr(vrcp14sd))]
36789pub fn _mm_mask_rcp14_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36790    unsafe { transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k)) }
36791}
36792
36793/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36794///
36795/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rcp14_sd&expand=4504)
36796#[inline]
36797#[target_feature(enable = "avx512f")]
36798#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36799#[cfg_attr(test, assert_instr(vrcp14sd))]
36800pub fn _mm_maskz_rcp14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36801    unsafe { transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k)) }
36802}
36803
36804/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36805///
36806/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_ss&expand=2862)
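///
/// # Example
///
/// A small sketch of the floor(log2(x)) behaviour (illustrative; assumes `avx512f` and the
/// unstable `stdarch_x86_avx512` feature):
///
/// ```ignore
/// let a = _mm_set1_ps(0.0);  // upper three lanes are copied from a
/// let b = _mm_set_ss(10.0);  // the exponent is extracted from the lowest lane of b
/// let r = _mm_getexp_ss(a, b);
/// assert_eq!(_mm_cvtss_f32(r), 3.0); // floor(log2(10.0)) == 3
/// ```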
36807#[inline]
36808#[target_feature(enable = "avx512f")]
36809#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36810#[cfg_attr(test, assert_instr(vgetexpss))]
36811pub fn _mm_getexp_ss(a: __m128, b: __m128) -> __m128 {
36812    unsafe {
36813        transmute(vgetexpss(
36814            a.as_f32x4(),
36815            b.as_f32x4(),
36816            f32x4::ZERO,
36817            0b1,
36818            _MM_FROUND_NO_EXC,
36819        ))
36820    }
36821}
36822
36823/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36824///
36825/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_ss&expand=2863)
36826#[inline]
36827#[target_feature(enable = "avx512f")]
36828#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36829#[cfg_attr(test, assert_instr(vgetexpss))]
36830pub fn _mm_mask_getexp_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36831    unsafe {
36832        transmute(vgetexpss(
36833            a.as_f32x4(),
36834            b.as_f32x4(),
36835            src.as_f32x4(),
36836            k,
36837            _MM_FROUND_NO_EXC,
36838        ))
36839    }
36840}
36841
36842/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36843///
36844/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_ss&expand=2864)
36845#[inline]
36846#[target_feature(enable = "avx512f")]
36847#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36848#[cfg_attr(test, assert_instr(vgetexpss))]
36849pub fn _mm_maskz_getexp_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36850    unsafe {
36851        transmute(vgetexpss(
36852            a.as_f32x4(),
36853            b.as_f32x4(),
36854            f32x4::ZERO,
36855            k,
36856            _MM_FROUND_NO_EXC,
36857        ))
36858    }
36859}
36860
36861/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36862///
36863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_sd&expand=2859)
36864#[inline]
36865#[target_feature(enable = "avx512f")]
36866#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36867#[cfg_attr(test, assert_instr(vgetexpsd))]
36868pub fn _mm_getexp_sd(a: __m128d, b: __m128d) -> __m128d {
36869    unsafe {
36870        transmute(vgetexpsd(
36871            a.as_f64x2(),
36872            b.as_f64x2(),
36873            f64x2::ZERO,
36874            0b1,
36875            _MM_FROUND_NO_EXC,
36876        ))
36877    }
36878}
36879
36880/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36881///
36882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_sd&expand=2860)
36883#[inline]
36884#[target_feature(enable = "avx512f")]
36885#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36886#[cfg_attr(test, assert_instr(vgetexpsd))]
36887pub fn _mm_mask_getexp_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36888    unsafe {
36889        transmute(vgetexpsd(
36890            a.as_f64x2(),
36891            b.as_f64x2(),
36892            src.as_f64x2(),
36893            k,
36894            _MM_FROUND_NO_EXC,
36895        ))
36896    }
36897}
36898
36899/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36900///
36901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_sd&expand=2861)
36902#[inline]
36903#[target_feature(enable = "avx512f")]
36904#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36905#[cfg_attr(test, assert_instr(vgetexpsd))]
36906pub fn _mm_maskz_getexp_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36907    unsafe {
36908        transmute(vgetexpsd(
36909            a.as_f64x2(),
36910            b.as_f64x2(),
36911            f64x2::ZERO,
36912            k,
36913            _MM_FROUND_NO_EXC,
36914        ))
36915    }
36916}
36917
36918/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
36919/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
36920///    _MM_MANT_NORM_1_2     // interval [1, 2)\
36921///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
36922///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
36923///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
36924/// The sign is determined by sc which can take the following values:\
36925///    _MM_MANT_SIGN_src     // sign = sign(src)\
36926///    _MM_MANT_SIGN_zero    // sign = 0\
36927///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
36928/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
36929///
36930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_ss&expand=2898)
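///
/// # Example
///
/// An illustrative sketch of the interval and sign parameters (not from Intel's documentation;
/// assumes `avx512f` and the unstable `stdarch_x86_avx512` feature):
///
/// ```ignore
/// let a = _mm_set1_ps(0.0);  // upper three lanes are copied from a
/// let b = _mm_set_ss(10.0);  // the mantissa is taken from the lowest lane of b
/// let r = _mm_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_src>(a, b);
/// assert_eq!(_mm_cvtss_f32(r), 1.25); // 10.0 == 1.25 * 2^3, normalized into [1, 2)
/// ```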
36931#[inline]
36932#[target_feature(enable = "avx512f")]
36933#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36934#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))]
36935#[rustc_legacy_const_generics(2, 3)]
36936pub fn _mm_getmant_ss<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
36937    a: __m128,
36938    b: __m128,
36939) -> __m128 {
36940    unsafe {
36941        static_assert_uimm_bits!(NORM, 4);
36942        static_assert_uimm_bits!(SIGN, 2);
36943        let a = a.as_f32x4();
36944        let b = b.as_f32x4();
36945        let r = vgetmantss(
36946            a,
36947            b,
36948            SIGN << 2 | NORM,
36949            f32x4::ZERO,
36950            0b1,
36951            _MM_FROUND_CUR_DIRECTION,
36952        );
36953        transmute(r)
36954    }
36955}
36956
36957/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
36958/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
36959///    _MM_MANT_NORM_1_2     // interval [1, 2)\
36960///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
36961///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
36962///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
36963/// The sign is determined by sc which can take the following values:\
36964///    _MM_MANT_SIGN_src     // sign = sign(src)\
36965///    _MM_MANT_SIGN_zero    // sign = 0\
36966///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
36967/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
36968///
36969/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_ss&expand=2899)
36970#[inline]
36971#[target_feature(enable = "avx512f")]
36972#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36973#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))]
36974#[rustc_legacy_const_generics(4, 5)]
36975pub fn _mm_mask_getmant_ss<
36976    const NORM: _MM_MANTISSA_NORM_ENUM,
36977    const SIGN: _MM_MANTISSA_SIGN_ENUM,
36978>(
36979    src: __m128,
36980    k: __mmask8,
36981    a: __m128,
36982    b: __m128,
36983) -> __m128 {
36984    unsafe {
36985        static_assert_uimm_bits!(NORM, 4);
36986        static_assert_uimm_bits!(SIGN, 2);
36987        let a = a.as_f32x4();
36988        let b = b.as_f32x4();
36989        let src = src.as_f32x4();
36990        let r = vgetmantss(a, b, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
36991        transmute(r)
36992    }
36993}
36994
36995/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
36996/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
36997///    _MM_MANT_NORM_1_2     // interval [1, 2)\
36998///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
36999///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
37000///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
37001/// The sign is determined by sc which can take the following values:\
37002///    _MM_MANT_SIGN_src     // sign = sign(src)\
37003///    _MM_MANT_SIGN_zero    // sign = 0\
37004///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
37005/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37006///
37007/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_ss&expand=2900)
37008#[inline]
37009#[target_feature(enable = "avx512f")]
37010#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37011#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))]
37012#[rustc_legacy_const_generics(3, 4)]
37013pub fn _mm_maskz_getmant_ss<
37014    const NORM: _MM_MANTISSA_NORM_ENUM,
37015    const SIGN: _MM_MANTISSA_SIGN_ENUM,
37016>(
37017    k: __mmask8,
37018    a: __m128,
37019    b: __m128,
37020) -> __m128 {
37021    unsafe {
37022        static_assert_uimm_bits!(NORM, 4);
37023        static_assert_uimm_bits!(SIGN, 2);
37024        let a = a.as_f32x4();
37025        let b = b.as_f32x4();
37026        let r = vgetmantss(
37027            a,
37028            b,
37029            SIGN << 2 | NORM,
37030            f32x4::ZERO,
37031            k,
37032            _MM_FROUND_CUR_DIRECTION,
37033        );
37034        transmute(r)
37035    }
37036}
37037
37038/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
37039/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
37040///    _MM_MANT_NORM_1_2     // interval [1, 2)\
37041///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
37042///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
37043///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
37044/// The sign is determined by sc which can take the following values:\
37045///    _MM_MANT_SIGN_src     // sign = sign(src)\
37046///    _MM_MANT_SIGN_zero    // sign = 0\
37047///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
37048/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37049///
37050/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_sd&expand=2895)
37051#[inline]
37052#[target_feature(enable = "avx512f")]
37053#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37054#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))]
37055#[rustc_legacy_const_generics(2, 3)]
37056pub fn _mm_getmant_sd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
37057    a: __m128d,
37058    b: __m128d,
37059) -> __m128d {
37060    unsafe {
37061        static_assert_uimm_bits!(NORM, 4);
37062        static_assert_uimm_bits!(SIGN, 2);
37063        let a = a.as_f64x2();
37064        let b = b.as_f64x2();
37065        let r = vgetmantsd(
37066            a,
37067            b,
37068            SIGN << 2 | NORM,
37069            f64x2::ZERO,
37070            0b1,
37071            _MM_FROUND_CUR_DIRECTION,
37072        );
37073        transmute(r)
37074    }
37075}
37076
37077/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
37078/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
37079///    _MM_MANT_NORM_1_2     // interval [1, 2)\
37080///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
37081///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
37082///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
37083/// The sign is determined by sc which can take the following values:\
37084///    _MM_MANT_SIGN_src     // sign = sign(src)\
37085///    _MM_MANT_SIGN_zero    // sign = 0\
37086///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
37087/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37088///
37089/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_sd&expand=2896)
37090#[inline]
37091#[target_feature(enable = "avx512f")]
37092#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37093#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))]
37094#[rustc_legacy_const_generics(4, 5)]
37095pub fn _mm_mask_getmant_sd<
37096    const NORM: _MM_MANTISSA_NORM_ENUM,
37097    const SIGN: _MM_MANTISSA_SIGN_ENUM,
37098>(
37099    src: __m128d,
37100    k: __mmask8,
37101    a: __m128d,
37102    b: __m128d,
37103) -> __m128d {
37104    unsafe {
37105        static_assert_uimm_bits!(NORM, 4);
37106        static_assert_uimm_bits!(SIGN, 2);
37107        let a = a.as_f64x2();
37108        let b = b.as_f64x2();
37109        let src = src.as_f64x2();
37110        let r = vgetmantsd(a, b, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
37111        transmute(r)
37112    }
37113}
37114
37115/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
37116/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
37117///    _MM_MANT_NORM_1_2     // interval [1, 2)\
37118///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
37119///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
37120///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
37121/// The sign is determined by sc which can take the following values:\
37122///    _MM_MANT_SIGN_src     // sign = sign(src)\
37123///    _MM_MANT_SIGN_zero    // sign = 0\
37124///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
37125/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37126///
37127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_sd&expand=2897)
37128#[inline]
37129#[target_feature(enable = "avx512f")]
37130#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37131#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))]
37132#[rustc_legacy_const_generics(3, 4)]
37133pub fn _mm_maskz_getmant_sd<
37134    const NORM: _MM_MANTISSA_NORM_ENUM,
37135    const SIGN: _MM_MANTISSA_SIGN_ENUM,
37136>(
37137    k: __mmask8,
37138    a: __m128d,
37139    b: __m128d,
37140) -> __m128d {
37141    unsafe {
37142        static_assert_uimm_bits!(NORM, 4);
37143        static_assert_uimm_bits!(SIGN, 2);
37144        let a = a.as_f64x2();
37145        let b = b.as_f64x2();
37146        let r = vgetmantsd(
37147            a,
37148            b,
37149            SIGN << 2 | NORM,
37150            f64x2::ZERO,
37151            k,
37152            _MM_FROUND_CUR_DIRECTION,
37153        );
37154        transmute(r)
37155    }
37156}
37157
37158/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
37159/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37160/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37161/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37162/// * [`_MM_FROUND_TO_POS_INF`] : round up
37163/// * [`_MM_FROUND_TO_ZERO`] : truncate
37164/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37165///
37166/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_ss&expand=4802)
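///
/// # Example
///
/// A hedged sketch of rounding to zero fraction bits (illustrative; assumes `avx512f` and the
/// unstable `stdarch_x86_avx512` feature):
///
/// ```ignore
/// let a = _mm_set1_ps(0.0);  // upper three lanes are copied from a
/// let b = _mm_set_ss(2.7);   // the value to round is in the lowest lane of b
/// let r = _mm_roundscale_ss::<_MM_FROUND_TO_NEAREST_INT>(a, b);
/// assert_eq!(_mm_cvtss_f32(r), 3.0); // IMM8 = 0: zero fraction bits, round-to-nearest
/// ```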
37167#[inline]
37168#[target_feature(enable = "avx512f")]
37169#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37170#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 255))]
37171#[rustc_legacy_const_generics(2)]
37172pub fn _mm_roundscale_ss<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
37173    unsafe {
37174        static_assert_uimm_bits!(IMM8, 8);
37175        let a = a.as_f32x4();
37176        let b = b.as_f32x4();
37177        let r = vrndscaless(
37178            a,
37179            b,
37180            f32x4::ZERO,
37181            0b11111111,
37182            IMM8,
37183            _MM_FROUND_CUR_DIRECTION,
37184        );
37185        transmute(r)
37186    }
37187}
37188
37189/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
37190/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37191/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37192/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37193/// * [`_MM_FROUND_TO_POS_INF`] : round up
37194/// * [`_MM_FROUND_TO_ZERO`] : truncate
37195/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37196///
37197/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_ss&expand=4800)
37198#[inline]
37199#[target_feature(enable = "avx512f")]
37200#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37201#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0))]
37202#[rustc_legacy_const_generics(4)]
37203pub fn _mm_mask_roundscale_ss<const IMM8: i32>(
37204    src: __m128,
37205    k: __mmask8,
37206    a: __m128,
37207    b: __m128,
37208) -> __m128 {
37209    unsafe {
37210        static_assert_uimm_bits!(IMM8, 8);
37211        let a = a.as_f32x4();
37212        let b = b.as_f32x4();
37213        let src = src.as_f32x4();
37214        let r = vrndscaless(a, b, src, k, IMM8, _MM_FROUND_CUR_DIRECTION);
37215        transmute(r)
37216    }
37217}
37218
37219/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
37220/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37221/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37222/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37223/// * [`_MM_FROUND_TO_POS_INF`] : round up
37224/// * [`_MM_FROUND_TO_ZERO`] : truncate
37225/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37226///
37227/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_ss&expand=4801)
37228#[inline]
37229#[target_feature(enable = "avx512f")]
37230#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37231#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0))]
37232#[rustc_legacy_const_generics(3)]
37233pub fn _mm_maskz_roundscale_ss<const IMM8: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
37234    unsafe {
37235        static_assert_uimm_bits!(IMM8, 8);
37236        let a = a.as_f32x4();
37237        let b = b.as_f32x4();
37238        let r = vrndscaless(a, b, f32x4::ZERO, k, IMM8, _MM_FROUND_CUR_DIRECTION);
37239        transmute(r)
37240    }
37241}
37242
37243/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
37244/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37245/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37246/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37247/// * [`_MM_FROUND_TO_POS_INF`] : round up
37248/// * [`_MM_FROUND_TO_ZERO`] : truncate
37249/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37250///
37251/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_sd&expand=4799)
37252#[inline]
37253#[target_feature(enable = "avx512f")]
37254#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37255#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 255))]
37256#[rustc_legacy_const_generics(2)]
37257pub fn _mm_roundscale_sd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d {
37258    unsafe {
37259        static_assert_uimm_bits!(IMM8, 8);
37260        let a = a.as_f64x2();
37261        let b = b.as_f64x2();
37262        let r = vrndscalesd(
37263            a,
37264            b,
37265            f64x2::ZERO,
37266            0b11111111,
37267            IMM8,
37268            _MM_FROUND_CUR_DIRECTION,
37269        );
37270        transmute(r)
37271    }
37272}
37273
37274/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
37275/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37276/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37277/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37278/// * [`_MM_FROUND_TO_POS_INF`] : round up
37279/// * [`_MM_FROUND_TO_ZERO`] : truncate
37280/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37281///
37282/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_sd&expand=4797)
37283#[inline]
37284#[target_feature(enable = "avx512f")]
37285#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37286#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0))]
37287#[rustc_legacy_const_generics(4)]
37288pub fn _mm_mask_roundscale_sd<const IMM8: i32>(
37289    src: __m128d,
37290    k: __mmask8,
37291    a: __m128d,
37292    b: __m128d,
37293) -> __m128d {
37294    unsafe {
37295        static_assert_uimm_bits!(IMM8, 8);
37296        let a = a.as_f64x2();
37297        let b = b.as_f64x2();
37298        let src = src.as_f64x2();
37299        let r = vrndscalesd(a, b, src, k, IMM8, _MM_FROUND_CUR_DIRECTION);
37300        transmute(r)
37301    }
37302}
37303
37304/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
37305/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37306/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37307/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37308/// * [`_MM_FROUND_TO_POS_INF`] : round up
37309/// * [`_MM_FROUND_TO_ZERO`] : truncate
37310/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37311///
37312/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_sd&expand=4798)
37313#[inline]
37314#[target_feature(enable = "avx512f")]
37315#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37316#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0))]
37317#[rustc_legacy_const_generics(3)]
37318pub fn _mm_maskz_roundscale_sd<const IMM8: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
37319    unsafe {
37320        static_assert_uimm_bits!(IMM8, 8);
37321        let a = a.as_f64x2();
37322        let b = b.as_f64x2();
37323        let r = vrndscalesd(a, b, f64x2::ZERO, k, IMM8, _MM_FROUND_CUR_DIRECTION);
37324        transmute(r)
37325    }
37326}
37327
37328/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
37329///
37330/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_ss&expand=4901)
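///
/// # Example
///
/// An illustrative sketch of the `a * 2^floor(b)` behaviour for the lowest lane (assumes
/// `avx512f` and the unstable `stdarch_x86_avx512` feature):
///
/// ```ignore
/// let a = _mm_set_ss(3.0);   // the value to scale comes from the lowest lane of a
/// let b = _mm_set_ss(4.0);   // the (floored) scale exponent comes from the lowest lane of b
/// let r = _mm_scalef_ss(a, b);
/// assert_eq!(_mm_cvtss_f32(r), 48.0); // 3.0 * 2^4
/// ```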
37331#[inline]
37332#[target_feature(enable = "avx512f")]
37333#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37334#[cfg_attr(test, assert_instr(vscalefss))]
37335pub fn _mm_scalef_ss(a: __m128, b: __m128) -> __m128 {
37336    unsafe {
37337        let a = a.as_f32x4();
37338        let b = b.as_f32x4();
37339        transmute(vscalefss(
37340            a,
37341            b,
37342            f32x4::ZERO,
37343            0b11111111,
37344            _MM_FROUND_CUR_DIRECTION,
37345        ))
37346    }
37347}
37348
37349/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37350///
37351/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_ss&expand=4899)
37352#[inline]
37353#[target_feature(enable = "avx512f")]
37354#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37355#[cfg_attr(test, assert_instr(vscalefss))]
37356pub fn _mm_mask_scalef_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
37357    unsafe {
37358        let a = a.as_f32x4();
37359        let b = b.as_f32x4();
37360        let src = src.as_f32x4();
37361        transmute(vscalefss(a, b, src, k, _MM_FROUND_CUR_DIRECTION))
37362    }
37363}
37364
37365/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37366///
37367/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_ss&expand=4900)
37368#[inline]
37369#[target_feature(enable = "avx512f")]
37370#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37371#[cfg_attr(test, assert_instr(vscalefss))]
37372pub fn _mm_maskz_scalef_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
37373    unsafe {
37374        transmute(vscalefss(
37375            a.as_f32x4(),
37376            b.as_f32x4(),
37377            f32x4::ZERO,
37378            k,
37379            _MM_FROUND_CUR_DIRECTION,
37380        ))
37381    }
37382}
37383
37384/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
37385///
37386/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_sd&expand=4898)
37387#[inline]
37388#[target_feature(enable = "avx512f")]
37389#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37390#[cfg_attr(test, assert_instr(vscalefsd))]
37391pub fn _mm_scalef_sd(a: __m128d, b: __m128d) -> __m128d {
37392    unsafe {
37393        transmute(vscalefsd(
37394            a.as_f64x2(),
37395            b.as_f64x2(),
37396            f64x2::ZERO,
37397            0b11111111,
37398            _MM_FROUND_CUR_DIRECTION,
37399        ))
37400    }
37401}
37402
37403/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37404///
37405/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_sd&expand=4896)
37406#[inline]
37407#[target_feature(enable = "avx512f")]
37408#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37409#[cfg_attr(test, assert_instr(vscalefsd))]
37410pub fn _mm_mask_scalef_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
37411    unsafe {
37412        transmute(vscalefsd(
37413            a.as_f64x2(),
37414            b.as_f64x2(),
37415            src.as_f64x2(),
37416            k,
37417            _MM_FROUND_CUR_DIRECTION,
37418        ))
37419    }
37420}
37421
37422/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37423///
37424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_sd&expand=4897)
37425#[inline]
37426#[target_feature(enable = "avx512f")]
37427#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37428#[cfg_attr(test, assert_instr(vscalefsd))]
37429pub fn _mm_maskz_scalef_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
37430    unsafe {
37431        transmute(vscalefsd(
37432            a.as_f64x2(),
37433            b.as_f64x2(),
37434            f64x2::ZERO,
37435            k,
37436            _MM_FROUND_CUR_DIRECTION,
37437        ))
37438    }
37439}
37440
37441/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37442///
37443/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_ss&expand=2582)
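///
/// # Example
///
/// A minimal sketch of the masked scalar fused multiply-add (illustrative values; assumes
/// `avx512f` and the unstable `stdarch_x86_avx512` feature):
///
/// ```ignore
/// let a = _mm_set_ss(2.0);
/// let b = _mm_set_ss(3.0);
/// let c = _mm_set_ss(4.0);
/// let r = _mm_mask_fmadd_ss(a, 0b1, b, c);
/// assert_eq!(_mm_cvtss_f32(r), 10.0); // 2.0 * 3.0 + 4.0; with mask 0b0 the lane stays 2.0 (from a)
/// ```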
37444#[inline]
37445#[target_feature(enable = "avx512f")]
37446#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37447#[cfg_attr(test, assert_instr(vfmadd))]
37448pub fn _mm_mask_fmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
37449    unsafe {
37450        let mut fmadd: f32 = simd_extract!(a, 0);
37451        if (k & 0b00000001) != 0 {
37452            let extractb: f32 = simd_extract!(b, 0);
37453            let extractc: f32 = simd_extract!(c, 0);
37454            fmadd = fmaf32(fmadd, extractb, extractc);
37455        }
37456        simd_insert!(a, 0, fmadd)
37457    }
37458}
37459
37460/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37461///
37462/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_ss&expand=2584)
37463#[inline]
37464#[target_feature(enable = "avx512f")]
37465#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37466#[cfg_attr(test, assert_instr(vfmadd))]
37467pub fn _mm_maskz_fmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
37468    unsafe {
37469        let mut fmadd: f32 = 0.;
37470        if (k & 0b00000001) != 0 {
37471            let extracta: f32 = simd_extract!(a, 0);
37472            let extractb: f32 = simd_extract!(b, 0);
37473            let extractc: f32 = simd_extract!(c, 0);
37474            fmadd = fmaf32(extracta, extractb, extractc);
37475        }
37476        simd_insert!(a, 0, fmadd)
37477    }
37478}
37479
37480/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
37481///
37482/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_ss&expand=2583)
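///
/// # Example
///
/// An illustrative sketch showing that both the fallback lane and the upper lanes come from `c`
/// (assumes `avx512f` and the unstable `stdarch_x86_avx512` feature):
///
/// ```ignore
/// let a = _mm_set_ss(2.0);
/// let b = _mm_set_ss(3.0);
/// let c = _mm_set_ss(4.0);
/// let r = _mm_mask3_fmadd_ss(a, b, c, 0b0);
/// assert_eq!(_mm_cvtss_f32(r), 4.0); // mask bit 0 is clear, so the lane is copied from c; with 0b1 it would be 10.0
/// ```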
37483#[inline]
37484#[target_feature(enable = "avx512f")]
37485#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37486#[cfg_attr(test, assert_instr(vfmadd))]
37487pub fn _mm_mask3_fmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
37488    unsafe {
37489        let mut fmadd: f32 = simd_extract!(c, 0);
37490        if (k & 0b00000001) != 0 {
37491            let extracta: f32 = simd_extract!(a, 0);
37492            let extractb: f32 = simd_extract!(b, 0);
37493            fmadd = fmaf32(extracta, extractb, fmadd);
37494        }
37495        simd_insert!(c, 0, fmadd)
37496    }
37497}
37498
37499/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37500///
37501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_sd&expand=2578)
37502#[inline]
37503#[target_feature(enable = "avx512f")]
37504#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37505#[cfg_attr(test, assert_instr(vfmadd))]
37506pub fn _mm_mask_fmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
37507    unsafe {
37508        let mut fmadd: f64 = simd_extract!(a, 0);
37509        if (k & 0b00000001) != 0 {
37510            let extractb: f64 = simd_extract!(b, 0);
37511            let extractc: f64 = simd_extract!(c, 0);
37512            fmadd = fmaf64(fmadd, extractb, extractc);
37513        }
37514        simd_insert!(a, 0, fmadd)
37515    }
37516}
37517
37518/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37519///
37520/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_sd&expand=2580)
37521#[inline]
37522#[target_feature(enable = "avx512f")]
37523#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37524#[cfg_attr(test, assert_instr(vfmadd))]
37525pub fn _mm_maskz_fmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
37526    unsafe {
37527        let mut fmadd: f64 = 0.;
37528        if (k & 0b00000001) != 0 {
37529            let extracta: f64 = simd_extract!(a, 0);
37530            let extractb: f64 = simd_extract!(b, 0);
37531            let extractc: f64 = simd_extract!(c, 0);
37532            fmadd = fmaf64(extracta, extractb, extractc);
37533        }
37534        simd_insert!(a, 0, fmadd)
37535    }
37536}
37537
37538/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
37539///
37540/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_sd&expand=2579)
37541#[inline]
37542#[target_feature(enable = "avx512f")]
37543#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37544#[cfg_attr(test, assert_instr(vfmadd))]
37545pub fn _mm_mask3_fmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
37546    unsafe {
37547        let mut fmadd: f64 = simd_extract!(c, 0);
37548        if (k & 0b00000001) != 0 {
37549            let extracta: f64 = simd_extract!(a, 0);
37550            let extractb: f64 = simd_extract!(b, 0);
37551            fmadd = fmaf64(extracta, extractb, fmadd);
37552        }
37553        simd_insert!(c, 0, fmadd)
37554    }
37555}
37556
37557/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37558///
37559/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_ss&expand=2668)
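///
/// # Example
///
/// A hedged sketch with illustrative values (assumes `avx512f` and the unstable
/// `stdarch_x86_avx512` feature):
///
/// ```ignore
/// let a = _mm_set_ss(2.0);
/// let b = _mm_set_ss(3.0);
/// let c = _mm_set_ss(4.0);
/// let r = _mm_mask_fmsub_ss(a, 0b1, b, c);
/// assert_eq!(_mm_cvtss_f32(r), 2.0); // 2.0 * 3.0 - 4.0; with mask 0b0 the lane is copied from a
/// ```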
37560#[inline]
37561#[target_feature(enable = "avx512f")]
37562#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37563#[cfg_attr(test, assert_instr(vfmsub))]
37564pub fn _mm_mask_fmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
37565    unsafe {
37566        let mut fmsub: f32 = simd_extract!(a, 0);
37567        if (k & 0b00000001) != 0 {
37568            let extractb: f32 = simd_extract!(b, 0);
37569            let extractc: f32 = simd_extract!(c, 0);
37570            let extractc = -extractc;
37571            fmsub = fmaf32(fmsub, extractb, extractc);
37572        }
37573        simd_insert!(a, 0, fmsub)
37574    }
37575}
37576
37577/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37578///
37579/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_ss&expand=2670)
37580#[inline]
37581#[target_feature(enable = "avx512f")]
37582#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37583#[cfg_attr(test, assert_instr(vfmsub))]
37584pub fn _mm_maskz_fmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
37585    unsafe {
37586        let mut fmsub: f32 = 0.;
37587        if (k & 0b00000001) != 0 {
37588            let extracta: f32 = simd_extract!(a, 0);
37589            let extractb: f32 = simd_extract!(b, 0);
37590            let extractc: f32 = simd_extract!(c, 0);
37591            let extractc = -extractc;
37592            fmsub = fmaf32(extracta, extractb, extractc);
37593        }
37594        simd_insert!(a, 0, fmsub)
37595    }
37596}
37597
37598/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
37599///
37600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_ss&expand=2669)
37601#[inline]
37602#[target_feature(enable = "avx512f")]
37603#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37604#[cfg_attr(test, assert_instr(vfmsub))]
37605pub fn _mm_mask3_fmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
37606    unsafe {
37607        let mut fmsub: f32 = simd_extract!(c, 0);
37608        if (k & 0b00000001) != 0 {
37609            let extracta: f32 = simd_extract!(a, 0);
37610            let extractb: f32 = simd_extract!(b, 0);
37611            let extractc = -fmsub;
37612            fmsub = fmaf32(extracta, extractb, extractc);
37613        }
37614        simd_insert!(c, 0, fmsub)
37615    }
37616}
37617
37618/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37619///
37620/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_sd&expand=2664)
37621#[inline]
37622#[target_feature(enable = "avx512f")]
37623#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37624#[cfg_attr(test, assert_instr(vfmsub))]
37625pub fn _mm_mask_fmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
37626    unsafe {
37627        let mut fmsub: f64 = simd_extract!(a, 0);
37628        if (k & 0b00000001) != 0 {
37629            let extractb: f64 = simd_extract!(b, 0);
37630            let extractc: f64 = simd_extract!(c, 0);
37631            let extractc = -extractc;
37632            fmsub = fmaf64(fmsub, extractb, extractc);
37633        }
37634        simd_insert!(a, 0, fmsub)
37635    }
37636}
37637
37638/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37639///
37640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_sd&expand=2666)
37641#[inline]
37642#[target_feature(enable = "avx512f")]
37643#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37644#[cfg_attr(test, assert_instr(vfmsub))]
37645pub fn _mm_maskz_fmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
37646    unsafe {
37647        let mut fmsub: f64 = 0.;
37648        if (k & 0b00000001) != 0 {
37649            let extracta: f64 = simd_extract!(a, 0);
37650            let extractb: f64 = simd_extract!(b, 0);
37651            let extractc: f64 = simd_extract!(c, 0);
37652            let extractc = -extractc;
37653            fmsub = fmaf64(extracta, extractb, extractc);
37654        }
37655        simd_insert!(a, 0, fmsub)
37656    }
37657}
37658
37659/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
37660///
37661/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_sd&expand=2665)
37662#[inline]
37663#[target_feature(enable = "avx512f")]
37664#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37665#[cfg_attr(test, assert_instr(vfmsub))]
37666pub fn _mm_mask3_fmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
37667    unsafe {
37668        let mut fmsub: f64 = simd_extract!(c, 0);
37669        if (k & 0b00000001) != 0 {
37670            let extracta: f64 = simd_extract!(a, 0);
37671            let extractb: f64 = simd_extract!(b, 0);
37672            let extractc = -fmsub;
37673            fmsub = fmaf64(extracta, extractb, extractc);
37674        }
37675        simd_insert!(c, 0, fmsub)
37676    }
37677}
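// Illustrative sketch (not part of the original file): how the masked scalar fmsub
// variants behave for a set and a cleared mask bit. The function name and the literal
// inputs below are hypothetical; running this requires an AVX-512F capable CPU.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_scalar_fmsub_masking() {
    let a = _mm_set_pd(8.0, 2.0); // lower lane = 2.0, upper lane = 8.0
    let b = _mm_set_sd(3.0);
    let c = _mm_set_sd(1.0);
    // Mask bit 0 set: lower lane = 2.0 * 3.0 - 1.0 = 5.0; the upper lane is copied from `a`.
    let r = _mm_mask_fmsub_sd(a, 0b1, b, c);
    assert_eq!(_mm_cvtsd_f64(r), 5.0);
    // Mask bit 0 clear: the writemask form keeps the lower lane of `a` ...
    let r = _mm_mask_fmsub_sd(a, 0b0, b, c);
    assert_eq!(_mm_cvtsd_f64(r), 2.0);
    // ... while the zeromask form zeroes it out.
    let r = _mm_maskz_fmsub_sd(0b0, a, b, c);
    assert_eq!(_mm_cvtsd_f64(r), 0.0);
}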
37678
37679/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37680///
37681/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_ss&expand=2748)
37682#[inline]
37683#[target_feature(enable = "avx512f")]
37684#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37685#[cfg_attr(test, assert_instr(vfnmadd))]
37686pub fn _mm_mask_fnmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
37687    unsafe {
37688        let mut fnmadd: f32 = simd_extract!(a, 0);
37689        if (k & 0b00000001) != 0 {
37690            let extracta = -fnmadd;
37691            let extractb: f32 = simd_extract!(b, 0);
37692            let extractc: f32 = simd_extract!(c, 0);
37693            fnmadd = fmaf32(extracta, extractb, extractc);
37694        }
37695        simd_insert!(a, 0, fnmadd)
37696    }
37697}
37698
37699/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37700///
37701/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_ss&expand=2750)
37702#[inline]
37703#[target_feature(enable = "avx512f")]
37704#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37705#[cfg_attr(test, assert_instr(vfnmadd))]
37706pub fn _mm_maskz_fnmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
37707    unsafe {
37708        let mut fnmadd: f32 = 0.;
37709        if (k & 0b00000001) != 0 {
37710            let extracta: f32 = simd_extract!(a, 0);
37711            let extracta = -extracta;
37712            let extractb: f32 = simd_extract!(b, 0);
37713            let extractc: f32 = simd_extract!(c, 0);
37714            fnmadd = fmaf32(extracta, extractb, extractc);
37715        }
37716        simd_insert!(a, 0, fnmadd)
37717    }
37718}
37719
37720/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
37721///
37722/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_ss&expand=2749)
37723#[inline]
37724#[target_feature(enable = "avx512f")]
37725#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37726#[cfg_attr(test, assert_instr(vfnmadd))]
37727pub fn _mm_mask3_fnmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
37728    unsafe {
37729        let mut fnmadd: f32 = simd_extract!(c, 0);
37730        if (k & 0b00000001) != 0 {
37731            let extracta: f32 = simd_extract!(a, 0);
37732            let extracta = -extracta;
37733            let extractb: f32 = simd_extract!(b, 0);
37734            fnmadd = fmaf32(extracta, extractb, fnmadd);
37735        }
37736        simd_insert!(c, 0, fnmadd)
37737    }
37738}
37739
37740/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37741///
37742/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_sd&expand=2744)
37743#[inline]
37744#[target_feature(enable = "avx512f")]
37745#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37746#[cfg_attr(test, assert_instr(vfnmadd))]
37747pub fn _mm_mask_fnmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
37748    unsafe {
37749        let mut fnmadd: f64 = simd_extract!(a, 0);
37750        if (k & 0b00000001) != 0 {
37751            let extracta = -fnmadd;
37752            let extractb: f64 = simd_extract!(b, 0);
37753            let extractc: f64 = simd_extract!(c, 0);
37754            fnmadd = fmaf64(extracta, extractb, extractc);
37755        }
37756        simd_insert!(a, 0, fnmadd)
37757    }
37758}
37759
37760/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37761///
37762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_sd&expand=2746)
37763#[inline]
37764#[target_feature(enable = "avx512f")]
37765#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37766#[cfg_attr(test, assert_instr(vfnmadd))]
37767pub fn _mm_maskz_fnmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
37768    unsafe {
37769        let mut fnmadd: f64 = 0.;
37770        if (k & 0b00000001) != 0 {
37771            let extracta: f64 = simd_extract!(a, 0);
37772            let extracta = -extracta;
37773            let extractb: f64 = simd_extract!(b, 0);
37774            let extractc: f64 = simd_extract!(c, 0);
37775            fnmadd = fmaf64(extracta, extractb, extractc);
37776        }
37777        simd_insert!(a, 0, fnmadd)
37778    }
37779}
37780
37781/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
37782///
37783/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_sd&expand=2745)
37784#[inline]
37785#[target_feature(enable = "avx512f")]
37786#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37787#[cfg_attr(test, assert_instr(vfnmadd))]
37788pub fn _mm_mask3_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
37789    unsafe {
37790        let mut fnmadd: f64 = simd_extract!(c, 0);
37791        if (k & 0b00000001) != 0 {
37792            let extracta: f64 = simd_extract!(a, 0);
37793            let extracta = -extracta;
37794            let extractb: f64 = simd_extract!(b, 0);
37795            fnmadd = fmaf64(extracta, extractb, fnmadd);
37796        }
37797        simd_insert!(c, 0, fnmadd)
37798    }
37799}
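// Illustrative sketch (not part of the original file): the lower-lane arithmetic of the
// scalar fnmadd variants, i.e. `-(a * b) + c`. The function name and inputs are
// hypothetical; requires an AVX-512F capable CPU.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_scalar_fnmadd_masking() {
    let a = _mm_set_ss(2.0);
    let b = _mm_set_ss(3.0);
    let c = _mm_set_ss(10.0);
    // Mask bit 0 set: lower lane = -(2.0 * 3.0) + 10.0 = 4.0.
    let r = _mm_mask_fnmadd_ss(a, 0b1, b, c);
    assert_eq!(_mm_cvtss_f32(r), 4.0);
    // The mask3 form falls back to (and copies its upper lanes from) `c` instead of `a`.
    let r = _mm_mask3_fnmadd_ss(a, b, c, 0b0);
    assert_eq!(_mm_cvtss_f32(r), 10.0);
}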
37800
37801/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37802///
37803/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_ss&expand=2796)
37804#[inline]
37805#[target_feature(enable = "avx512f")]
37806#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37807#[cfg_attr(test, assert_instr(vfnmsub))]
37808pub fn _mm_mask_fnmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
37809    unsafe {
37810        let mut fnmsub: f32 = simd_extract!(a, 0);
37811        if (k & 0b00000001) != 0 {
37812            let extracta = -fnmsub;
37813            let extractb: f32 = simd_extract!(b, 0);
37814            let extractc: f32 = simd_extract!(c, 0);
37815            let extractc = -extractc;
37816            fnmsub = fmaf32(extracta, extractb, extractc);
37817        }
37818        simd_insert!(a, 0, fnmsub)
37819    }
37820}
37821
37822/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37823///
37824/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_ss&expand=2798)
37825#[inline]
37826#[target_feature(enable = "avx512f")]
37827#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37828#[cfg_attr(test, assert_instr(vfnmsub))]
37829pub fn _mm_maskz_fnmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
37830    unsafe {
37831        let mut fnmsub: f32 = 0.;
37832        if (k & 0b00000001) != 0 {
37833            let extracta: f32 = simd_extract!(a, 0);
37834            let extracta = -extracta;
37835            let extractb: f32 = simd_extract!(b, 0);
37836            let extractc: f32 = simd_extract!(c, 0);
37837            let extractc = -extractc;
37838            fnmsub = fmaf32(extracta, extractb, extractc);
37839        }
37840        simd_insert!(a, 0, fnmsub)
37841    }
37842}
37843
37844/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
37845///
37846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_ss&expand=2797)
37847#[inline]
37848#[target_feature(enable = "avx512f")]
37849#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37850#[cfg_attr(test, assert_instr(vfnmsub))]
37851pub fn _mm_mask3_fnmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
37852    unsafe {
37853        let mut fnmsub: f32 = simd_extract!(c, 0);
37854        if (k & 0b00000001) != 0 {
37855            let extracta: f32 = simd_extract!(a, 0);
37856            let extracta = -extracta;
37857            let extractb: f32 = simd_extract!(b, 0);
37858            let extractc = -fnmsub;
37859            fnmsub = fmaf32(extracta, extractb, extractc);
37860        }
37861        simd_insert!(c, 0, fnmsub)
37862    }
37863}
37864
37865/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37866///
37867/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_sd&expand=2792)
37868#[inline]
37869#[target_feature(enable = "avx512f")]
37870#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37871#[cfg_attr(test, assert_instr(vfnmsub))]
37872pub fn _mm_mask_fnmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
37873    unsafe {
37874        let mut fnmsub: f64 = simd_extract!(a, 0);
37875        if (k & 0b00000001) != 0 {
37876            let extracta = -fnmsub;
37877            let extractb: f64 = simd_extract!(b, 0);
37878            let extractc: f64 = simd_extract!(c, 0);
37879            let extractc = -extractc;
37880            fnmsub = fmaf64(extracta, extractb, extractc);
37881        }
37882        simd_insert!(a, 0, fnmsub)
37883    }
37884}
37885
37886/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37887///
37888/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_sd&expand=2794)
37889#[inline]
37890#[target_feature(enable = "avx512f")]
37891#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37892#[cfg_attr(test, assert_instr(vfnmsub))]
37893pub fn _mm_maskz_fnmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
37894    unsafe {
37895        let mut fnmsub: f64 = 0.;
37896        if (k & 0b00000001) != 0 {
37897            let extracta: f64 = simd_extract!(a, 0);
37898            let extracta = -extracta;
37899            let extractb: f64 = simd_extract!(b, 0);
37900            let extractc: f64 = simd_extract!(c, 0);
37901            let extractc = -extractc;
37902            fnmsub = fmaf64(extracta, extractb, extractc);
37903        }
37904        simd_insert!(a, 0, fnmsub)
37905    }
37906}
37907
37908/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
37909///
37910/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_sd&expand=2793)
37911#[inline]
37912#[target_feature(enable = "avx512f")]
37913#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37914#[cfg_attr(test, assert_instr(vfnmsub))]
37915pub fn _mm_mask3_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
37916    unsafe {
37917        let mut fnmsub: f64 = simd_extract!(c, 0);
37918        if (k & 0b00000001) != 0 {
37919            let extracta: f64 = simd_extract!(a, 0);
37920            let extracta = -extracta;
37921            let extractb: f64 = simd_extract!(b, 0);
37922            let extractc = -fnmsub;
37923            fnmsub = fmaf64(extracta, extractb, extractc);
37924        }
37925        simd_insert!(c, 0, fnmsub)
37926    }
37927}
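// Illustrative sketch (not part of the original file): the scalar fnmsub variants compute
// `-(a * b) - c` in the lower lane. The function name and inputs are hypothetical;
// requires an AVX-512F capable CPU.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_scalar_fnmsub_masking() {
    let a = _mm_set_sd(2.0);
    let b = _mm_set_sd(3.0);
    let c = _mm_set_sd(1.0);
    // Mask bit 0 set: lower lane = -(2.0 * 3.0) - 1.0 = -7.0.
    let r = _mm_maskz_fnmsub_sd(0b1, a, b, c);
    assert_eq!(_mm_cvtsd_f64(r), -7.0);
    // Mask bit 0 clear: the zeromask form writes 0.0 to the lower lane.
    let r = _mm_maskz_fnmsub_sd(0b0, a, b, c);
    assert_eq!(_mm_cvtsd_f64(r), 0.0);
}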
37928
37929/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
37930///
37931/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37932/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
37933/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
37934/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
37935/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
37936/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37937///
37938/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_round_ss&expand=151)
37939#[inline]
37940#[target_feature(enable = "avx512f")]
37941#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37942#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))]
37943#[rustc_legacy_const_generics(2)]
37944pub fn _mm_add_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
37945    unsafe {
37946        static_assert_rounding!(ROUNDING);
37947        let a = a.as_f32x4();
37948        let b = b.as_f32x4();
37949        let r = vaddss(a, b, f32x4::ZERO, 0b1, ROUNDING);
37950        transmute(r)
37951    }
37952}
37953
37954/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
37955///
37956/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37957/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
37958/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
37959/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
37960/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
37961/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37962///
37963/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_round_ss&expand=152)
37964#[inline]
37965#[target_feature(enable = "avx512f")]
37966#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37967#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))]
37968#[rustc_legacy_const_generics(4)]
37969pub fn _mm_mask_add_round_ss<const ROUNDING: i32>(
37970    src: __m128,
37971    k: __mmask8,
37972    a: __m128,
37973    b: __m128,
37974) -> __m128 {
37975    unsafe {
37976        static_assert_rounding!(ROUNDING);
37977        let a = a.as_f32x4();
37978        let b = b.as_f32x4();
37979        let src = src.as_f32x4();
37980        let r = vaddss(a, b, src, k, ROUNDING);
37981        transmute(r)
37982    }
37983}
37984
37985/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
37986///
37987/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37988/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
37989/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
37990/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
37991/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
37992/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37993///
37994/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_round_ss&expand=153)
37995#[inline]
37996#[target_feature(enable = "avx512f")]
37997#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37998#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))]
37999#[rustc_legacy_const_generics(3)]
38000pub fn _mm_maskz_add_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38001    unsafe {
38002        static_assert_rounding!(ROUNDING);
38003        let a = a.as_f32x4();
38004        let b = b.as_f32x4();
38005        let r = vaddss(a, b, f32x4::ZERO, k, ROUNDING);
38006        transmute(r)
38007    }
38008}
38009
38010/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38011///
38012/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38013/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38014/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38015/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38016/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38017/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38018///
38019/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_round_sd&expand=148)
38020#[inline]
38021#[target_feature(enable = "avx512f")]
38022#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38023#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))]
38024#[rustc_legacy_const_generics(2)]
38025pub fn _mm_add_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
38026    unsafe {
38027        static_assert_rounding!(ROUNDING);
38028        let a = a.as_f64x2();
38029        let b = b.as_f64x2();
38030        let r = vaddsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
38031        transmute(r)
38032    }
38033}
38034
38035/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38036///
38037/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38038/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38039/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38040/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38041/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38042/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38043///
38044/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_round_sd&expand=149)
38045#[inline]
38046#[target_feature(enable = "avx512f")]
38047#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38048#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))]
38049#[rustc_legacy_const_generics(4)]
38050pub fn _mm_mask_add_round_sd<const ROUNDING: i32>(
38051    src: __m128d,
38052    k: __mmask8,
38053    a: __m128d,
38054    b: __m128d,
38055) -> __m128d {
38056    unsafe {
38057        static_assert_rounding!(ROUNDING);
38058        let a = a.as_f64x2();
38059        let b = b.as_f64x2();
38060        let src = src.as_f64x2();
38061        let r = vaddsd(a, b, src, k, ROUNDING);
38062        transmute(r)
38063    }
38064}
38065
38066/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38067///
38068/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38069/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38070/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38071/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38072/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38073/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38074///
38075/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_round_sd&expand=150)
38076#[inline]
38077#[target_feature(enable = "avx512f")]
38078#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38079#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))]
38080#[rustc_legacy_const_generics(3)]
38081pub fn _mm_maskz_add_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38082    unsafe {
38083        static_assert_rounding!(ROUNDING);
38084        let a = a.as_f64x2();
38085        let b = b.as_f64x2();
38086        let r = vaddsd(a, b, f64x2::ZERO, k, ROUNDING);
38087        transmute(r)
38088    }
38089}
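// Illustrative sketch (not part of the original file): selecting an explicit rounding mode
// for the scalar add. The function name and inputs are hypothetical; requires an
// AVX-512F capable CPU.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_add_round_ss_usage() {
    let a = _mm_set_ss(1.5);
    let b = _mm_set_ss(2.25);
    // Round to nearest with exceptions suppressed; the other `_MM_FROUND_TO_*` modes
    // combine with `_MM_FROUND_NO_EXC` the same way.
    let r = _mm_add_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
    assert_eq!(_mm_cvtss_f32(r), 3.75);
    // The maskz form zeroes the lower lane when mask bit 0 is clear.
    let r = _mm_maskz_add_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0b0, a, b);
    assert_eq!(_mm_cvtss_f32(r), 0.0);
}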
38090
38091/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38092///
38093/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38094/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38095/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38096/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38097/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38098/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38099///
38100/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sub_round_ss&expand=5745)
38101#[inline]
38102#[target_feature(enable = "avx512f")]
38103#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38104#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))]
38105#[rustc_legacy_const_generics(2)]
38106pub fn _mm_sub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
38107    unsafe {
38108        static_assert_rounding!(ROUNDING);
38109        let a = a.as_f32x4();
38110        let b = b.as_f32x4();
38111        let r = vsubss(a, b, f32x4::ZERO, 0b1, ROUNDING);
38112        transmute(r)
38113    }
38114}
38115
38116/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38117///
38118/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38119/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38120/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38121/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38122/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38123/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38124///
38125/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_round_ss&expand=5743)
38126#[inline]
38127#[target_feature(enable = "avx512f")]
38128#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38129#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))]
38130#[rustc_legacy_const_generics(4)]
38131pub fn _mm_mask_sub_round_ss<const ROUNDING: i32>(
38132    src: __m128,
38133    k: __mmask8,
38134    a: __m128,
38135    b: __m128,
38136) -> __m128 {
38137    unsafe {
38138        static_assert_rounding!(ROUNDING);
38139        let a = a.as_f32x4();
38140        let b = b.as_f32x4();
38141        let src = src.as_f32x4();
38142        let r = vsubss(a, b, src, k, ROUNDING);
38143        transmute(r)
38144    }
38145}
38146
38147/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38148///
38149/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38150/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38151/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38152/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38153/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38154/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38155///
38156/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_round_ss&expand=5744)
38157#[inline]
38158#[target_feature(enable = "avx512f")]
38159#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38160#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))]
38161#[rustc_legacy_const_generics(3)]
38162pub fn _mm_maskz_sub_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38163    unsafe {
38164        static_assert_rounding!(ROUNDING);
38165        let a = a.as_f32x4();
38166        let b = b.as_f32x4();
38167        let r = vsubss(a, b, f32x4::ZERO, k, ROUNDING);
38168        transmute(r)
38169    }
38170}
38171
38172/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38173///
38174/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38175/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38176/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38177/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38178/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38179/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38180///
38181/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sub_round_sd&expand=5742)
38182#[inline]
38183#[target_feature(enable = "avx512f")]
38184#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38185#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))]
38186#[rustc_legacy_const_generics(2)]
38187pub fn _mm_sub_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
38188    unsafe {
38189        static_assert_rounding!(ROUNDING);
38190        let a = a.as_f64x2();
38191        let b = b.as_f64x2();
38192        let r = vsubsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
38193        transmute(r)
38194    }
38195}
38196
38197/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38198///
38199/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38200/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38201/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38202/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38203/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38204/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38205///
38206/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_round_sd&expand=5740)
38207#[inline]
38208#[target_feature(enable = "avx512f")]
38209#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38210#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))]
38211#[rustc_legacy_const_generics(4)]
38212pub fn _mm_mask_sub_round_sd<const ROUNDING: i32>(
38213    src: __m128d,
38214    k: __mmask8,
38215    a: __m128d,
38216    b: __m128d,
38217) -> __m128d {
38218    unsafe {
38219        static_assert_rounding!(ROUNDING);
38220        let a = a.as_f64x2();
38221        let b = b.as_f64x2();
38222        let src = src.as_f64x2();
38223        let r = vsubsd(a, b, src, k, ROUNDING);
38224        transmute(r)
38225    }
38226}
38227
38228/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38229///
38230/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38231/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38232/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38233/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38234/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38235/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38236///
38237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_round_sd&expand=5741)
38238#[inline]
38239#[target_feature(enable = "avx512f")]
38240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38241#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))]
38242#[rustc_legacy_const_generics(3)]
38243pub fn _mm_maskz_sub_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38244    unsafe {
38245        static_assert_rounding!(ROUNDING);
38246        let a = a.as_f64x2();
38247        let b = b.as_f64x2();
38248        let r = vsubsd(a, b, f64x2::ZERO, k, ROUNDING);
38249        transmute(r)
38250    }
38251}
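// Illustrative sketch (not part of the original file): scalar subtraction with an explicit
// rounding mode and a writemask. The function name and inputs are hypothetical; requires
// an AVX-512F capable CPU.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_sub_round_sd_usage() {
    const R: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
    let src = _mm_set_sd(100.0);
    let a = _mm_set_sd(5.0);
    let b = _mm_set_sd(1.5);
    // Mask bit 0 set: lower lane = 5.0 - 1.5 = 3.5.
    let r = _mm_mask_sub_round_sd::<R>(src, 0b1, a, b);
    assert_eq!(_mm_cvtsd_f64(r), 3.5);
    // Mask bit 0 clear: the lower lane is copied from `src`.
    let r = _mm_mask_sub_round_sd::<R>(src, 0b0, a, b);
    assert_eq!(_mm_cvtsd_f64(r), 100.0);
}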
38252
38253/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38254///
38255/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38256/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38257/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38258/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38259/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38260/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38261///
38262/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mul_round_ss&expand=3946)
38263#[inline]
38264#[target_feature(enable = "avx512f")]
38265#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38266#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))]
38267#[rustc_legacy_const_generics(2)]
38268pub fn _mm_mul_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
38269    unsafe {
38270        static_assert_rounding!(ROUNDING);
38271        let a = a.as_f32x4();
38272        let b = b.as_f32x4();
38273        let r = vmulss(a, b, f32x4::ZERO, 0b1, ROUNDING);
38274        transmute(r)
38275    }
38276}
38277
38278/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38279///
38280/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38281/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38282/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38283/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38284/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38285/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38286///
38287/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_round_ss&expand=3944)
38288#[inline]
38289#[target_feature(enable = "avx512f")]
38290#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38291#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))]
38292#[rustc_legacy_const_generics(4)]
38293pub fn _mm_mask_mul_round_ss<const ROUNDING: i32>(
38294    src: __m128,
38295    k: __mmask8,
38296    a: __m128,
38297    b: __m128,
38298) -> __m128 {
38299    unsafe {
38300        static_assert_rounding!(ROUNDING);
38301        let a = a.as_f32x4();
38302        let b = b.as_f32x4();
38303        let src = src.as_f32x4();
38304        let r = vmulss(a, b, src, k, ROUNDING);
38305        transmute(r)
38306    }
38307}
38308
38309/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38310///
38311/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38312/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38313/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38314/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38315/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38316/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38317///
38318/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_round_ss&expand=3945)
38319#[inline]
38320#[target_feature(enable = "avx512f")]
38321#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38322#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))]
38323#[rustc_legacy_const_generics(3)]
38324pub fn _mm_maskz_mul_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38325    unsafe {
38326        static_assert_rounding!(ROUNDING);
38327        let a = a.as_f32x4();
38328        let b = b.as_f32x4();
38329        let r = vmulss(a, b, f32x4::ZERO, k, ROUNDING);
38330        transmute(r)
38331    }
38332}
38333
38334/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38335///
38336/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38337/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38338/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38339/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38340/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38341/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38342///
38343/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mul_round_sd&expand=3943)
38344#[inline]
38345#[target_feature(enable = "avx512f")]
38346#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38347#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))]
38348#[rustc_legacy_const_generics(2)]
38349pub fn _mm_mul_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
38350    unsafe {
38351        static_assert_rounding!(ROUNDING);
38352        let a = a.as_f64x2();
38353        let b = b.as_f64x2();
38354        let r = vmulsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
38355        transmute(r)
38356    }
38357}
38358
38359/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38360///
38361/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38362/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38363/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38364/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38365/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38366/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38367///
38368/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_round_sd&expand=3941)
38369#[inline]
38370#[target_feature(enable = "avx512f")]
38371#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38372#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))]
38373#[rustc_legacy_const_generics(4)]
38374pub fn _mm_mask_mul_round_sd<const ROUNDING: i32>(
38375    src: __m128d,
38376    k: __mmask8,
38377    a: __m128d,
38378    b: __m128d,
38379) -> __m128d {
38380    unsafe {
38381        static_assert_rounding!(ROUNDING);
38382        let a = a.as_f64x2();
38383        let b = b.as_f64x2();
38384        let src = src.as_f64x2();
38385        let r = vmulsd(a, b, src, k, ROUNDING);
38386        transmute(r)
38387    }
38388}
38389
38390/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38391///
38392/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38393/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38394/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38395/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38396/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38397/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38398///
38399/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_round_sd&expand=3942)
38400#[inline]
38401#[target_feature(enable = "avx512f")]
38402#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38403#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))]
38404#[rustc_legacy_const_generics(3)]
38405pub fn _mm_maskz_mul_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38406    unsafe {
38407        static_assert_rounding!(ROUNDING);
38408        let a = a.as_f64x2();
38409        let b = b.as_f64x2();
38410        let r = vmulsd(a, b, f64x2::ZERO, k, ROUNDING);
38411        transmute(r)
38412    }
38413}
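// Illustrative sketch (not part of the original file): scalar multiplication with an
// explicit rounding mode. The function name and inputs are hypothetical; requires an
// AVX-512F capable CPU.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_mul_round_ss_usage() {
    const R: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
    let a = _mm_set_ss(2.5);
    let b = _mm_set_ss(4.0);
    // Lower lane = 2.5 * 4.0 = 10.0; the upper three lanes are copied from `a`.
    let r = _mm_mul_round_ss::<R>(a, b);
    assert_eq!(_mm_cvtss_f32(r), 10.0);
}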
38414
38415/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38416///
38417/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38418/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38419/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38420/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38421/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38422/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38423///
38424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_div_round_ss&expand=2174)
38425#[inline]
38426#[target_feature(enable = "avx512f")]
38427#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38428#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))]
38429#[rustc_legacy_const_generics(2)]
38430pub fn _mm_div_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
38431    unsafe {
38432        static_assert_rounding!(ROUNDING);
38433        let a = a.as_f32x4();
38434        let b = b.as_f32x4();
38435        let r = vdivss(a, b, f32x4::ZERO, 0b1, ROUNDING);
38436        transmute(r)
38437    }
38438}
38439
38440/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38441///
38442/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38443/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38444/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38445/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38446/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38447/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38448///
38449/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_round_ss&expand=2175)
38450#[inline]
38451#[target_feature(enable = "avx512f")]
38452#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38453#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))]
38454#[rustc_legacy_const_generics(4)]
38455pub fn _mm_mask_div_round_ss<const ROUNDING: i32>(
38456    src: __m128,
38457    k: __mmask8,
38458    a: __m128,
38459    b: __m128,
38460) -> __m128 {
38461    unsafe {
38462        static_assert_rounding!(ROUNDING);
38463        let a = a.as_f32x4();
38464        let b = b.as_f32x4();
38465        let src = src.as_f32x4();
38466        let r = vdivss(a, b, src, k, ROUNDING);
38467        transmute(r)
38468    }
38469}
38470
38471/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38472///
38473/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38474/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38475/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38476/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38477/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38478/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38479///
38480/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_round_ss&expand=2176)
38481#[inline]
38482#[target_feature(enable = "avx512f")]
38483#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38484#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))]
38485#[rustc_legacy_const_generics(3)]
38486pub fn _mm_maskz_div_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38487    unsafe {
38488        static_assert_rounding!(ROUNDING);
38489        let a = a.as_f32x4();
38490        let b = b.as_f32x4();
38491        let r = vdivss(a, b, f32x4::ZERO, k, ROUNDING);
38492        transmute(r)
38493    }
38494}
38495
38496/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38497///
38498/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38499/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38500/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38501/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38502/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38503/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38504///
38505/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_div_round_sd&expand=2171)
38506#[inline]
38507#[target_feature(enable = "avx512f")]
38508#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38509#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))]
38510#[rustc_legacy_const_generics(2)]
38511pub fn _mm_div_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
38512    unsafe {
38513        static_assert_rounding!(ROUNDING);
38514        let a = a.as_f64x2();
38515        let b = b.as_f64x2();
38516        let r = vdivsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
38517        transmute(r)
38518    }
38519}
38520
38521/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38522///
38523/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38524/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38525/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38526/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38527/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38528/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38529///
38530/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_round_sd&expand=2172)
38531#[inline]
38532#[target_feature(enable = "avx512f")]
38533#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38534#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))]
38535#[rustc_legacy_const_generics(4)]
38536pub fn _mm_mask_div_round_sd<const ROUNDING: i32>(
38537    src: __m128d,
38538    k: __mmask8,
38539    a: __m128d,
38540    b: __m128d,
38541) -> __m128d {
38542    unsafe {
38543        static_assert_rounding!(ROUNDING);
38544        let a = a.as_f64x2();
38545        let b = b.as_f64x2();
38546        let src = src.as_f64x2();
38547        let r = vdivsd(a, b, src, k, ROUNDING);
38548        transmute(r)
38549    }
38550}
38551
38552/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38553///
38554/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38555/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38556/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38557/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38558/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38559/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38560///
38561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_round_sd&expand=2173)
38562#[inline]
38563#[target_feature(enable = "avx512f")]
38564#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38565#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))]
38566#[rustc_legacy_const_generics(3)]
38567pub fn _mm_maskz_div_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38568    unsafe {
38569        static_assert_rounding!(ROUNDING);
38570        let a = a.as_f64x2();
38571        let b = b.as_f64x2();
38572        let r = vdivsd(a, b, f64x2::ZERO, k, ROUNDING);
38573        transmute(r)
38574    }
38575}
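// Illustrative sketch (not part of the original file): scalar division with
// `_MM_FROUND_CUR_DIRECTION`, i.e. dividing under whatever rounding mode `MXCSR.RC`
// currently selects. The function name and inputs are hypothetical; requires AVX-512F.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_div_round_sd_usage() {
    let a = _mm_set_sd(9.0);
    let b = _mm_set_sd(2.0);
    // 9.0 / 2.0 = 4.5 is exact, so the result is the same under every rounding mode.
    let r = _mm_div_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
    assert_eq!(_mm_cvtsd_f64(r), 4.5);
}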
38576
38577/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38578/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38579///
38580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_max_round_ss&expand=3668)
38581#[inline]
38582#[target_feature(enable = "avx512f")]
38583#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38584#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))]
38585#[rustc_legacy_const_generics(2)]
38586pub fn _mm_max_round_ss<const SAE: i32>(a: __m128, b: __m128) -> __m128 {
38587    unsafe {
38588        static_assert_sae!(SAE);
38589        let a = a.as_f32x4();
38590        let b = b.as_f32x4();
38591        let r = vmaxss(a, b, f32x4::ZERO, 0b1, SAE);
38592        transmute(r)
38593    }
38594}
38595
38596/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38597/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38598///
38599/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_round_ss&expand=3672)
38600#[inline]
38601#[target_feature(enable = "avx512f")]
38602#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38603#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))]
38604#[rustc_legacy_const_generics(4)]
38605pub fn _mm_mask_max_round_ss<const SAE: i32>(
38606    src: __m128,
38607    k: __mmask8,
38608    a: __m128,
38609    b: __m128,
38610) -> __m128 {
38611    unsafe {
38612        static_assert_sae!(SAE);
38613        let a = a.as_f32x4();
38614        let b = b.as_f32x4();
38615        let src = src.as_f32x4();
38616        let r = vmaxss(a, b, src, k, SAE);
38617        transmute(r)
38618    }
38619}
38620
38621/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38622/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38623///
38624/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_max_round_ss&expand=3667)
38625#[inline]
38626#[target_feature(enable = "avx512f")]
38627#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38628#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))]
38629#[rustc_legacy_const_generics(3)]
38630pub fn _mm_maskz_max_round_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38631    unsafe {
38632        static_assert_sae!(SAE);
38633        let a = a.as_f32x4();
38634        let b = b.as_f32x4();
38635        let r = vmaxss(a, b, f32x4::ZERO, k, SAE);
38636        transmute(r)
38637    }
38638}
38639
38640/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38641/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38642///
38643/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_max_round_sd&expand=3665)
38644#[inline]
38645#[target_feature(enable = "avx512f")]
38646#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38647#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))]
38648#[rustc_legacy_const_generics(2)]
38649pub fn _mm_max_round_sd<const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
38650    unsafe {
38651        static_assert_sae!(SAE);
38652        let a = a.as_f64x2();
38653        let b = b.as_f64x2();
38654        let r = vmaxsd(a, b, f64x2::ZERO, 0b1, SAE);
38655        transmute(r)
38656    }
38657}
38658
38659/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38660/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38661///
38662/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_max_round_sd&expand=3663)
38663#[inline]
38664#[target_feature(enable = "avx512f")]
38665#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38666#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))]
38667#[rustc_legacy_const_generics(4)]
38668pub fn _mm_mask_max_round_sd<const SAE: i32>(
38669    src: __m128d,
38670    k: __mmask8,
38671    a: __m128d,
38672    b: __m128d,
38673) -> __m128d {
38674    unsafe {
38675        static_assert_sae!(SAE);
38676        let a = a.as_f64x2();
38677        let b = b.as_f64x2();
38678        let src = src.as_f64x2();
38679        let r = vmaxsd(a, b, src, k, SAE);
38680        transmute(r)
38681    }
38682}
38683
38684/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38685/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38686///
38687/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_round_sd&expand=3670)
38688#[inline]
38689#[target_feature(enable = "avx512f")]
38690#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38691#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))]
38692#[rustc_legacy_const_generics(3)]
38693pub fn _mm_maskz_max_round_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38694    unsafe {
38695        static_assert_sae!(SAE);
38696        let a = a.as_f64x2();
38697        let b = b.as_f64x2();
38698        let r = vmaxsd(a, b, f64x2::ZERO, k, SAE);
38699        transmute(r)
38700    }
38701}
38702
38703/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38704/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38705///
38706/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_min_round_ss&expand=3782)
38707#[inline]
38708#[target_feature(enable = "avx512f")]
38709#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38710#[cfg_attr(test, assert_instr(vminss, SAE = 8))]
38711#[rustc_legacy_const_generics(2)]
38712pub fn _mm_min_round_ss<const SAE: i32>(a: __m128, b: __m128) -> __m128 {
38713    unsafe {
38714        static_assert_sae!(SAE);
38715        let a = a.as_f32x4();
38716        let b = b.as_f32x4();
38717        let r = vminss(a, b, f32x4::ZERO, 0b1, SAE);
38718        transmute(r)
38719    }
38720}
38721
38722/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38723/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38724///
38725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_round_ss&expand=3780)
38726#[inline]
38727#[target_feature(enable = "avx512f")]
38728#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38729#[cfg_attr(test, assert_instr(vminss, SAE = 8))]
38730#[rustc_legacy_const_generics(4)]
38731pub fn _mm_mask_min_round_ss<const SAE: i32>(
38732    src: __m128,
38733    k: __mmask8,
38734    a: __m128,
38735    b: __m128,
38736) -> __m128 {
38737    unsafe {
38738        static_assert_sae!(SAE);
38739        let a = a.as_f32x4();
38740        let b = b.as_f32x4();
38741        let src = src.as_f32x4();
38742        let r = vminss(a, b, src, k, SAE);
38743        transmute(r)
38744    }
38745}
38746
38747/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38748/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38749///
38750/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_round_ss&expand=3781)
38751#[inline]
38752#[target_feature(enable = "avx512f")]
38753#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38754#[cfg_attr(test, assert_instr(vminss, SAE = 8))]
38755#[rustc_legacy_const_generics(3)]
38756pub fn _mm_maskz_min_round_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38757    unsafe {
38758        static_assert_sae!(SAE);
38759        let a = a.as_f32x4();
38760        let b = b.as_f32x4();
38761        let r = vminss(a, b, f32x4::ZERO, k, SAE);
38762        transmute(r)
38763    }
38764}
38765
38766/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38767/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38768///
38769/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_min_round_sd&expand=3779)
38770#[inline]
38771#[target_feature(enable = "avx512f")]
38772#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38773#[cfg_attr(test, assert_instr(vminsd, SAE = 8))]
38774#[rustc_legacy_const_generics(2)]
38775pub fn _mm_min_round_sd<const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
38776    unsafe {
38777        static_assert_sae!(SAE);
38778        let a = a.as_f64x2();
38779        let b = b.as_f64x2();
38780        let r = vminsd(a, b, f64x2::ZERO, 0b1, SAE);
38781        transmute(r)
38782    }
38783}
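
// Editorial sketch (not in the original source): the double-precision scalar
// minimum, mirroring the single-precision case above. The helper name
// `sketch_min_round_sd` is hypothetical; an AVX-512F CPU is assumed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn sketch_min_round_sd() {
    let a = _mm_set_pd(9.0, 2.5); // lower lane = 2.5
    let b = _mm_set_pd(90.0, -4.0); // lower lane = -4.0
    let r = _mm_min_round_sd::<_MM_FROUND_NO_EXC>(a, b);
    // Lower lane holds min(2.5, -4.0); the upper lane is copied from `a`.
    assert_eq!(_mm_cvtsd_f64(r), -4.0);
}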
38784
38785/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38786/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38787///
38788/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_round_sd&expand=3777)
38789#[inline]
38790#[target_feature(enable = "avx512f")]
38791#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38792#[cfg_attr(test, assert_instr(vminsd, SAE = 8))]
38793#[rustc_legacy_const_generics(4)]
38794pub fn _mm_mask_min_round_sd<const SAE: i32>(
38795    src: __m128d,
38796    k: __mmask8,
38797    a: __m128d,
38798    b: __m128d,
38799) -> __m128d {
38800    unsafe {
38801        static_assert_sae!(SAE);
38802        let a = a.as_f64x2();
38803        let b = b.as_f64x2();
38804        let src = src.as_f64x2();
38805        let r = vminsd(a, b, src, k, SAE);
38806        transmute(r)
38807    }
38808}
38809
38810/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38811/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38812///
38813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_round_sd&expand=3778)
38814#[inline]
38815#[target_feature(enable = "avx512f")]
38816#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38817#[cfg_attr(test, assert_instr(vminsd, SAE = 8))]
38818#[rustc_legacy_const_generics(3)]
38819pub fn _mm_maskz_min_round_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38820    unsafe {
38821        static_assert_sae!(SAE);
38822        let a = a.as_f64x2();
38823        let b = b.as_f64x2();
38824        let r = vminsd(a, b, f64x2::ZERO, k, SAE);
38825        transmute(r)
38826    }
38827}
38828
38829/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38830///
38831/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38832/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38833/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38834/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38835/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38836/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38837///
38838/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sqrt_round_ss&expand=5383)
38839#[inline]
38840#[target_feature(enable = "avx512f")]
38841#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38842#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))]
38843#[rustc_legacy_const_generics(2)]
38844pub fn _mm_sqrt_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
38845    unsafe {
38846        static_assert_rounding!(ROUNDING);
38847        vsqrtss(a, b, _mm_setzero_ps(), 0b1, ROUNDING)
38848    }
38849}
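
// Editorial sketch (not in the original source): shows a rounding-mode constant
// being passed to `_mm_sqrt_round_ss`. Unlike the SAE-only intrinsics above,
// the rounding parameter here combines a direction with `_MM_FROUND_NO_EXC`.
// The helper name `sketch_sqrt_round_ss` is hypothetical; AVX-512F is assumed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn sketch_sqrt_round_ss() {
    let a = _mm_set_ps(1.0, 2.0, 3.0, 4.0); // upper three lanes pass through
    let b = _mm_set1_ps(9.0);
    let r = _mm_sqrt_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
    // Lower lane holds sqrt(9.0).
    assert_eq!(_mm_cvtss_f32(r), 3.0);
}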
38850
38851/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38852///
38853/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38854/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38855/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38856/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38857/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38858/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38859///
38860/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_round_ss&expand=5381)
38861#[inline]
38862#[target_feature(enable = "avx512f")]
38863#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38864#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))]
38865#[rustc_legacy_const_generics(4)]
38866pub fn _mm_mask_sqrt_round_ss<const ROUNDING: i32>(
38867    src: __m128,
38868    k: __mmask8,
38869    a: __m128,
38870    b: __m128,
38871) -> __m128 {
38872    unsafe {
38873        static_assert_rounding!(ROUNDING);
38874        vsqrtss(a, b, src, k, ROUNDING)
38875    }
38876}
38877
38878/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38879///
38880/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38881/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38882/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38883/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38884/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38885/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38886///
38887/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_round_ss&expand=5382)
38888#[inline]
38889#[target_feature(enable = "avx512f")]
38890#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38891#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))]
38892#[rustc_legacy_const_generics(3)]
38893pub fn _mm_maskz_sqrt_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38894    unsafe {
38895        static_assert_rounding!(ROUNDING);
38896        vsqrtss(a, b, _mm_setzero_ps(), k, ROUNDING)
38897    }
38898}
38899
38900/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38901///
38902/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38903/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38904/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38905/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38906/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38907/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38908///
38909/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sqrt_round_sd&expand=5380)
38910#[inline]
38911#[target_feature(enable = "avx512f")]
38912#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38913#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))]
38914#[rustc_legacy_const_generics(2)]
38915pub fn _mm_sqrt_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
38916    unsafe {
38917        static_assert_rounding!(ROUNDING);
38918        vsqrtsd(a, b, _mm_setzero_pd(), 0b1, ROUNDING)
38919    }
38920}
38921
38922/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38923///
38924/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38925/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38926/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38927/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38928/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38929/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38930///
38931/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_round_sd&expand=5378)
38932#[inline]
38933#[target_feature(enable = "avx512f")]
38934#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38935#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))]
38936#[rustc_legacy_const_generics(4)]
38937pub fn _mm_mask_sqrt_round_sd<const ROUNDING: i32>(
38938    src: __m128d,
38939    k: __mmask8,
38940    a: __m128d,
38941    b: __m128d,
38942) -> __m128d {
38943    unsafe {
38944        static_assert_rounding!(ROUNDING);
38945        vsqrtsd(a, b, src, k, ROUNDING)
38946    }
38947}
38948
38949/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38950///
38951/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38952/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38953/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38954/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38955/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38956/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38957///
38958/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_round_sd&expand=5379)
38959#[inline]
38960#[target_feature(enable = "avx512f")]
38961#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38962#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))]
38963#[rustc_legacy_const_generics(3)]
38964pub fn _mm_maskz_sqrt_round_sd<const ROUNDING: i32>(
38965    k: __mmask8,
38966    a: __m128d,
38967    b: __m128d,
38968) -> __m128d {
38969    unsafe {
38970        static_assert_rounding!(ROUNDING);
38971        vsqrtsd(a, b, _mm_setzero_pd(), k, ROUNDING)
38972    }
38973}
38974
38975/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
38976/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38977///
38978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_round_ss&expand=2856)
38979#[inline]
38980#[target_feature(enable = "avx512f")]
38981#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38982#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))]
38983#[rustc_legacy_const_generics(2)]
38984pub fn _mm_getexp_round_ss<const SAE: i32>(a: __m128, b: __m128) -> __m128 {
38985    unsafe {
38986        static_assert_sae!(SAE);
38987        let a = a.as_f32x4();
38988        let b = b.as_f32x4();
38989        let r = vgetexpss(a, b, f32x4::ZERO, 0b1, SAE);
38990        transmute(r)
38991    }
38992}
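
// Editorial sketch (not in the original source): `getexp` returns the exponent
// of the lower element as a float, i.e. floor(log2(|x|)). The helper name
// `sketch_getexp_round_ss` is hypothetical; AVX-512F is assumed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn sketch_getexp_round_ss() {
    let a = _mm_set1_ps(0.0);
    let b = _mm_set1_ps(10.0);
    let r = _mm_getexp_round_ss::<_MM_FROUND_NO_EXC>(a, b);
    // floor(log2(10.0)) = 3.0 in the lower lane; upper lanes come from `a`.
    assert_eq!(_mm_cvtss_f32(r), 3.0);
}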
38993
38994/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
38995/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38996///
38997/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_round_ss&expand=2857)
38998#[inline]
38999#[target_feature(enable = "avx512f")]
39000#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39001#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))]
39002#[rustc_legacy_const_generics(4)]
39003pub fn _mm_mask_getexp_round_ss<const SAE: i32>(
39004    src: __m128,
39005    k: __mmask8,
39006    a: __m128,
39007    b: __m128,
39008) -> __m128 {
39009    unsafe {
39010        static_assert_sae!(SAE);
39011        let a = a.as_f32x4();
39012        let b = b.as_f32x4();
39013        let src = src.as_f32x4();
39014        let r = vgetexpss(a, b, src, k, SAE);
39015        transmute(r)
39016    }
39017}
39018
39019/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
39020/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39021///
39022/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_round_ss&expand=2858)
39023#[inline]
39024#[target_feature(enable = "avx512f")]
39025#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39026#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))]
39027#[rustc_legacy_const_generics(3)]
39028pub fn _mm_maskz_getexp_round_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
39029    unsafe {
39030        static_assert_sae!(SAE);
39031        let a = a.as_f32x4();
39032        let b = b.as_f32x4();
39033        let r = vgetexpss(a, b, f32x4::ZERO, k, SAE);
39034        transmute(r)
39035    }
39036}
39037
39038/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
39039/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39040///
39041/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_round_sd&expand=2853)
39042#[inline]
39043#[target_feature(enable = "avx512f")]
39044#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39045#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))]
39046#[rustc_legacy_const_generics(2)]
39047pub fn _mm_getexp_round_sd<const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
39048    unsafe {
39049        static_assert_sae!(SAE);
39050        let a = a.as_f64x2();
39051        let b = b.as_f64x2();
39052        let r = vgetexpsd(a, b, f64x2::ZERO, 0b1, SAE);
39053        transmute(r)
39054    }
39055}
39056
39057/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
39058/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39059///
39060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_round_sd&expand=2854)
39061#[inline]
39062#[target_feature(enable = "avx512f")]
39063#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39064#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))]
39065#[rustc_legacy_const_generics(4)]
39066pub fn _mm_mask_getexp_round_sd<const SAE: i32>(
39067    src: __m128d,
39068    k: __mmask8,
39069    a: __m128d,
39070    b: __m128d,
39071) -> __m128d {
39072    unsafe {
39073        static_assert_sae!(SAE);
39074        let a = a.as_f64x2();
39075        let b = b.as_f64x2();
39076        let src = src.as_f64x2();
39077        let r = vgetexpsd(a, b, src, k, SAE);
39078        transmute(r)
39079    }
39080}
39081
39082/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
39083/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39084///
39085/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_round_sd&expand=2855)
39086#[inline]
39087#[target_feature(enable = "avx512f")]
39088#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39089#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))]
39090#[rustc_legacy_const_generics(3)]
39091pub fn _mm_maskz_getexp_round_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
39092    unsafe {
39093        static_assert_sae!(SAE);
39094        let a = a.as_f64x2();
39095        let b = b.as_f64x2();
39096        let r = vgetexpsd(a, b, f64x2::ZERO, k, SAE);
39097        transmute(r)
39098    }
39099}
39100
39101/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39102/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39103///    _MM_MANT_NORM_1_2     // interval [1, 2)\
39104///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
39105///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
39106///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39107/// The sign is determined by sc which can take the following values:\
39108///    _MM_MANT_SIGN_src     // sign = sign(src)\
39109///    _MM_MANT_SIGN_zero    // sign = 0\
39110///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
39111/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39112///
39113/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_round_ss&expand=2892)
39114#[inline]
39115#[target_feature(enable = "avx512f")]
39116#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39117#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))]
39118#[rustc_legacy_const_generics(2, 3, 4)]
39119pub fn _mm_getmant_round_ss<
39120    const NORM: _MM_MANTISSA_NORM_ENUM,
39121    const SIGN: _MM_MANTISSA_SIGN_ENUM,
39122    const SAE: i32,
39123>(
39124    a: __m128,
39125    b: __m128,
39126) -> __m128 {
39127    unsafe {
39128        static_assert_uimm_bits!(NORM, 4);
39129        static_assert_uimm_bits!(SIGN, 2);
39130        static_assert_mantissas_sae!(SAE);
39131        let a = a.as_f32x4();
39132        let b = b.as_f32x4();
39133        let r = vgetmantss(a, b, SIGN << 2 | NORM, f32x4::ZERO, 0b1, SAE);
39134        transmute(r)
39135    }
39136}
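
// Editorial sketch (not in the original source): normalizing a mantissa into
// the [1, 2) interval while keeping the source sign. The NORM/SIGN constant
// names (`_MM_MANT_NORM_1_2`, `_MM_MANT_SIGN_SRC`) are assumed to be the
// crate's mantissa-enum constants; the helper name is hypothetical and an
// AVX-512F CPU is assumed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn sketch_getmant_round_ss() {
    let a = _mm_set1_ps(0.0);
    let b = _mm_set1_ps(10.0);
    // 10.0 = 1.25 * 2^3, so the mantissa normalized to [1, 2) is 1.25.
    let r = _mm_getmant_round_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC, _MM_FROUND_NO_EXC>(a, b);
    assert_eq!(_mm_cvtss_f32(r), 1.25);
}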
39137
39138/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39139/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39140///    _MM_MANT_NORM_1_2     // interval [1, 2)\
39141///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
39142///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
39143///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39144/// The sign is determined by sc which can take the following values:\
39145///    _MM_MANT_SIGN_src     // sign = sign(src)\
39146///    _MM_MANT_SIGN_zero    // sign = 0\
39147///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
39148/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39149///
39150/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_round_ss&expand=2893)
39151#[inline]
39152#[target_feature(enable = "avx512f")]
39153#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39154#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))]
39155#[rustc_legacy_const_generics(4, 5, 6)]
39156pub fn _mm_mask_getmant_round_ss<
39157    const NORM: _MM_MANTISSA_NORM_ENUM,
39158    const SIGN: _MM_MANTISSA_SIGN_ENUM,
39159    const SAE: i32,
39160>(
39161    src: __m128,
39162    k: __mmask8,
39163    a: __m128,
39164    b: __m128,
39165) -> __m128 {
39166    unsafe {
39167        static_assert_uimm_bits!(NORM, 4);
39168        static_assert_uimm_bits!(SIGN, 2);
39169        static_assert_mantissas_sae!(SAE);
39170        let a = a.as_f32x4();
39171        let b = b.as_f32x4();
39172        let src = src.as_f32x4();
39173        let r = vgetmantss(a, b, SIGN << 2 | NORM, src, k, SAE);
39174        transmute(r)
39175    }
39176}
39177
39178/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39179/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39180///    _MM_MANT_NORM_1_2     // interval [1, 2)\
39181///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
39182///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
39183///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39184/// The sign is determined by sc which can take the following values:\
39185///    _MM_MANT_SIGN_src     // sign = sign(src)\
39186///    _MM_MANT_SIGN_zero    // sign = 0\
39187///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
39188/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39189///
39190/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_round_ss&expand=2894)
39191#[inline]
39192#[target_feature(enable = "avx512f")]
39193#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39194#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))]
39195#[rustc_legacy_const_generics(3, 4, 5)]
39196pub fn _mm_maskz_getmant_round_ss<
39197    const NORM: _MM_MANTISSA_NORM_ENUM,
39198    const SIGN: _MM_MANTISSA_SIGN_ENUM,
39199    const SAE: i32,
39200>(
39201    k: __mmask8,
39202    a: __m128,
39203    b: __m128,
39204) -> __m128 {
39205    unsafe {
39206        static_assert_uimm_bits!(NORM, 4);
39207        static_assert_uimm_bits!(SIGN, 2);
39208        static_assert_mantissas_sae!(SAE);
39209        let a = a.as_f32x4();
39210        let b = b.as_f32x4();
39211        let r = vgetmantss(a, b, SIGN << 2 | NORM, f32x4::ZERO, k, SAE);
39212        transmute(r)
39213    }
39214}
39215
39216/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39217/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39218///    _MM_MANT_NORM_1_2     // interval [1, 2)\
39219///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
39220///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
39221///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39222/// The sign is determined by sc which can take the following values:\
39223///    _MM_MANT_SIGN_src     // sign = sign(src)\
39224///    _MM_MANT_SIGN_zero    // sign = 0\
39225///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
39226/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39227///
39228/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_round_sd&expand=2889)
39229#[inline]
39230#[target_feature(enable = "avx512f")]
39231#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39232#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))]
39233#[rustc_legacy_const_generics(2, 3, 4)]
39234pub fn _mm_getmant_round_sd<
39235    const NORM: _MM_MANTISSA_NORM_ENUM,
39236    const SIGN: _MM_MANTISSA_SIGN_ENUM,
39237    const SAE: i32,
39238>(
39239    a: __m128d,
39240    b: __m128d,
39241) -> __m128d {
39242    unsafe {
39243        static_assert_uimm_bits!(NORM, 4);
39244        static_assert_uimm_bits!(SIGN, 2);
39245        static_assert_mantissas_sae!(SAE);
39246        let a = a.as_f64x2();
39247        let b = b.as_f64x2();
39248        let r = vgetmantsd(a, b, SIGN << 2 | NORM, f64x2::ZERO, 0b1, SAE);
39249        transmute(r)
39250    }
39251}
39252
39253/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39254/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39255///    _MM_MANT_NORM_1_2     // interval [1, 2)\
39256///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
39257///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
39258///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39259/// The sign is determined by sc which can take the following values:\
39260///    _MM_MANT_SIGN_src     // sign = sign(src)\
39261///    _MM_MANT_SIGN_zero    // sign = 0\
39262///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
39263/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39264///
39265/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_round_sd&expand=2890)
39266#[inline]
39267#[target_feature(enable = "avx512f")]
39268#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39269#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))]
39270#[rustc_legacy_const_generics(4, 5, 6)]
39271pub fn _mm_mask_getmant_round_sd<
39272    const NORM: _MM_MANTISSA_NORM_ENUM,
39273    const SIGN: _MM_MANTISSA_SIGN_ENUM,
39274    const SAE: i32,
39275>(
39276    src: __m128d,
39277    k: __mmask8,
39278    a: __m128d,
39279    b: __m128d,
39280) -> __m128d {
39281    unsafe {
39282        static_assert_uimm_bits!(NORM, 4);
39283        static_assert_uimm_bits!(SIGN, 2);
39284        static_assert_mantissas_sae!(SAE);
39285        let a = a.as_f64x2();
39286        let b = b.as_f64x2();
39287        let src = src.as_f64x2();
39288        let r = vgetmantsd(a, b, SIGN << 2 | NORM, src, k, SAE);
39289        transmute(r)
39290    }
39291}
39292
39293/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39294/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39295///    _MM_MANT_NORM_1_2     // interval [1, 2)\
39296///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
39297///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
39298///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39299/// The sign is determined by sc which can take the following values:\
39300///    _MM_MANT_SIGN_src     // sign = sign(src)\
39301///    _MM_MANT_SIGN_zero    // sign = 0\
39302///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
39303/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39304///
39305/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_round_sd&expand=2891)
39306#[inline]
39307#[target_feature(enable = "avx512f")]
39308#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39309#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))]
39310#[rustc_legacy_const_generics(3, 4, 5)]
39311pub fn _mm_maskz_getmant_round_sd<
39312    const NORM: _MM_MANTISSA_NORM_ENUM,
39313    const SIGN: _MM_MANTISSA_SIGN_ENUM,
39314    const SAE: i32,
39315>(
39316    k: __mmask8,
39317    a: __m128d,
39318    b: __m128d,
39319) -> __m128d {
39320    unsafe {
39321        static_assert_uimm_bits!(NORM, 4);
39322        static_assert_uimm_bits!(SIGN, 2);
39323        static_assert_mantissas_sae!(SAE);
39324        let a = a.as_f64x2();
39325        let b = b.as_f64x2();
39326        let r = vgetmantsd(a, b, SIGN << 2 | NORM, f64x2::ZERO, k, SAE);
39327        transmute(r)
39328    }
39329}
39330
39331/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
39332/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39333/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39334/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39335/// * [`_MM_FROUND_TO_POS_INF`] : round up
39336/// * [`_MM_FROUND_TO_ZERO`] : truncate
39337/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39338///
39339/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39340/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_round_ss&expand=4796)
39341#[inline]
39342#[target_feature(enable = "avx512f")]
39343#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39344#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))]
39345#[rustc_legacy_const_generics(2, 3)]
39346pub fn _mm_roundscale_round_ss<const IMM8: i32, const SAE: i32>(a: __m128, b: __m128) -> __m128 {
39347    unsafe {
39348        static_assert_uimm_bits!(IMM8, 8);
39349        static_assert_mantissas_sae!(SAE);
39350        let a = a.as_f32x4();
39351        let b = b.as_f32x4();
39352        let r = vrndscaless(a, b, f32x4::ZERO, 0b11111111, IMM8, SAE);
39353        transmute(r)
39354    }
39355}
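
// Editorial sketch (not in the original source): `roundscale` rounds the lower
// lane to imm8[7:4] fraction bits using the rounding mode in imm8[2:0]. The
// helper name `sketch_roundscale_round_ss` is hypothetical; AVX-512F is assumed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn sketch_roundscale_round_ss() {
    let a = _mm_set1_ps(0.0);
    let b = _mm_set1_ps(2.7);
    // IMM8 = 0: zero fraction bits, round to nearest, so 2.7 -> 3.0.
    let whole = _mm_roundscale_round_ss::<0, _MM_FROUND_NO_EXC>(a, b);
    assert_eq!(_mm_cvtss_f32(whole), 3.0);
    // IMM8 = 0x10: imm8[7:4] = 1 keeps one fraction bit, so 2.7 -> 2.5.
    let halves = _mm_roundscale_round_ss::<0x10, _MM_FROUND_NO_EXC>(a, b);
    assert_eq!(_mm_cvtss_f32(halves), 2.5);
}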
39356
39357/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39358/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39359/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39360/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39361/// * [`_MM_FROUND_TO_POS_INF`] : round up
39362/// * [`_MM_FROUND_TO_ZERO`] : truncate
39363/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39364///
39365/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39366/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_round_ss&expand=4794)
39367#[inline]
39368#[target_feature(enable = "avx512f")]
39369#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39370#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))]
39371#[rustc_legacy_const_generics(4, 5)]
39372pub fn _mm_mask_roundscale_round_ss<const IMM8: i32, const SAE: i32>(
39373    src: __m128,
39374    k: __mmask8,
39375    a: __m128,
39376    b: __m128,
39377) -> __m128 {
39378    unsafe {
39379        static_assert_uimm_bits!(IMM8, 8);
39380        static_assert_mantissas_sae!(SAE);
39381        let a = a.as_f32x4();
39382        let b = b.as_f32x4();
39383        let src = src.as_f32x4();
39384        let r = vrndscaless(a, b, src, k, IMM8, SAE);
39385        transmute(r)
39386    }
39387}
39388
39389/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39390/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39391/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39392/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39393/// * [`_MM_FROUND_TO_POS_INF`] : round up
39394/// * [`_MM_FROUND_TO_ZERO`] : truncate
39395/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39396///
39397/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39398/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_round_ss&expand=4795)
39399#[inline]
39400#[target_feature(enable = "avx512f")]
39401#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39402#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))]
39403#[rustc_legacy_const_generics(3, 4)]
39404pub fn _mm_maskz_roundscale_round_ss<const IMM8: i32, const SAE: i32>(
39405    k: __mmask8,
39406    a: __m128,
39407    b: __m128,
39408) -> __m128 {
39409    unsafe {
39410        static_assert_uimm_bits!(IMM8, 8);
39411        static_assert_mantissas_sae!(SAE);
39412        let a = a.as_f32x4();
39413        let b = b.as_f32x4();
39414        let r = vrndscaless(a, b, f32x4::ZERO, k, IMM8, SAE);
39415        transmute(r)
39416    }
39417}
39418
39419/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
39420/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39421/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39422/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39423/// * [`_MM_FROUND_TO_POS_INF`] : round up
39424/// * [`_MM_FROUND_TO_ZERO`] : truncate
39425/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39426///
39427/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_round_sd&expand=4793)
39429#[inline]
39430#[target_feature(enable = "avx512f")]
39431#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39432#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))]
39433#[rustc_legacy_const_generics(2, 3)]
39434pub fn _mm_roundscale_round_sd<const IMM8: i32, const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
39435    unsafe {
39436        static_assert_uimm_bits!(IMM8, 8);
39437        static_assert_mantissas_sae!(SAE);
39438        let a = a.as_f64x2();
39439        let b = b.as_f64x2();
39440        let r = vrndscalesd(a, b, f64x2::ZERO, 0b11111111, IMM8, SAE);
39441        transmute(r)
39442    }
39443}
39444
39445/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39446/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39447/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39448/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39449/// * [`_MM_FROUND_TO_POS_INF`] : round up
39450/// * [`_MM_FROUND_TO_ZERO`] : truncate
39451/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39452///
39453/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39454/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_round_sd&expand=4791)
39455#[inline]
39456#[target_feature(enable = "avx512f")]
39457#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39458#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))]
39459#[rustc_legacy_const_generics(4, 5)]
39460pub fn _mm_mask_roundscale_round_sd<const IMM8: i32, const SAE: i32>(
39461    src: __m128d,
39462    k: __mmask8,
39463    a: __m128d,
39464    b: __m128d,
39465) -> __m128d {
39466    unsafe {
39467        static_assert_uimm_bits!(IMM8, 8);
39468        static_assert_mantissas_sae!(SAE);
39469        let a = a.as_f64x2();
39470        let b = b.as_f64x2();
39471        let src = src.as_f64x2();
39472        let r = vrndscalesd(a, b, src, k, IMM8, SAE);
39473        transmute(r)
39474    }
39475}
39476
39477/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39478/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39479/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39480/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39481/// * [`_MM_FROUND_TO_POS_INF`] : round up
39482/// * [`_MM_FROUND_TO_ZERO`] : truncate
39483/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39484///
39485/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_round_sd&expand=4792)
39487#[inline]
39488#[target_feature(enable = "avx512f")]
39489#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39490#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))]
39491#[rustc_legacy_const_generics(3, 4)]
39492pub fn _mm_maskz_roundscale_round_sd<const IMM8: i32, const SAE: i32>(
39493    k: __mmask8,
39494    a: __m128d,
39495    b: __m128d,
39496) -> __m128d {
39497    unsafe {
39498        static_assert_uimm_bits!(IMM8, 8);
39499        static_assert_mantissas_sae!(SAE);
39500        let a = a.as_f64x2();
39501        let b = b.as_f64x2();
39502        let r = vrndscalesd(a, b, f64x2::ZERO, k, IMM8, SAE);
39503        transmute(r)
39504    }
39505}
39506
39507/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
39508///
39509/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39510/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39511/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39512/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39513/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39514/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39515///
39516/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_round_ss&expand=4895)
39517#[inline]
39518#[target_feature(enable = "avx512f")]
39519#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39520#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))]
39521#[rustc_legacy_const_generics(2)]
39522pub fn _mm_scalef_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
39523    unsafe {
39524        static_assert_rounding!(ROUNDING);
39525        let a = a.as_f32x4();
39526        let b = b.as_f32x4();
39527        let r = vscalefss(a, b, f32x4::ZERO, 0b11111111, ROUNDING);
39528        transmute(r)
39529    }
39530}
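
// Editorial sketch (not in the original source): `scalef` computes
// a * 2^floor(b) in the lower lane. The helper name `sketch_scalef_round_ss`
// is hypothetical; AVX-512F is assumed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn sketch_scalef_round_ss() {
    let a = _mm_set1_ps(3.0);
    let b = _mm_set1_ps(2.0);
    // Lower lane: 3.0 * 2^2 = 12.0; the upper lanes are copied from `a`.
    let r = _mm_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
    assert_eq!(_mm_cvtss_f32(r), 12.0);
}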
39531
39532/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39533///
39534/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39535/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39536/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39537/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39538/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39539/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39540///
39541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_round_ss&expand=4893)
39542#[inline]
39543#[target_feature(enable = "avx512f")]
39544#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39545#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))]
39546#[rustc_legacy_const_generics(4)]
39547pub fn _mm_mask_scalef_round_ss<const ROUNDING: i32>(
39548    src: __m128,
39549    k: __mmask8,
39550    a: __m128,
39551    b: __m128,
39552) -> __m128 {
39553    unsafe {
39554        static_assert_rounding!(ROUNDING);
39555        let a = a.as_f32x4();
39556        let b = b.as_f32x4();
39557        let src = src.as_f32x4();
39558        let r = vscalefss(a, b, src, k, ROUNDING);
39559        transmute(r)
39560    }
39561}
39562
39563/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39564///
39565/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39566/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39567/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39568/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39569/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39570/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39571///
39572/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_round_ss&expand=4894)
39573#[inline]
39574#[target_feature(enable = "avx512f")]
39575#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39576#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))]
39577#[rustc_legacy_const_generics(3)]
39578pub fn _mm_maskz_scalef_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
39579    unsafe {
39580        static_assert_rounding!(ROUNDING);
39581        let a = a.as_f32x4();
39582        let b = b.as_f32x4();
39583        let r = vscalefss(a, b, f32x4::ZERO, k, ROUNDING);
39584        transmute(r)
39585    }
39586}
39587
39588/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
39589///
39590/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39591/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39592/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39593/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39594/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39595/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39596///
39597/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_round_sd&expand=4892)
39598#[inline]
39599#[target_feature(enable = "avx512f")]
39600#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39601#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))]
39602#[rustc_legacy_const_generics(2)]
39603pub fn _mm_scalef_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
39604    unsafe {
39605        static_assert_rounding!(ROUNDING);
39606        let a = a.as_f64x2();
39607        let b = b.as_f64x2();
39608        let r = vscalefsd(a, b, f64x2::ZERO, 0b11111111, ROUNDING);
39609        transmute(r)
39610    }
39611}
39612
39613/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39614///
39615/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39616/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39617/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39618/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39619/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39620/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39621///
39622/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_round_sd&expand=4890)
39623#[inline]
39624#[target_feature(enable = "avx512f")]
39625#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39626#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))]
39627#[rustc_legacy_const_generics(4)]
39628pub fn _mm_mask_scalef_round_sd<const ROUNDING: i32>(
39629    src: __m128d,
39630    k: __mmask8,
39631    a: __m128d,
39632    b: __m128d,
39633) -> __m128d {
38634    unsafe {
        static_assert_rounding!(ROUNDING);
39635        let a = a.as_f64x2();
39636        let b = b.as_f64x2();
39637        let src = src.as_f64x2();
39638        let r = vscalefsd(a, b, src, k, ROUNDING);
39639        transmute(r)
39640    }
39641}
39642
39643/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39644///
39645/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39646/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39647/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39648/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39649/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39650/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39651///
39652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_round_sd&expand=4891)
39653#[inline]
39654#[target_feature(enable = "avx512f")]
39655#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39656#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))]
39657#[rustc_legacy_const_generics(3)]
39658pub fn _mm_maskz_scalef_round_sd<const ROUNDING: i32>(
39659    k: __mmask8,
39660    a: __m128d,
39661    b: __m128d,
39662) -> __m128d {
39663    unsafe {
39664        static_assert_rounding!(ROUNDING);
39665        let a = a.as_f64x2();
39666        let b = b.as_f64x2();
39667        let r = vscalefsd(a, b, f64x2::ZERO, k, ROUNDING);
39668        transmute(r)
39669    }
39670}
39671
39672/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
39673///
39674/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39675/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39676/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39677/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39678/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39679/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39680///
39681/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmadd_round_ss&expand=2573)
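///
/// A minimal usage sketch (editorial addition, not part of Intel's documentation). It assumes a
/// nightly toolchain with `#![feature(stdarch_x86_avx512)]` and runtime AVX-512F support, and uses
/// `_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC` as the rounding mode:
///
/// ```
/// #![feature(stdarch_x86_avx512)]
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     let a = _mm_set_ss(2.0);
///     let b = _mm_set_ss(3.0);
///     let c = _mm_set_ss(4.0);
///     // SAFETY: AVX-512F support was verified at runtime above.
///     let r = unsafe {
///         _mm_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c)
///     };
///     // Lower lane: 2.0 * 3.0 + 4.0; upper lanes copied from `a`.
///     assert_eq!(_mm_cvtss_f32(r), 10.0);
/// }
/// # }
/// ```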
39682#[inline]
39683#[target_feature(enable = "avx512f")]
39684#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39685#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39686#[rustc_legacy_const_generics(3)]
39687pub fn _mm_fmadd_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
39688    unsafe {
39689        static_assert_rounding!(ROUNDING);
39690        let extracta: f32 = simd_extract!(a, 0);
39691        let extractb: f32 = simd_extract!(b, 0);
39692        let extractc: f32 = simd_extract!(c, 0);
39693        let r = vfmaddssround(extracta, extractb, extractc, ROUNDING);
39694        simd_insert!(a, 0, r)
39695    }
39696}
39697
39698/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39699///
39700/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39701/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39702/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39703/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39704/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39705/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39706///
39707/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_round_ss&expand=2574)
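///
/// A minimal usage sketch of the writemask behaviour (editorial addition, not part of Intel's
/// documentation). It assumes a nightly toolchain with `#![feature(stdarch_x86_avx512)]` and
/// runtime AVX-512F support:
///
/// ```
/// #![feature(stdarch_x86_avx512)]
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     let a = _mm_set_ss(2.0);
///     let b = _mm_set_ss(3.0);
///     let c = _mm_set_ss(4.0);
///     // SAFETY: AVX-512F support was verified at runtime above.
///     let (taken, not_taken) = unsafe {
///         (
///             _mm_mask_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 0b1, b, c),
///             _mm_mask_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 0b0, b, c),
///         )
///     };
///     assert_eq!(_mm_cvtss_f32(taken), 10.0); // mask bit 0 set: 2.0 * 3.0 + 4.0
///     assert_eq!(_mm_cvtss_f32(not_taken), 2.0); // mask bit 0 clear: lower lane copied from `a`
/// }
/// # }
/// ```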
39708#[inline]
39709#[target_feature(enable = "avx512f")]
39710#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39711#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39712#[rustc_legacy_const_generics(4)]
39713pub fn _mm_mask_fmadd_round_ss<const ROUNDING: i32>(
39714    a: __m128,
39715    k: __mmask8,
39716    b: __m128,
39717    c: __m128,
39718) -> __m128 {
39719    unsafe {
39720        static_assert_rounding!(ROUNDING);
39721        let mut fmadd: f32 = simd_extract!(a, 0);
39722        if (k & 0b00000001) != 0 {
39723            let extractb: f32 = simd_extract!(b, 0);
39724            let extractc: f32 = simd_extract!(c, 0);
39725            fmadd = vfmaddssround(fmadd, extractb, extractc, ROUNDING);
39726        }
39727        simd_insert!(a, 0, fmadd)
39728    }
39729}
39730
39731/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39732///
39733/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39734/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39735/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39736/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39737/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39738/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39739///
39740/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_round_ss&expand=2576)
39741#[inline]
39742#[target_feature(enable = "avx512f")]
39743#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39744#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39745#[rustc_legacy_const_generics(4)]
39746pub fn _mm_maskz_fmadd_round_ss<const ROUNDING: i32>(
39747    k: __mmask8,
39748    a: __m128,
39749    b: __m128,
39750    c: __m128,
39751) -> __m128 {
39752    unsafe {
39753        static_assert_rounding!(ROUNDING);
39754        let mut fmadd: f32 = 0.;
39755        if (k & 0b00000001) != 0 {
39756            let extracta: f32 = simd_extract!(a, 0);
39757            let extractb: f32 = simd_extract!(b, 0);
39758            let extractc: f32 = simd_extract!(c, 0);
39759            fmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
39760        }
39761        simd_insert!(a, 0, fmadd)
39762    }
39763}
39764
39765/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
39766///
39767/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39768/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39769/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39770/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39771/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39772/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39773///
39774/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_round_ss&expand=2575)
39775#[inline]
39776#[target_feature(enable = "avx512f")]
39777#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39778#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39779#[rustc_legacy_const_generics(4)]
39780pub fn _mm_mask3_fmadd_round_ss<const ROUNDING: i32>(
39781    a: __m128,
39782    b: __m128,
39783    c: __m128,
39784    k: __mmask8,
39785) -> __m128 {
39786    unsafe {
39787        static_assert_rounding!(ROUNDING);
39788        let mut fmadd: f32 = simd_extract!(c, 0);
39789        if (k & 0b00000001) != 0 {
39790            let extracta: f32 = simd_extract!(a, 0);
39791            let extractb: f32 = simd_extract!(b, 0);
39792            fmadd = vfmaddssround(extracta, extractb, fmadd, ROUNDING);
39793        }
39794        simd_insert!(c, 0, fmadd)
39795    }
39796}
39797
39798/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
39799///
39800/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39801/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39802/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39803/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39804/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39805/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39806///
39807/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmadd_round_sd&expand=2569)
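///
/// A minimal usage sketch (editorial addition, not part of Intel's documentation). It assumes a
/// nightly toolchain with `#![feature(stdarch_x86_avx512)]` and runtime AVX-512F support, and uses
/// `_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC` as the rounding mode:
///
/// ```
/// #![feature(stdarch_x86_avx512)]
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     let a = _mm_set_sd(2.0);
///     let b = _mm_set_sd(3.0);
///     let c = _mm_set_sd(4.0);
///     // SAFETY: AVX-512F support was verified at runtime above.
///     let r = unsafe {
///         _mm_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c)
///     };
///     // Lower lane: 2.0 * 3.0 + 4.0; upper lane copied from `a`.
///     assert_eq!(_mm_cvtsd_f64(r), 10.0);
/// }
/// # }
/// ```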
39808#[inline]
39809#[target_feature(enable = "avx512f")]
39810#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39811#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39812#[rustc_legacy_const_generics(3)]
39813pub fn _mm_fmadd_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
39814    unsafe {
39815        static_assert_rounding!(ROUNDING);
39816        let extracta: f64 = simd_extract!(a, 0);
39817        let extractb: f64 = simd_extract!(b, 0);
39818        let extractc: f64 = simd_extract!(c, 0);
39819        let fmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
39820        simd_insert!(a, 0, fmadd)
39821    }
39822}
39823
39824/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39825///
39826/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39827/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39828/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39829/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39830/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39831/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39832///
39833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_round_sd&expand=2570)
39834#[inline]
39835#[target_feature(enable = "avx512f")]
39836#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39837#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39838#[rustc_legacy_const_generics(4)]
39839pub fn _mm_mask_fmadd_round_sd<const ROUNDING: i32>(
39840    a: __m128d,
39841    k: __mmask8,
39842    b: __m128d,
39843    c: __m128d,
39844) -> __m128d {
39845    unsafe {
39846        static_assert_rounding!(ROUNDING);
39847        let mut fmadd: f64 = simd_extract!(a, 0);
39848        if (k & 0b00000001) != 0 {
39849            let extractb: f64 = simd_extract!(b, 0);
39850            let extractc: f64 = simd_extract!(c, 0);
39851            fmadd = vfmaddsdround(fmadd, extractb, extractc, ROUNDING);
39852        }
39853        simd_insert!(a, 0, fmadd)
39854    }
39855}
39856
39857/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39858///
39859/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39860/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39861/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39862/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39863/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39864/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39865///
39866/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_round_sd&expand=2572)
39867#[inline]
39868#[target_feature(enable = "avx512f")]
39869#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39870#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39871#[rustc_legacy_const_generics(4)]
39872pub fn _mm_maskz_fmadd_round_sd<const ROUNDING: i32>(
39873    k: __mmask8,
39874    a: __m128d,
39875    b: __m128d,
39876    c: __m128d,
39877) -> __m128d {
39878    unsafe {
39879        static_assert_rounding!(ROUNDING);
39880        let mut fmadd: f64 = 0.;
39881        if (k & 0b00000001) != 0 {
39882            let extracta: f64 = simd_extract!(a, 0);
39883            let extractb: f64 = simd_extract!(b, 0);
39884            let extractc: f64 = simd_extract!(c, 0);
39885            fmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
39886        }
39887        simd_insert!(a, 0, fmadd)
39888    }
39889}
39890
39891/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
39892///
39893/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39894/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39895/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39896/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39897/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39898/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39899///
39900/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_round_sd&expand=2571)
39901#[inline]
39902#[target_feature(enable = "avx512f")]
39903#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39904#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39905#[rustc_legacy_const_generics(4)]
39906pub fn _mm_mask3_fmadd_round_sd<const ROUNDING: i32>(
39907    a: __m128d,
39908    b: __m128d,
39909    c: __m128d,
39910    k: __mmask8,
39911) -> __m128d {
39912    unsafe {
39913        static_assert_rounding!(ROUNDING);
39914        let mut fmadd: f64 = simd_extract!(c, 0);
39915        if (k & 0b00000001) != 0 {
39916            let extracta: f64 = simd_extract!(a, 0);
39917            let extractb: f64 = simd_extract!(b, 0);
39918            fmadd = vfmaddsdround(extracta, extractb, fmadd, ROUNDING);
39919        }
39920        simd_insert!(c, 0, fmadd)
39921    }
39922}
39923
39924/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
39925///
39926/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39927/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39928/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39929/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39930/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39931/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39932///
39933/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmsub_round_ss&expand=2659)
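///
/// A minimal usage sketch (editorial addition, not part of Intel's documentation). It assumes a
/// nightly toolchain with `#![feature(stdarch_x86_avx512)]` and runtime AVX-512F support, and uses
/// `_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC` as the rounding mode:
///
/// ```
/// #![feature(stdarch_x86_avx512)]
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     let a = _mm_set_ss(2.0);
///     let b = _mm_set_ss(3.0);
///     let c = _mm_set_ss(4.0);
///     // SAFETY: AVX-512F support was verified at runtime above.
///     let r = unsafe {
///         _mm_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c)
///     };
///     // Lower lane: 2.0 * 3.0 - 4.0; upper lanes copied from `a`.
///     assert_eq!(_mm_cvtss_f32(r), 2.0);
/// }
/// # }
/// ```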
39934#[inline]
39935#[target_feature(enable = "avx512f")]
39936#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39937#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
39938#[rustc_legacy_const_generics(3)]
39939pub fn _mm_fmsub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
39940    unsafe {
39941        static_assert_rounding!(ROUNDING);
39942        let extracta: f32 = simd_extract!(a, 0);
39943        let extractb: f32 = simd_extract!(b, 0);
39944        let extractc: f32 = simd_extract!(c, 0);
39945        let extractc = -extractc;
39946        let fmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
39947        simd_insert!(a, 0, fmsub)
39948    }
39949}
39950
39951/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39952///
39953/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39954/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39955/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39956/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39957/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39958/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39959///
39960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_round_ss&expand=2660)
39961#[inline]
39962#[target_feature(enable = "avx512f")]
39963#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39964#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
39965#[rustc_legacy_const_generics(4)]
39966pub fn _mm_mask_fmsub_round_ss<const ROUNDING: i32>(
39967    a: __m128,
39968    k: __mmask8,
39969    b: __m128,
39970    c: __m128,
39971) -> __m128 {
39972    unsafe {
39973        static_assert_rounding!(ROUNDING);
39974        let mut fmsub: f32 = simd_extract!(a, 0);
39975        if (k & 0b00000001) != 0 {
39976            let extractb: f32 = simd_extract!(b, 0);
39977            let extractc: f32 = simd_extract!(c, 0);
39978            let extractc = -extractc;
39979            fmsub = vfmaddssround(fmsub, extractb, extractc, ROUNDING);
39980        }
39981        simd_insert!(a, 0, fmsub)
39982    }
39983}
39984
39985/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39986///
39987/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39988/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39989/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39990/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39991/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39992/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39993///
39994/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_round_ss&expand=2662)
39995#[inline]
39996#[target_feature(enable = "avx512f")]
39997#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39998#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
39999#[rustc_legacy_const_generics(4)]
40000pub fn _mm_maskz_fmsub_round_ss<const ROUNDING: i32>(
40001    k: __mmask8,
40002    a: __m128,
40003    b: __m128,
40004    c: __m128,
40005) -> __m128 {
40006    unsafe {
40007        static_assert_rounding!(ROUNDING);
40008        let mut fmsub: f32 = 0.;
40009        if (k & 0b00000001) != 0 {
40010            let extracta: f32 = simd_extract!(a, 0);
40011            let extractb: f32 = simd_extract!(b, 0);
40012            let extractc: f32 = simd_extract!(c, 0);
40013            let extractc = -extractc;
40014            fmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40015        }
40016        simd_insert!(a, 0, fmsub)
40017    }
40018}
40019
40020/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
40021///
40022/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40023/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40024/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40025/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40026/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40027/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40028///
40029/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_round_ss&expand=2661)
40030#[inline]
40031#[target_feature(enable = "avx512f")]
40032#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40033#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40034#[rustc_legacy_const_generics(4)]
40035pub fn _mm_mask3_fmsub_round_ss<const ROUNDING: i32>(
40036    a: __m128,
40037    b: __m128,
40038    c: __m128,
40039    k: __mmask8,
40040) -> __m128 {
40041    unsafe {
40042        static_assert_rounding!(ROUNDING);
40043        let mut fmsub: f32 = simd_extract!(c, 0);
40044        if (k & 0b00000001) != 0 {
40045            let extracta: f32 = simd_extract!(a, 0);
40046            let extractb: f32 = simd_extract!(b, 0);
40047            let extractc = -fmsub;
40048            fmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40049        }
40050        simd_insert!(c, 0, fmsub)
40051    }
40052}
40053
40054/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
40055///
40056/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40057/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40058/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40059/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40060/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40061/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40062///
40063/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmsub_round_sd&expand=2655)
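///
/// A minimal usage sketch (editorial addition, not part of Intel's documentation). It assumes a
/// nightly toolchain with `#![feature(stdarch_x86_avx512)]` and runtime AVX-512F support, and uses
/// `_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC` as the rounding mode:
///
/// ```
/// #![feature(stdarch_x86_avx512)]
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     let a = _mm_set_sd(2.0);
///     let b = _mm_set_sd(3.0);
///     let c = _mm_set_sd(4.0);
///     // SAFETY: AVX-512F support was verified at runtime above.
///     let r = unsafe {
///         _mm_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c)
///     };
///     // Lower lane: 2.0 * 3.0 - 4.0; upper lane copied from `a`.
///     assert_eq!(_mm_cvtsd_f64(r), 2.0);
/// }
/// # }
/// ```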
40064#[inline]
40065#[target_feature(enable = "avx512f")]
40066#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40067#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40068#[rustc_legacy_const_generics(3)]
40069pub fn _mm_fmsub_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
40070    unsafe {
40071        static_assert_rounding!(ROUNDING);
40072        let extracta: f64 = simd_extract!(a, 0);
40073        let extractb: f64 = simd_extract!(b, 0);
40074        let extractc: f64 = simd_extract!(c, 0);
40075        let extractc = -extractc;
40076        let fmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40077        simd_insert!(a, 0, fmsub)
40078    }
40079}
40080
40081/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40082///
40083/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40084/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40085/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40086/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40087/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40088/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40089///
40090/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_round_sd&expand=2656)
40091#[inline]
40092#[target_feature(enable = "avx512f")]
40093#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40094#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40095#[rustc_legacy_const_generics(4)]
40096pub fn _mm_mask_fmsub_round_sd<const ROUNDING: i32>(
40097    a: __m128d,
40098    k: __mmask8,
40099    b: __m128d,
40100    c: __m128d,
40101) -> __m128d {
40102    unsafe {
40103        static_assert_rounding!(ROUNDING);
40104        let mut fmsub: f64 = simd_extract!(a, 0);
40105        if (k & 0b00000001) != 0 {
40106            let extractb: f64 = simd_extract!(b, 0);
40107            let extractc: f64 = simd_extract!(c, 0);
40108            let extractc = -extractc;
40109            fmsub = vfmaddsdround(fmsub, extractb, extractc, ROUNDING);
40110        }
40111        simd_insert!(a, 0, fmsub)
40112    }
40113}
40114
40115/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40116///
40117/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40118/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40119/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40120/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40121/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40122/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40123///
40124/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_round_sd&expand=2658)
40125#[inline]
40126#[target_feature(enable = "avx512f")]
40127#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40128#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40129#[rustc_legacy_const_generics(4)]
40130pub fn _mm_maskz_fmsub_round_sd<const ROUNDING: i32>(
40131    k: __mmask8,
40132    a: __m128d,
40133    b: __m128d,
40134    c: __m128d,
40135) -> __m128d {
40136    unsafe {
40137        static_assert_rounding!(ROUNDING);
40138        let mut fmsub: f64 = 0.;
40139        if (k & 0b00000001) != 0 {
40140            let extracta: f64 = simd_extract!(a, 0);
40141            let extractb: f64 = simd_extract!(b, 0);
40142            let extractc: f64 = simd_extract!(c, 0);
40143            let extractc = -extractc;
40144            fmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40145        }
40146        simd_insert!(a, 0, fmsub)
40147    }
40148}
40149
40150/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
40151///
40152/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40153/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40154/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40155/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40156/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40157/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40158///
40159/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_round_sd&expand=2657)
40160#[inline]
40161#[target_feature(enable = "avx512f")]
40162#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40163#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40164#[rustc_legacy_const_generics(4)]
40165pub fn _mm_mask3_fmsub_round_sd<const ROUNDING: i32>(
40166    a: __m128d,
40167    b: __m128d,
40168    c: __m128d,
40169    k: __mmask8,
40170) -> __m128d {
40171    unsafe {
40172        static_assert_rounding!(ROUNDING);
40173        let mut fmsub: f64 = simd_extract!(c, 0);
40174        if (k & 0b00000001) != 0 {
40175            let extracta: f64 = simd_extract!(a, 0);
40176            let extractb: f64 = simd_extract!(b, 0);
40177            let extractc = -fmsub;
40178            fmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40179        }
40180        simd_insert!(c, 0, fmsub)
40181    }
40182}
40183
40184/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
40185///
40186/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40187/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40188/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40189/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40190/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40191/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40192///
40193/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmadd_round_ss&expand=2739)
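///
/// A minimal usage sketch (editorial addition, not part of Intel's documentation). It assumes a
/// nightly toolchain with `#![feature(stdarch_x86_avx512)]` and runtime AVX-512F support, and uses
/// `_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC` as the rounding mode:
///
/// ```
/// #![feature(stdarch_x86_avx512)]
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     let a = _mm_set_ss(2.0);
///     let b = _mm_set_ss(3.0);
///     let c = _mm_set_ss(4.0);
///     // SAFETY: AVX-512F support was verified at runtime above.
///     let r = unsafe {
///         _mm_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c)
///     };
///     // Lower lane: -(2.0 * 3.0) + 4.0; upper lanes copied from `a`.
///     assert_eq!(_mm_cvtss_f32(r), -2.0);
/// }
/// # }
/// ```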
40194#[inline]
40195#[target_feature(enable = "avx512f")]
40196#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40197#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40198#[rustc_legacy_const_generics(3)]
40199pub fn _mm_fnmadd_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
40200    unsafe {
40201        static_assert_rounding!(ROUNDING);
40202        let extracta: f32 = simd_extract!(a, 0);
40203        let extracta = -extracta;
40204        let extractb: f32 = simd_extract!(b, 0);
40205        let extractc: f32 = simd_extract!(c, 0);
40206        let fnmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40207        simd_insert!(a, 0, fnmadd)
40208    }
40209}
40210
40211/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40212///
40213/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40214/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40215/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40216/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40217/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40218/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40219///
40220/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_round_ss&expand=2740)
40221#[inline]
40222#[target_feature(enable = "avx512f")]
40223#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40224#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40225#[rustc_legacy_const_generics(4)]
40226pub fn _mm_mask_fnmadd_round_ss<const ROUNDING: i32>(
40227    a: __m128,
40228    k: __mmask8,
40229    b: __m128,
40230    c: __m128,
40231) -> __m128 {
40232    unsafe {
40233        static_assert_rounding!(ROUNDING);
40234        let mut fnmadd: f32 = simd_extract!(a, 0);
40235        if (k & 0b00000001) != 0 {
40236            let extracta = -fnmadd;
40237            let extractb: f32 = simd_extract!(b, 0);
40238            let extractc: f32 = simd_extract!(c, 0);
40239            fnmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40240        }
40241        simd_insert!(a, 0, fnmadd)
40242    }
40243}
40244
40245/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40246///
40247/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40248/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40249/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40250/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40251/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40252/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40253///
40254/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_round_ss&expand=2742)
40255#[inline]
40256#[target_feature(enable = "avx512f")]
40257#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40258#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40259#[rustc_legacy_const_generics(4)]
40260pub fn _mm_maskz_fnmadd_round_ss<const ROUNDING: i32>(
40261    k: __mmask8,
40262    a: __m128,
40263    b: __m128,
40264    c: __m128,
40265) -> __m128 {
40266    unsafe {
40267        static_assert_rounding!(ROUNDING);
40268        let mut fnmadd: f32 = 0.;
40269        if (k & 0b00000001) != 0 {
40270            let extracta: f32 = simd_extract!(a, 0);
40271            let extracta = -extracta;
40272            let extractb: f32 = simd_extract!(b, 0);
40273            let extractc: f32 = simd_extract!(c, 0);
40274            fnmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40275        }
40276        simd_insert!(a, 0, fnmadd)
40277    }
40278}
40279
40280/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
40281///
40282/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40283/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40284/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40285/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40286/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40287/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40288///
40289/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_round_ss&expand=2741)
40290#[inline]
40291#[target_feature(enable = "avx512f")]
40292#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40293#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40294#[rustc_legacy_const_generics(4)]
40295pub fn _mm_mask3_fnmadd_round_ss<const ROUNDING: i32>(
40296    a: __m128,
40297    b: __m128,
40298    c: __m128,
40299    k: __mmask8,
40300) -> __m128 {
40301    unsafe {
40302        static_assert_rounding!(ROUNDING);
40303        let mut fnmadd: f32 = simd_extract!(c, 0);
40304        if (k & 0b00000001) != 0 {
40305            let extracta: f32 = simd_extract!(a, 0);
40306            let extracta = -extracta;
40307            let extractb: f32 = simd_extract!(b, 0);
40308            fnmadd = vfmaddssround(extracta, extractb, fnmadd, ROUNDING);
40309        }
40310        simd_insert!(c, 0, fnmadd)
40311    }
40312}
40313
40314/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
40315///
40316/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40317/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40318/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40319/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40320/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40321/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40322///
40323/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmadd_round_sd&expand=2735)
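///
/// A minimal usage sketch (editorial addition, not part of Intel's documentation). It assumes a
/// nightly toolchain with `#![feature(stdarch_x86_avx512)]` and runtime AVX-512F support, and uses
/// `_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC` as the rounding mode:
///
/// ```
/// #![feature(stdarch_x86_avx512)]
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     let a = _mm_set_sd(2.0);
///     let b = _mm_set_sd(3.0);
///     let c = _mm_set_sd(4.0);
///     // SAFETY: AVX-512F support was verified at runtime above.
///     let r = unsafe {
///         _mm_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c)
///     };
///     // Lower lane: -(2.0 * 3.0) + 4.0; upper lane copied from `a`.
///     assert_eq!(_mm_cvtsd_f64(r), -2.0);
/// }
/// # }
/// ```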
40324#[inline]
40325#[target_feature(enable = "avx512f")]
40326#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40327#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40328#[rustc_legacy_const_generics(3)]
40329pub fn _mm_fnmadd_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
40330    unsafe {
40331        static_assert_rounding!(ROUNDING);
40332        let extracta: f64 = simd_extract!(a, 0);
40333        let extracta = -extracta;
40334        let extractb: f64 = simd_extract!(b, 0);
40335        let extractc: f64 = simd_extract!(c, 0);
40336        let fnmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40337        simd_insert!(a, 0, fnmadd)
40338    }
40339}
40340
40341/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40342///
40343/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40344/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40345/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40346/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40347/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40348/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40349///
40350/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_round_sd&expand=2736)
40351#[inline]
40352#[target_feature(enable = "avx512f")]
40353#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40354#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40355#[rustc_legacy_const_generics(4)]
40356pub fn _mm_mask_fnmadd_round_sd<const ROUNDING: i32>(
40357    a: __m128d,
40358    k: __mmask8,
40359    b: __m128d,
40360    c: __m128d,
40361) -> __m128d {
40362    unsafe {
40363        static_assert_rounding!(ROUNDING);
40364        let mut fnmadd: f64 = simd_extract!(a, 0);
40365        if (k & 0b00000001) != 0 {
40366            let extracta = -fnmadd;
40367            let extractb: f64 = simd_extract!(b, 0);
40368            let extractc: f64 = simd_extract!(c, 0);
40369            fnmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40370        }
40371        simd_insert!(a, 0, fnmadd)
40372    }
40373}
40374
40375/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40376///
40377/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40378/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40379/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40380/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40381/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40382/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40383///
40384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_round_sd&expand=2738)
40385#[inline]
40386#[target_feature(enable = "avx512f")]
40387#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40388#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40389#[rustc_legacy_const_generics(4)]
40390pub fn _mm_maskz_fnmadd_round_sd<const ROUNDING: i32>(
40391    k: __mmask8,
40392    a: __m128d,
40393    b: __m128d,
40394    c: __m128d,
40395) -> __m128d {
40396    unsafe {
40397        static_assert_rounding!(ROUNDING);
40398        let mut fnmadd: f64 = 0.;
40399        if (k & 0b00000001) != 0 {
40400            let extracta: f64 = simd_extract!(a, 0);
40401            let extracta = -extracta;
40402            let extractb: f64 = simd_extract!(b, 0);
40403            let extractc: f64 = simd_extract!(c, 0);
40404            fnmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40405        }
40406        simd_insert!(a, 0, fnmadd)
40407    }
40408}
40409
40410/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
40411///
40412/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40413/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40414/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40415/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40416/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40417/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40418///
40419/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_round_sd&expand=2737)
40420#[inline]
40421#[target_feature(enable = "avx512f")]
40422#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40423#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40424#[rustc_legacy_const_generics(4)]
40425pub fn _mm_mask3_fnmadd_round_sd<const ROUNDING: i32>(
40426    a: __m128d,
40427    b: __m128d,
40428    c: __m128d,
40429    k: __mmask8,
40430) -> __m128d {
40431    unsafe {
40432        static_assert_rounding!(ROUNDING);
40433        let mut fnmadd: f64 = simd_extract!(c, 0);
40434        if (k & 0b00000001) != 0 {
40435            let extracta: f64 = simd_extract!(a, 0);
40436            let extracta = -extracta;
40437            let extractb: f64 = simd_extract!(b, 0);
40438            fnmadd = vfmaddsdround(extracta, extractb, fnmadd, ROUNDING);
40439        }
40440        simd_insert!(c, 0, fnmadd)
40441    }
40442}
40443
40444/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
40445///
40446/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40447/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40448/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40449/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40450/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40451/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40452///
40453/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmsub_round_ss&expand=2787)
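///
/// A minimal usage sketch (editorial addition, not part of Intel's documentation). It assumes a
/// nightly toolchain with `#![feature(stdarch_x86_avx512)]` and runtime AVX-512F support, and uses
/// `_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC` as the rounding mode:
///
/// ```
/// #![feature(stdarch_x86_avx512)]
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     let a = _mm_set_ss(2.0);
///     let b = _mm_set_ss(3.0);
///     let c = _mm_set_ss(4.0);
///     // SAFETY: AVX-512F support was verified at runtime above.
///     let r = unsafe {
///         _mm_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c)
///     };
///     // Lower lane: -(2.0 * 3.0) - 4.0; upper lanes copied from `a`.
///     assert_eq!(_mm_cvtss_f32(r), -10.0);
/// }
/// # }
/// ```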
40454#[inline]
40455#[target_feature(enable = "avx512f")]
40456#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40457#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40458#[rustc_legacy_const_generics(3)]
40459pub fn _mm_fnmsub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
40460    unsafe {
40461        static_assert_rounding!(ROUNDING);
40462        let extracta: f32 = simd_extract!(a, 0);
40463        let extracta = -extracta;
40464        let extractb: f32 = simd_extract!(b, 0);
40465        let extractc: f32 = simd_extract!(c, 0);
40466        let extractc = -extractc;
40467        let fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40468        simd_insert!(a, 0, fnmsub)
40469    }
40470}
40471
40472/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40473///
40474/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40475/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40476/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40477/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40478/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40479/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40480///
40481/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_round_ss&expand=2788)
40482#[inline]
40483#[target_feature(enable = "avx512f")]
40484#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40485#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40486#[rustc_legacy_const_generics(4)]
40487pub fn _mm_mask_fnmsub_round_ss<const ROUNDING: i32>(
40488    a: __m128,
40489    k: __mmask8,
40490    b: __m128,
40491    c: __m128,
40492) -> __m128 {
40493    unsafe {
40494        static_assert_rounding!(ROUNDING);
40495        let mut fnmsub: f32 = simd_extract!(a, 0);
40496        if (k & 0b00000001) != 0 {
40497            let extracta = -fnmsub;
40498            let extractb: f32 = simd_extract!(b, 0);
40499            let extractc: f32 = simd_extract!(c, 0);
40500            let extractc = -extractc;
40501            fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40502        }
40503        simd_insert!(a, 0, fnmsub)
40504    }
40505}
40506
40507/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40508///
40509/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40510/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40511/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40512/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40513/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40514/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40515///
40516/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_round_ss&expand=2790)
40517#[inline]
40518#[target_feature(enable = "avx512f")]
40519#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40520#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40521#[rustc_legacy_const_generics(4)]
40522pub fn _mm_maskz_fnmsub_round_ss<const ROUNDING: i32>(
40523    k: __mmask8,
40524    a: __m128,
40525    b: __m128,
40526    c: __m128,
40527) -> __m128 {
40528    unsafe {
40529        static_assert_rounding!(ROUNDING);
40530        let mut fnmsub: f32 = 0.;
40531        if (k & 0b00000001) != 0 {
40532            let extracta: f32 = simd_extract!(a, 0);
40533            let extracta = -extracta;
40534            let extractb: f32 = simd_extract!(b, 0);
40535            let extractc: f32 = simd_extract!(c, 0);
40536            let extractc = -extractc;
40537            fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40538        }
40539        simd_insert!(a, 0, fnmsub)
40540    }
40541}
40542
40543/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
40544///
40545/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40546/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40547/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40548/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40549/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40550/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40551///
40552/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_round_ss&expand=2789)
40553#[inline]
40554#[target_feature(enable = "avx512f")]
40555#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40556#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40557#[rustc_legacy_const_generics(4)]
40558pub fn _mm_mask3_fnmsub_round_ss<const ROUNDING: i32>(
40559    a: __m128,
40560    b: __m128,
40561    c: __m128,
40562    k: __mmask8,
40563) -> __m128 {
40564    unsafe {
40565        static_assert_rounding!(ROUNDING);
40566        let mut fnmsub: f32 = simd_extract!(c, 0);
40567        if (k & 0b00000001) != 0 {
40568            let extracta: f32 = simd_extract!(a, 0);
40569            let extracta = -extracta;
40570            let extractb: f32 = simd_extract!(b, 0);
40571            let extractc = -fnmsub;
40572            fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40573        }
40574        simd_insert!(c, 0, fnmsub)
40575    }
40576}
40577
40578/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
40579///
40580/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40581/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40582/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40583/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40584/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40585/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40586///
40587/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmsub_round_sd&expand=2783)
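///
/// A minimal usage sketch (editorial addition, not part of Intel's documentation). It assumes a
/// nightly toolchain with `#![feature(stdarch_x86_avx512)]` and runtime AVX-512F support, and uses
/// `_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC` as the rounding mode:
///
/// ```
/// #![feature(stdarch_x86_avx512)]
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     let a = _mm_set_sd(2.0);
///     let b = _mm_set_sd(3.0);
///     let c = _mm_set_sd(4.0);
///     // SAFETY: AVX-512F support was verified at runtime above.
///     let r = unsafe {
///         _mm_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c)
///     };
///     // Lower lane: -(2.0 * 3.0) - 4.0; upper lane copied from `a`.
///     assert_eq!(_mm_cvtsd_f64(r), -10.0);
/// }
/// # }
/// ```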
40588#[inline]
40589#[target_feature(enable = "avx512f")]
40590#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40591#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40592#[rustc_legacy_const_generics(3)]
40593pub fn _mm_fnmsub_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
40594    unsafe {
40595        static_assert_rounding!(ROUNDING);
40596        let extracta: f64 = simd_extract!(a, 0);
40597        let extracta = -extracta;
40598        let extractb: f64 = simd_extract!(b, 0);
40599        let extractc: f64 = simd_extract!(c, 0);
40600        let extractc = -extractc;
40601        let fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40602        simd_insert!(a, 0, fnmsub)
40603    }
40604}
40605
40606/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40607///
40608/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40609/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40610/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40611/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40612/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40613/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40614///
40615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_round_sd&expand=2784)
40616#[inline]
40617#[target_feature(enable = "avx512f")]
40618#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40619#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40620#[rustc_legacy_const_generics(4)]
40621pub fn _mm_mask_fnmsub_round_sd<const ROUNDING: i32>(
40622    a: __m128d,
40623    k: __mmask8,
40624    b: __m128d,
40625    c: __m128d,
40626) -> __m128d {
40627    unsafe {
40628        static_assert_rounding!(ROUNDING);
40629        let mut fnmsub: f64 = simd_extract!(a, 0);
40630        if (k & 0b00000001) != 0 {
40631            let extracta = -fnmsub;
40632            let extractb: f64 = simd_extract!(b, 0);
40633            let extractc: f64 = simd_extract!(c, 0);
40634            let extractc = -extractc;
40635            fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40636        }
40637        simd_insert!(a, 0, fnmsub)
40638    }
40639}
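
// Illustrative sketch (not part of the original source): the write-mask variant above only
// updates the lower lane when mask bit 0 is set; otherwise the lower lane of `a` passes
// through unchanged. Names and values are hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn mask_fnmsub_round_sd_sketch() {
    let a = _mm_set_pd(10.0, 2.0);
    let b = _mm_set_pd(20.0, 3.0);
    let c = _mm_set_pd(30.0, 1.0);
    // Mask bit 0 clear: the lower lane stays 2.0 (copied from `a`).
    let kept = _mm_mask_fnmsub_round_sd::<{ _MM_FROUND_CUR_DIRECTION }>(a, 0b0, b, c);
    assert_eq!(_mm_cvtsd_f64(kept), 2.0);
    // Mask bit 0 set: the lower lane becomes -(2.0 * 3.0) - 1.0 = -7.0.
    let computed = _mm_mask_fnmsub_round_sd::<{ _MM_FROUND_CUR_DIRECTION }>(a, 0b1, b, c);
    assert_eq!(_mm_cvtsd_f64(computed), -7.0);
}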
40640
40641/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40642///
40643/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40644/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40645/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40646/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40647/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40648/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40649///
40650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_round_sd&expand=2786)
40651#[inline]
40652#[target_feature(enable = "avx512f")]
40653#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40654#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40655#[rustc_legacy_const_generics(4)]
40656pub fn _mm_maskz_fnmsub_round_sd<const ROUNDING: i32>(
40657    k: __mmask8,
40658    a: __m128d,
40659    b: __m128d,
40660    c: __m128d,
40661) -> __m128d {
40662    unsafe {
40663        static_assert_rounding!(ROUNDING);
40664        let mut fnmsub: f64 = 0.;
40665        if (k & 0b00000001) != 0 {
40666            let extracta: f64 = simd_extract!(a, 0);
40667            let extracta = -extracta;
40668            let extractb: f64 = simd_extract!(b, 0);
40669            let extractc: f64 = simd_extract!(c, 0);
40670            let extractc = -extractc;
40671            fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40672        }
40673        simd_insert!(a, 0, fnmsub)
40674    }
40675}
40676
40677/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
40678///
40679/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40680/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40681/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40682/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40683/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40684/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40685///
40686/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_round_sd&expand=2785)
40687#[inline]
40688#[target_feature(enable = "avx512f")]
40689#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40690#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40691#[rustc_legacy_const_generics(4)]
40692pub fn _mm_mask3_fnmsub_round_sd<const ROUNDING: i32>(
40693    a: __m128d,
40694    b: __m128d,
40695    c: __m128d,
40696    k: __mmask8,
40697) -> __m128d {
40698    unsafe {
40699        static_assert_rounding!(ROUNDING);
40700        let mut fnmsub: f64 = simd_extract!(c, 0);
40701        if (k & 0b00000001) != 0 {
40702            let extracta: f64 = simd_extract!(a, 0);
40703            let extracta = -extracta;
40704            let extractb: f64 = simd_extract!(b, 0);
40705            let extractc = -fnmsub;
40706            fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40707        }
40708        simd_insert!(c, 0, fnmsub)
40709    }
40710}
40711
40712/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
40713///
40714/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_ss&expand=2517)
40715#[inline]
40716#[target_feature(enable = "avx512f")]
40717#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40718#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))]
40719#[rustc_legacy_const_generics(3)]
40720pub fn _mm_fixupimm_ss<const IMM8: i32>(a: __m128, b: __m128, c: __m128i) -> __m128 {
40721    unsafe {
40722        static_assert_uimm_bits!(IMM8, 8);
40723        let a = a.as_f32x4();
40724        let b = b.as_f32x4();
40725        let c = c.as_i32x4();
40726        let r = vfixupimmss(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION);
40727        let fixupimm: f32 = simd_extract!(r, 0);
40728        let r = simd_insert!(a, 0, fixupimm);
40729        transmute(r)
40730    }
40731}
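
// Illustrative sketch (not part of the original source): the shape of a call to the fix-up
// intrinsic above. `c` carries the per-class 4-bit token table and IMM8 controls exception
// reporting for certain input classes (see Intel's VFIXUPIMMSS description). Per my reading
// of that description, an all-zero table leaves the value from `a` untouched for every
// class; treat the exact table layout as an assumption to verify, so no result is asserted.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn fixupimm_ss_sketch() {
    let a = _mm_set_ss(5.0);
    let b = _mm_set_ss(0.0);
    let table = _mm_set1_epi32(0); // all tokens zero: no fix-up applied
    let _r = _mm_fixupimm_ss::<0>(a, b, table);
}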
40732
40733/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
40734///
40735/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_ss&expand=2518)
40736#[inline]
40737#[target_feature(enable = "avx512f")]
40738#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40739#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))]
40740#[rustc_legacy_const_generics(4)]
40741pub fn _mm_mask_fixupimm_ss<const IMM8: i32>(
40742    a: __m128,
40743    k: __mmask8,
40744    b: __m128,
40745    c: __m128i,
40746) -> __m128 {
40747    unsafe {
40748        static_assert_uimm_bits!(IMM8, 8);
40749        let a = a.as_f32x4();
40750        let b = b.as_f32x4();
40751        let c = c.as_i32x4();
40752        let fixupimm = vfixupimmss(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
40753        let fixupimm: f32 = simd_extract!(fixupimm, 0);
40754        let r = simd_insert!(a, 0, fixupimm);
40755        transmute(r)
40756    }
40757}
40758
40759/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
40760///
40761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_ss&expand=2519)
40762#[inline]
40763#[target_feature(enable = "avx512f")]
40764#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40765#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))]
40766#[rustc_legacy_const_generics(4)]
40767pub fn _mm_maskz_fixupimm_ss<const IMM8: i32>(
40768    k: __mmask8,
40769    a: __m128,
40770    b: __m128,
40771    c: __m128i,
40772) -> __m128 {
40773    unsafe {
40774        static_assert_uimm_bits!(IMM8, 8);
40775        let a = a.as_f32x4();
40776        let b = b.as_f32x4();
40777        let c = c.as_i32x4();
40778        let fixupimm = vfixupimmssz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
40779        let fixupimm: f32 = simd_extract!(fixupimm, 0);
40780        let r = simd_insert!(a, 0, fixupimm);
40781        transmute(r)
40782    }
40783}
40784
40785/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
40786///
40787/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_sd&expand=2514)
40788#[inline]
40789#[target_feature(enable = "avx512f")]
40790#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40791#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))]
40792#[rustc_legacy_const_generics(3)]
40793pub fn _mm_fixupimm_sd<const IMM8: i32>(a: __m128d, b: __m128d, c: __m128i) -> __m128d {
40794    unsafe {
40795        static_assert_uimm_bits!(IMM8, 8);
40796        let a = a.as_f64x2();
40797        let b = b.as_f64x2();
40798        let c = c.as_i64x2();
40799        let fixupimm = vfixupimmsd(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION);
40800        let fixupimm: f64 = simd_extract!(fixupimm, 0);
40801        let r = simd_insert!(a, 0, fixupimm);
40802        transmute(r)
40803    }
40804}
40805
40806/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
40807///
40808/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_sd&expand=2515)
40809#[inline]
40810#[target_feature(enable = "avx512f")]
40811#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40812#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))]
40813#[rustc_legacy_const_generics(4)]
40814pub fn _mm_mask_fixupimm_sd<const IMM8: i32>(
40815    a: __m128d,
40816    k: __mmask8,
40817    b: __m128d,
40818    c: __m128i,
40819) -> __m128d {
40820    unsafe {
40821        static_assert_uimm_bits!(IMM8, 8);
40822        let a = a.as_f64x2();
40823        let b = b.as_f64x2();
40824        let c = c.as_i64x2();
40825        let fixupimm = vfixupimmsd(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
40826        let fixupimm: f64 = simd_extract!(fixupimm, 0);
40827        let r = simd_insert!(a, 0, fixupimm);
40828        transmute(r)
40829    }
40830}
40831
40832/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
40833///
40834/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_sd&expand=2516)
40835#[inline]
40836#[target_feature(enable = "avx512f")]
40837#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40838#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))]
40839#[rustc_legacy_const_generics(4)]
40840pub fn _mm_maskz_fixupimm_sd<const IMM8: i32>(
40841    k: __mmask8,
40842    a: __m128d,
40843    b: __m128d,
40844    c: __m128i,
40845) -> __m128d {
40846    unsafe {
40847        static_assert_uimm_bits!(IMM8, 8);
40848        let a = a.as_f64x2();
40849        let b = b.as_f64x2();
40850        let c = c.as_i64x2();
40851        let fixupimm = vfixupimmsdz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
40852        let fixupimm: f64 = simd_extract!(fixupimm, 0);
40853        let r = simd_insert!(a, 0, fixupimm);
40854        transmute(r)
40855    }
40856}
40857
40858/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
40859/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40860///
40861/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_round_ss&expand=2511)
40862#[inline]
40863#[target_feature(enable = "avx512f")]
40864#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40865#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))]
40866#[rustc_legacy_const_generics(3, 4)]
40867pub fn _mm_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
40868    a: __m128,
40869    b: __m128,
40870    c: __m128i,
40871) -> __m128 {
40872    unsafe {
40873        static_assert_uimm_bits!(IMM8, 8);
40874        static_assert_mantissas_sae!(SAE);
40875        let a = a.as_f32x4();
40876        let b = b.as_f32x4();
40877        let c = c.as_i32x4();
40878        let r = vfixupimmss(a, b, c, IMM8, 0b11111111, SAE);
40879        let fixupimm: f32 = simd_extract!(r, 0);
40880        let r = simd_insert!(a, 0, fixupimm);
40881        transmute(r)
40882    }
40883}
40884
40885/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
40886/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40887///
40888/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_round_ss&expand=2512)
40889#[inline]
40890#[target_feature(enable = "avx512f")]
40891#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40892#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))]
40893#[rustc_legacy_const_generics(4, 5)]
40894pub fn _mm_mask_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
40895    a: __m128,
40896    k: __mmask8,
40897    b: __m128,
40898    c: __m128i,
40899) -> __m128 {
40900    unsafe {
40901        static_assert_uimm_bits!(IMM8, 8);
40902        static_assert_mantissas_sae!(SAE);
40903        let a = a.as_f32x4();
40904        let b = b.as_f32x4();
40905        let c = c.as_i32x4();
40906        let r = vfixupimmss(a, b, c, IMM8, k, SAE);
40907        let fixupimm: f32 = simd_extract!(r, 0);
40908        let r = simd_insert!(a, 0, fixupimm);
40909        transmute(r)
40910    }
40911}
40912
40913/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
40914/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40915///
40916/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_round_ss&expand=2513)
40917#[inline]
40918#[target_feature(enable = "avx512f")]
40919#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40920#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))]
40921#[rustc_legacy_const_generics(4, 5)]
40922pub fn _mm_maskz_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
40923    k: __mmask8,
40924    a: __m128,
40925    b: __m128,
40926    c: __m128i,
40927) -> __m128 {
40928    unsafe {
40929        static_assert_uimm_bits!(IMM8, 8);
40930        static_assert_mantissas_sae!(SAE);
40931        let a = a.as_f32x4();
40932        let b = b.as_f32x4();
40933        let c = c.as_i32x4();
40934        let r = vfixupimmssz(a, b, c, IMM8, k, SAE);
40935        let fixupimm: f32 = simd_extract!(r, 0);
40936        let r = simd_insert!(a, 0, fixupimm);
40937        transmute(r)
40938    }
40939}
40940
40941/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
40942/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40943///
40944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_round_sd&expand=2508)
40945#[inline]
40946#[target_feature(enable = "avx512f")]
40947#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40948#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))]
40949#[rustc_legacy_const_generics(3, 4)]
40950pub fn _mm_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
40951    a: __m128d,
40952    b: __m128d,
40953    c: __m128i,
40954) -> __m128d {
40955    unsafe {
40956        static_assert_uimm_bits!(IMM8, 8);
40957        static_assert_mantissas_sae!(SAE);
40958        let a = a.as_f64x2();
40959        let b = b.as_f64x2();
40960        let c = c.as_i64x2();
40961        let r = vfixupimmsd(a, b, c, IMM8, 0b11111111, SAE);
40962        let fixupimm: f64 = simd_extract!(r, 0);
40963        let r = simd_insert!(a, 0, fixupimm);
40964        transmute(r)
40965    }
40966}
40967
40968/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
40969/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40970///
40971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_round_sd&expand=2509)
40972#[inline]
40973#[target_feature(enable = "avx512f")]
40974#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40975#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))]
40976#[rustc_legacy_const_generics(4, 5)]
40977pub fn _mm_mask_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
40978    a: __m128d,
40979    k: __mmask8,
40980    b: __m128d,
40981    c: __m128i,
40982) -> __m128d {
40983    unsafe {
40984        static_assert_uimm_bits!(IMM8, 8);
40985        static_assert_mantissas_sae!(SAE);
40986        let a = a.as_f64x2();
40987        let b = b.as_f64x2();
40988        let c = c.as_i64x2();
40989        let r = vfixupimmsd(a, b, c, IMM8, k, SAE);
40990        let fixupimm: f64 = simd_extract!(r, 0);
40991        let r = simd_insert!(a, 0, fixupimm);
40992        transmute(r)
40993    }
40994}
40995
40996/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
40997/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40998///
40999/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_round_sd&expand=2510)
41000#[inline]
41001#[target_feature(enable = "avx512f")]
41002#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41003#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))]
41004#[rustc_legacy_const_generics(4, 5)]
41005pub fn _mm_maskz_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
41006    k: __mmask8,
41007    a: __m128d,
41008    b: __m128d,
41009    c: __m128i,
41010) -> __m128d {
41011    unsafe {
41012        static_assert_uimm_bits!(IMM8, 8);
41013        static_assert_mantissas_sae!(SAE);
41014        let a = a.as_f64x2();
41015        let b = b.as_f64x2();
41016        let c = c.as_i64x2();
41017        let r = vfixupimmsdz(a, b, c, IMM8, k, SAE);
41018        let fixupimm: f64 = simd_extract!(r, 0);
41019        let r = simd_insert!(a, 0, fixupimm);
41020        transmute(r)
41021    }
41022}
41023
41024/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
41025///
41026/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvtss_sd&expand=1896)
41027#[inline]
41028#[target_feature(enable = "avx512f")]
41029#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41030#[cfg_attr(test, assert_instr(vcvtss2sd))]
41031pub fn _mm_mask_cvtss_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128) -> __m128d {
41032    unsafe {
41033        transmute(vcvtss2sd(
41034            a.as_f64x2(),
41035            b.as_f32x4(),
41036            src.as_f64x2(),
41037            k,
41038            _MM_FROUND_CUR_DIRECTION,
41039        ))
41040    }
41041}
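
// Illustrative sketch (not part of the original source): masked widening of the lower f32 in
// `b` to f64. With mask bit 0 clear the lower lane comes from `src`; with it set the lane is
// the converted value. Names and values are hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn mask_cvtss_sd_sketch() {
    let src = _mm_set_pd(0.0, -1.0);
    let a = _mm_set_pd(7.0, 7.0);
    let b = _mm_set_ps(0.0, 0.0, 0.0, 2.5);
    let masked_off = _mm_mask_cvtss_sd(src, 0b0, a, b);
    assert_eq!(_mm_cvtsd_f64(masked_off), -1.0); // copied from `src`
    let converted = _mm_mask_cvtss_sd(src, 0b1, a, b);
    assert_eq!(_mm_cvtsd_f64(converted), 2.5); // f64 of b's lower f32
}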
41042
41043/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
41044///
41045/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvtss_sd&expand=1897)
41046#[inline]
41047#[target_feature(enable = "avx512f")]
41048#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41049#[cfg_attr(test, assert_instr(vcvtss2sd))]
41050pub fn _mm_maskz_cvtss_sd(k: __mmask8, a: __m128d, b: __m128) -> __m128d {
41051    unsafe {
41052        transmute(vcvtss2sd(
41053            a.as_f64x2(),
41054            b.as_f32x4(),
41055            f64x2::ZERO,
41056            k,
41057            _MM_FROUND_CUR_DIRECTION,
41058        ))
41059    }
41060}
41061
41062/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
41063///
41064/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvtsd_ss&expand=1797)
41065#[inline]
41066#[target_feature(enable = "avx512f")]
41067#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41068#[cfg_attr(test, assert_instr(vcvtsd2ss))]
41069pub fn _mm_mask_cvtsd_ss(src: __m128, k: __mmask8, a: __m128, b: __m128d) -> __m128 {
41070    unsafe {
41071        transmute(vcvtsd2ss(
41072            a.as_f32x4(),
41073            b.as_f64x2(),
41074            src.as_f32x4(),
41075            k,
41076            _MM_FROUND_CUR_DIRECTION,
41077        ))
41078    }
41079}
41080
41081/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
41082///
41083/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvtsd_ss&expand=1798)
41084#[inline]
41085#[target_feature(enable = "avx512f")]
41086#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41087#[cfg_attr(test, assert_instr(vcvtsd2ss))]
41088pub fn _mm_maskz_cvtsd_ss(k: __mmask8, a: __m128, b: __m128d) -> __m128 {
41089    unsafe {
41090        transmute(vcvtsd2ss(
41091            a.as_f32x4(),
41092            b.as_f64x2(),
41093            f32x4::ZERO,
41094            k,
41095            _MM_FROUND_CUR_DIRECTION,
41096        ))
41097    }
41098}
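
// Illustrative sketch (not part of the original source): zero-masked narrowing of the lower
// f64 in `b` to f32. Mask bit 0 clear zeroes the lower lane; set converts it. Names and
// values are hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn maskz_cvtsd_ss_sketch() {
    let a = _mm_set_ps(4.0, 3.0, 2.0, 1.0);
    let b = _mm_set_sd(-8.25); // exactly representable in both f64 and f32
    assert_eq!(_mm_cvtss_f32(_mm_maskz_cvtsd_ss(0b0, a, b)), 0.0);
    assert_eq!(_mm_cvtss_f32(_mm_maskz_cvtsd_ss(0b1, a, b)), -8.25);
}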
41099
41100/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
41101/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41102///
41103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_sd&expand=1371)
41104#[inline]
41105#[target_feature(enable = "avx512f")]
41106#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41107#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))]
41108#[rustc_legacy_const_generics(2)]
41109pub fn _mm_cvt_roundss_sd<const SAE: i32>(a: __m128d, b: __m128) -> __m128d {
41110    unsafe {
41111        static_assert_sae!(SAE);
41112        let a = a.as_f64x2();
41113        let b = b.as_f32x4();
41114        let r = vcvtss2sd(a, b, f64x2::ZERO, 0b11111111, SAE);
41115        transmute(r)
41116    }
41117}
41118
41119/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
41120/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41121///
41122/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvt_roundss_sd&expand=1372)
41123#[inline]
41124#[target_feature(enable = "avx512f")]
41125#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41126#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))]
41127#[rustc_legacy_const_generics(4)]
41128pub fn _mm_mask_cvt_roundss_sd<const SAE: i32>(
41129    src: __m128d,
41130    k: __mmask8,
41131    a: __m128d,
41132    b: __m128,
41133) -> __m128d {
41134    unsafe {
41135        static_assert_sae!(SAE);
41136        let a = a.as_f64x2();
41137        let b = b.as_f32x4();
41138        let src = src.as_f64x2();
41139        let r = vcvtss2sd(a, b, src, k, SAE);
41140        transmute(r)
41141    }
41142}
41143
41144/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
41145/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41146///
41147/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvt_roundss_sd&expand=1373)
41148#[inline]
41149#[target_feature(enable = "avx512f")]
41150#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41151#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))]
41152#[rustc_legacy_const_generics(3)]
41153pub fn _mm_maskz_cvt_roundss_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128) -> __m128d {
41154    unsafe {
41155        static_assert_sae!(SAE);
41156        let a = a.as_f64x2();
41157        let b = b.as_f32x4();
41158        let r = vcvtss2sd(a, b, f64x2::ZERO, k, SAE);
41159        transmute(r)
41160    }
41161}
41162
41163/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
41164/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41165/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41166/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41167/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41168/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41169/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41170///
41171/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_ss&expand=1361)
41172#[inline]
41173#[target_feature(enable = "avx512f")]
41174#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41175#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))]
41176#[rustc_legacy_const_generics(2)]
41177pub fn _mm_cvt_roundsd_ss<const ROUNDING: i32>(a: __m128, b: __m128d) -> __m128 {
41178    unsafe {
41179        static_assert_rounding!(ROUNDING);
41180        let a = a.as_f32x4();
41181        let b = b.as_f64x2();
41182        let r = vcvtsd2ss(a, b, f32x4::ZERO, 0b11111111, ROUNDING);
41183        transmute(r)
41184    }
41185}
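
// Illustrative sketch (not part of the original source): the rounding mode chosen at the call
// site decides which of the two neighbouring f32 values an inexact f64 narrows to. Names and
// values are hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn cvt_roundsd_ss_sketch() {
    let a = _mm_setzero_ps();
    let b = _mm_set_sd(1.0 / 3.0); // not exactly representable as f32
    let down = _mm_cvt_roundsd_ss::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a, b);
    let up = _mm_cvt_roundsd_ss::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a, b);
    // Rounding down and rounding up bracket the exact value.
    assert!(_mm_cvtss_f32(down) < _mm_cvtss_f32(up));
}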
41186
41187/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
41188/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41189/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41190/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41191/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41192/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41193/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41194///
41195/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvt_roundsd_ss&expand=1362)
41196#[inline]
41197#[target_feature(enable = "avx512f")]
41198#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41199#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))]
41200#[rustc_legacy_const_generics(4)]
41201pub fn _mm_mask_cvt_roundsd_ss<const ROUNDING: i32>(
41202    src: __m128,
41203    k: __mmask8,
41204    a: __m128,
41205    b: __m128d,
41206) -> __m128 {
41207    unsafe {
41208        static_assert_rounding!(ROUNDING);
41209        let a = a.as_f32x4();
41210        let b = b.as_f64x2();
41211        let src = src.as_f32x4();
41212        let r = vcvtsd2ss(a, b, src, k, ROUNDING);
41213        transmute(r)
41214    }
41215}
41216
41217/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
41218/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41219/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41220/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41221/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41222/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41223/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41224///
41225/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvt_roundsd_ss&expand=1363)
41226#[inline]
41227#[target_feature(enable = "avx512f")]
41228#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41229#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))]
41230#[rustc_legacy_const_generics(3)]
41231pub fn _mm_maskz_cvt_roundsd_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128d) -> __m128 {
41232    unsafe {
41233        static_assert_rounding!(ROUNDING);
41234        let a = a.as_f32x4();
41235        let b = b.as_f64x2();
41236        let r = vcvtsd2ss(a, b, f32x4::ZERO, k, ROUNDING);
41237        transmute(r)
41238    }
41239}
41240
41241/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
41242/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41243/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41244/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41245/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41246/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41247/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41248///
41249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_si32&expand=1374)
41250#[inline]
41251#[target_feature(enable = "avx512f")]
41252#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41253#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))]
41254#[rustc_legacy_const_generics(1)]
41255pub fn _mm_cvt_roundss_si32<const ROUNDING: i32>(a: __m128) -> i32 {
41256    unsafe {
41257        static_assert_rounding!(ROUNDING);
41258        let a = a.as_f32x4();
41259        vcvtss2si(a, ROUNDING)
41260    }
41261}
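
// Illustrative sketch (not part of the original source): the same f32 converts to different
// integers depending on the rounding mode passed. Names and values are hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn cvt_roundss_si32_sketch() {
    let a = _mm_set_ss(2.5);
    // Round-to-nearest-even sends the tie 2.5 to 2 ...
    let nearest = _mm_cvt_roundss_si32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
    assert_eq!(nearest, 2);
    // ... while rounding toward +infinity sends it to 3.
    let up = _mm_cvt_roundss_si32::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
    assert_eq!(up, 3);
}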
41262
41263/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
41264/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41265/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41266/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41267/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41268/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41269/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41270///
41271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_i32&expand=1369)
41272#[inline]
41273#[target_feature(enable = "avx512f")]
41274#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41275#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))]
41276#[rustc_legacy_const_generics(1)]
41277pub fn _mm_cvt_roundss_i32<const ROUNDING: i32>(a: __m128) -> i32 {
41278    unsafe {
41279        static_assert_rounding!(ROUNDING);
41280        let a = a.as_f32x4();
41281        vcvtss2si(a, ROUNDING)
41282    }
41283}
41284
41285/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.\
41286/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41287/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41288/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41289/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41290/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41291/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41292///
41293/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_u32&expand=1376)
41294#[inline]
41295#[target_feature(enable = "avx512f")]
41296#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41297#[cfg_attr(test, assert_instr(vcvtss2usi, ROUNDING = 8))]
41298#[rustc_legacy_const_generics(1)]
41299pub fn _mm_cvt_roundss_u32<const ROUNDING: i32>(a: __m128) -> u32 {
41300    unsafe {
41301        static_assert_rounding!(ROUNDING);
41302        let a = a.as_f32x4();
41303        vcvtss2usi(a, ROUNDING)
41304    }
41305}
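
// Illustrative sketch (not part of the original source): unsigned conversion of the lower f32
// under two explicit rounding modes. Names and values are hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn cvt_roundss_u32_sketch() {
    let a = _mm_set_ss(7.9);
    let truncated = _mm_cvt_roundss_u32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
    assert_eq!(truncated, 7u32);
    let nearest = _mm_cvt_roundss_u32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
    assert_eq!(nearest, 8u32);
}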
41306
41307/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.
41308///
41309/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtss_i32&expand=1893)
41310#[inline]
41311#[target_feature(enable = "avx512f")]
41312#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41313#[cfg_attr(test, assert_instr(vcvtss2si))]
41314pub fn _mm_cvtss_i32(a: __m128) -> i32 {
41315    unsafe { vcvtss2si(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
41316}
41317
41318/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.
41319///
41320/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtss_u32&expand=1901)
41321#[inline]
41322#[target_feature(enable = "avx512f")]
41323#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41324#[cfg_attr(test, assert_instr(vcvtss2usi))]
41325pub fn _mm_cvtss_u32(a: __m128) -> u32 {
41326    unsafe { vcvtss2usi(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
41327}
41328
41329/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
41330/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41331/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41332/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41333/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41334/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41335/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41336///
41337/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_si32&expand=1359)
41338#[inline]
41339#[target_feature(enable = "avx512f")]
41340#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41341#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))]
41342#[rustc_legacy_const_generics(1)]
41343pub fn _mm_cvt_roundsd_si32<const ROUNDING: i32>(a: __m128d) -> i32 {
41344    unsafe {
41345        static_assert_rounding!(ROUNDING);
41346        let a = a.as_f64x2();
41347        vcvtsd2si(a, ROUNDING)
41348    }
41349}
41350
41351/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
41352/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41353/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41354/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41355/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41356/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41357/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41358///
41359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_i32&expand=1357)
41360#[inline]
41361#[target_feature(enable = "avx512f")]
41362#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41363#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))]
41364#[rustc_legacy_const_generics(1)]
41365pub fn _mm_cvt_roundsd_i32<const ROUNDING: i32>(a: __m128d) -> i32 {
41366    unsafe {
41367        static_assert_rounding!(ROUNDING);
41368        let a = a.as_f64x2();
41369        vcvtsd2si(a, ROUNDING)
41370    }
41371}
41372
41373/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.\
41374/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41375/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41376/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41377/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41378/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41379/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41380///
41381/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvt_roundsd_u32&expand=1364)
41382#[inline]
41383#[target_feature(enable = "avx512f")]
41384#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41385#[cfg_attr(test, assert_instr(vcvtsd2usi, ROUNDING = 8))]
41386#[rustc_legacy_const_generics(1)]
41387pub fn _mm_cvt_roundsd_u32<const ROUNDING: i32>(a: __m128d) -> u32 {
41388    unsafe {
41389        static_assert_rounding!(ROUNDING);
41390        let a = a.as_f64x2();
41391        vcvtsd2usi(a, ROUNDING)
41392    }
41393}
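
// Illustrative sketch (not part of the original source): unlike the signed variant, the
// unsigned conversion above accepts values larger than i32::MAX. Names and values are
// hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn cvt_roundsd_u32_sketch() {
    let a = _mm_set_sd(3_000_000_000.0); // exceeds i32::MAX but fits in u32
    let r = _mm_cvt_roundsd_u32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
    assert_eq!(r, 3_000_000_000u32);
}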
41394
41395/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.
41396///
41397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtsd_i32&expand=1791)
41398#[inline]
41399#[target_feature(enable = "avx512f")]
41400#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41401#[cfg_attr(test, assert_instr(vcvtsd2si))]
41402pub fn _mm_cvtsd_i32(a: __m128d) -> i32 {
41403    unsafe { vcvtsd2si(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
41404}
41405
41406/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.
41407///
41408/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtsd_u32&expand=1799)
41409#[inline]
41410#[target_feature(enable = "avx512f")]
41411#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41412#[cfg_attr(test, assert_instr(vcvtsd2usi))]
41413pub fn _mm_cvtsd_u32(a: __m128d) -> u32 {
41414    unsafe { vcvtsd2usi(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
41415}
41416
41417/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
41418///
41419/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41420/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41421/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41422/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41423/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41424/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41425///
41426/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundi32_ss&expand=1312)
41427#[inline]
41428#[target_feature(enable = "avx512f")]
41429#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41430#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))]
41431#[rustc_legacy_const_generics(2)]
41432pub fn _mm_cvt_roundi32_ss<const ROUNDING: i32>(a: __m128, b: i32) -> __m128 {
41433    unsafe {
41434        static_assert_rounding!(ROUNDING);
41435        let a = a.as_f32x4();
41436        let r = vcvtsi2ss(a, b, ROUNDING);
41437        transmute(r)
41438    }
41439}
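
// Illustrative sketch (not part of the original source): i32::MAX is not exactly representable
// as f32 (the spacing just below 2^31 is 128), so the rounding mode picks which neighbour is
// stored in the lower lane. Names and values are hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn cvt_roundi32_ss_sketch() {
    let a = _mm_setzero_ps();
    let down = _mm_cvt_roundi32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, i32::MAX);
    assert_eq!(_mm_cvtss_f32(down), 2_147_483_520.0);
    let up = _mm_cvt_roundi32_ss::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a, i32::MAX);
    assert_eq!(_mm_cvtss_f32(up), 2_147_483_648.0);
}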
41440
41441/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
41442///
41443/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41444/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41445/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41446/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41447/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41448/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41449///
41450/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsi32_ss&expand=1366)
41451#[inline]
41452#[target_feature(enable = "avx512f")]
41453#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41454#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))]
41455#[rustc_legacy_const_generics(2)]
41456pub fn _mm_cvt_roundsi32_ss<const ROUNDING: i32>(a: __m128, b: i32) -> __m128 {
41457    unsafe {
41458        static_assert_rounding!(ROUNDING);
41459        let a = a.as_f32x4();
41460        let r = vcvtsi2ss(a, b, ROUNDING);
41461        transmute(r)
41462    }
41463}
41464
41465/// Convert the unsigned 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
41466/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41467/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41468/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41469/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41470/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41471/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41472///
41473/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundu32_ss&expand=1378)
41474#[inline]
41475#[target_feature(enable = "avx512f")]
41476#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41477#[cfg_attr(test, assert_instr(vcvtusi2ss, ROUNDING = 8))]
41478#[rustc_legacy_const_generics(2)]
41479pub fn _mm_cvt_roundu32_ss<const ROUNDING: i32>(a: __m128, b: u32) -> __m128 {
41480    unsafe {
41481        static_assert_rounding!(ROUNDING);
41482        let a = a.as_f32x4();
41483        let r = vcvtusi2ss(a, b, ROUNDING);
41484        transmute(r)
41485    }
41486}
41487
41488/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
41489///
41490/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvti32_ss&expand=1643)
41491#[inline]
41492#[target_feature(enable = "avx512f")]
41493#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41494#[cfg_attr(test, assert_instr(vcvtsi2ss))]
41495pub fn _mm_cvti32_ss(a: __m128, b: i32) -> __m128 {
41496    unsafe {
41497        let b = b as f32;
41498        simd_insert!(a, 0, b)
41499    }
41500}
41501
41502/// Convert the signed 32-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
41503///
41504/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvti32_sd&expand=1642)
41505#[inline]
41506#[target_feature(enable = "avx512f")]
41507#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41508#[cfg_attr(test, assert_instr(vcvtsi2sd))]
41509pub fn _mm_cvti32_sd(a: __m128d, b: i32) -> __m128d {
41510    unsafe {
41511        let b = b as f64;
41512        simd_insert!(a, 0, b)
41513    }
41514}
41515
41516/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
41517/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41518///
41519/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundss_si32&expand=1936)
41520#[inline]
41521#[target_feature(enable = "avx512f")]
41522#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41523#[cfg_attr(test, assert_instr(vcvttss2si, SAE = 8))]
41524#[rustc_legacy_const_generics(1)]
41525pub fn _mm_cvtt_roundss_si32<const SAE: i32>(a: __m128) -> i32 {
41526    unsafe {
41527        static_assert_sae!(SAE);
41528        let a = a.as_f32x4();
41529        vcvttss2si(a, SAE)
41530    }
41531}
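
// Illustrative sketch (not part of the original source): the "tt" variants always truncate
// toward zero; the SAE parameter only controls whether exceptions are suppressed. Names and
// values are hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn cvtt_roundss_si32_sketch() {
    assert_eq!(_mm_cvtt_roundss_si32::<{ _MM_FROUND_NO_EXC }>(_mm_set_ss(-2.9)), -2);
    assert_eq!(_mm_cvtt_roundss_si32::<{ _MM_FROUND_NO_EXC }>(_mm_set_ss(2.9)), 2);
}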
41532
41533/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
41534/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41535///
41536/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundss_i32&expand=1934)
41537#[inline]
41538#[target_feature(enable = "avx512f")]
41539#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41540#[cfg_attr(test, assert_instr(vcvttss2si, SAE = 8))]
41541#[rustc_legacy_const_generics(1)]
41542pub fn _mm_cvtt_roundss_i32<const SAE: i32>(a: __m128) -> i32 {
41543    unsafe {
41544        static_assert_sae!(SAE);
41545        let a = a.as_f32x4();
41546        vcvttss2si(a, SAE)
41547    }
41548}
41549
41550/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.\
41551/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41552///
41553/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundss_u32&expand=1938)
41554#[inline]
41555#[target_feature(enable = "avx512f")]
41556#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41557#[cfg_attr(test, assert_instr(vcvttss2usi, SAE = 8))]
41558#[rustc_legacy_const_generics(1)]
41559pub fn _mm_cvtt_roundss_u32<const SAE: i32>(a: __m128) -> u32 {
41560    unsafe {
41561        static_assert_sae!(SAE);
41562        let a = a.as_f32x4();
41563        vcvttss2usi(a, SAE)
41564    }
41565}
41566
41567/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.
41568///
41569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttss_i32&expand=2022)
41570#[inline]
41571#[target_feature(enable = "avx512f")]
41572#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41573#[cfg_attr(test, assert_instr(vcvttss2si))]
41574pub fn _mm_cvttss_i32(a: __m128) -> i32 {
41575    unsafe { vcvttss2si(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
41576}
41577
41578/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.
41579///
41580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttss_u32&expand=2026)
41581#[inline]
41582#[target_feature(enable = "avx512f")]
41583#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41584#[cfg_attr(test, assert_instr(vcvttss2usi))]
41585pub fn _mm_cvttss_u32(a: __m128) -> u32 {
41586    unsafe { vcvttss2usi(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
41587}
41588
41589/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
41590/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41591///
41592/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_roundsd_si32&expand=1930)
41593#[inline]
41594#[target_feature(enable = "avx512f")]
41595#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41596#[cfg_attr(test, assert_instr(vcvttsd2si, SAE = 8))]
41597#[rustc_legacy_const_generics(1)]
41598pub fn _mm_cvtt_roundsd_si32<const SAE: i32>(a: __m128d) -> i32 {
41599    unsafe {
41600        static_assert_sae!(SAE);
41601        let a = a.as_f64x2();
41602        vcvttsd2si(a, SAE)
41603    }
41604}
41605
41606/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
41607/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41608///
41609/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_roundsd_i32&expand=1928)
41610#[inline]
41611#[target_feature(enable = "avx512f")]
41612#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41613#[cfg_attr(test, assert_instr(vcvttsd2si, SAE = 8))]
41614#[rustc_legacy_const_generics(1)]
41615pub fn _mm_cvtt_roundsd_i32<const SAE: i32>(a: __m128d) -> i32 {
41616    unsafe {
41617        static_assert_sae!(SAE);
41618        let a = a.as_f64x2();
41619        vcvttsd2si(a, SAE)
41620    }
41621}
41622
41623/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.\
41624/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41625///
41626/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundsd_u32&expand=1932)
41627#[inline]
41628#[target_feature(enable = "avx512f")]
41629#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41630#[cfg_attr(test, assert_instr(vcvttsd2usi, SAE = 8))]
41631#[rustc_legacy_const_generics(1)]
41632pub fn _mm_cvtt_roundsd_u32<const SAE: i32>(a: __m128d) -> u32 {
41633    unsafe {
41634        static_assert_sae!(SAE);
41635        let a = a.as_f64x2();
41636        vcvttsd2usi(a, SAE)
41637    }
41638}
41639
41640/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.
41641///
41642/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_i32&expand=2015)
41643#[inline]
41644#[target_feature(enable = "avx512f")]
41645#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41646#[cfg_attr(test, assert_instr(vcvttsd2si))]
41647pub fn _mm_cvttsd_i32(a: __m128d) -> i32 {
41648    unsafe { vcvttsd2si(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
41649}
41650
41651/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.
41652///
41653/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_u32&expand=2020)
41654#[inline]
41655#[target_feature(enable = "avx512f")]
41656#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41657#[cfg_attr(test, assert_instr(vcvttsd2usi))]
41658pub fn _mm_cvttsd_u32(a: __m128d) -> u32 {
41659    unsafe { vcvttsd2usi(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
41660}
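
// A minimal usage sketch (illustrative helper, not part of this module): the
// unsigned variant accepts values above i32::MAX as long as they fit in u32.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_cvttsd_u32_above_i32_max() {
    let v = _mm_set_sd(4_000_000_000.9);
    assert_eq!(_mm_cvttsd_u32(v), 4_000_000_000); // truncated, still in u32 range
}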
41661
41662/// Convert the unsigned 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
41663///
41664/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtu32_ss&expand=2032)
41665#[inline]
41666#[target_feature(enable = "avx512f")]
41667#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41668#[cfg_attr(test, assert_instr(vcvtusi2ss))]
41669pub fn _mm_cvtu32_ss(a: __m128, b: u32) -> __m128 {
41670    unsafe {
41671        let b = b as f32;
41672        simd_insert!(a, 0, b)
41673    }
41674}
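
// A minimal usage sketch (illustrative helper, not part of this module): only
// the lowest lane is replaced by the converted integer; the three upper lanes
// of `a` are carried through unchanged.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_cvtu32_ss_lower_lane_only() {
    let a = _mm_set_ps(4.0, 3.0, 2.0, 1.0); // lanes [1.0, 2.0, 3.0, 4.0]
    let r = _mm_cvtu32_ss(a, 7);
    assert_eq!(_mm_cvtss_f32(r), 7.0); // lane 0 now holds the converted value
}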
41675
41676/// Convert the unsigned 32-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
41677///
41678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtu32_sd&expand=2031)
41679#[inline]
41680#[target_feature(enable = "avx512f")]
41681#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41682#[cfg_attr(test, assert_instr(vcvtusi2sd))]
41683pub fn _mm_cvtu32_sd(a: __m128d, b: u32) -> __m128d {
41684    unsafe {
41685        let b = b as f64;
41686        simd_insert!(a, 0, b)
41687    }
41688}
41689
41690/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and return the boolean result (0 or 1).\
41691/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41692///
41693/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comi_round_ss&expand=1175)
41694#[inline]
41695#[target_feature(enable = "avx512f")]
41696#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41697#[cfg_attr(test, assert_instr(vcmp, IMM5 = 5, SAE = 4))] // should be vcomiss
41698#[rustc_legacy_const_generics(2, 3)]
41699pub fn _mm_comi_round_ss<const IMM5: i32, const SAE: i32>(a: __m128, b: __m128) -> i32 {
41700    unsafe {
41701        static_assert_uimm_bits!(IMM5, 5);
41702        static_assert_mantissas_sae!(SAE);
41703        let a = a.as_f32x4();
41704        let b = b.as_f32x4();
41705        vcomiss(a, b, IMM5, SAE)
41706    }
41707}
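
// A minimal usage sketch (illustrative helper, not part of this module): IMM5
// selects the predicate (here _CMP_LT_OS, ordered less-than, from the AVX
// comparison constants) and SAE selects the exception behaviour.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_comi_round_ss_lt() {
    let a = _mm_set_ss(1.0);
    let b = _mm_set_ss(2.0);
    let lt = _mm_comi_round_ss::<_CMP_LT_OS, _MM_FROUND_CUR_DIRECTION>(a, b);
    assert_eq!(lt, 1); // 1.0 < 2.0, so the boolean result is 1
}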
41708
41709/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and return the boolean result (0 or 1).\
41710/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41711///
41712/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comi_round_sd&expand=1174)
41713#[inline]
41714#[target_feature(enable = "avx512f")]
41715#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41716#[cfg_attr(test, assert_instr(vcmp, IMM5 = 5, SAE = 4))] // should be vcomisd
41717#[rustc_legacy_const_generics(2, 3)]
41718pub fn _mm_comi_round_sd<const IMM5: i32, const SAE: i32>(a: __m128d, b: __m128d) -> i32 {
41719    unsafe {
41720        static_assert_uimm_bits!(IMM5, 5);
41721        static_assert_mantissas_sae!(SAE);
41722        let a = a.as_f64x2();
41723        let b = b.as_f64x2();
41724        vcomisd(a, b, IMM5, SAE)
41725    }
41726}
41727
41728/// Equal
41729#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41730pub const _MM_CMPINT_EQ: _MM_CMPINT_ENUM = 0x00;
41731/// Less-than
41732#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41733pub const _MM_CMPINT_LT: _MM_CMPINT_ENUM = 0x01;
41734/// Less-than-or-equal
41735#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41736pub const _MM_CMPINT_LE: _MM_CMPINT_ENUM = 0x02;
41737/// False
41738#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41739pub const _MM_CMPINT_FALSE: _MM_CMPINT_ENUM = 0x03;
41740/// Not-equal
41741#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41742pub const _MM_CMPINT_NE: _MM_CMPINT_ENUM = 0x04;
41743/// Not less-than
41744#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41745pub const _MM_CMPINT_NLT: _MM_CMPINT_ENUM = 0x05;
41746/// Not less-than-or-equal
41747#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41748pub const _MM_CMPINT_NLE: _MM_CMPINT_ENUM = 0x06;
41749/// True
41750#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41751pub const _MM_CMPINT_TRUE: _MM_CMPINT_ENUM = 0x07;
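
// A minimal usage sketch (illustrative helper, not part of this module; it
// assumes the const-generic signature _mm512_cmp_epi32_mask has elsewhere in
// this file): these predicates drive the integer mask-compare intrinsics.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_cmpint_lt_mask() {
    let a = _mm512_set1_epi32(1);
    let b = _mm512_set1_epi32(2);
    // Every lane of `a` is less than the matching lane of `b`, so all 16 mask bits are set.
    let m: __mmask16 = _mm512_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
    assert_eq!(m, 0xFFFF);
}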
41752
41753/// interval [1, 2)
41754#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41755pub const _MM_MANT_NORM_1_2: _MM_MANTISSA_NORM_ENUM = 0x00;
41756/// interval [0.5, 2)
41757#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41758pub const _MM_MANT_NORM_P5_2: _MM_MANTISSA_NORM_ENUM = 0x01;
41759/// interval [0.5, 1)
41760#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41761pub const _MM_MANT_NORM_P5_1: _MM_MANTISSA_NORM_ENUM = 0x02;
41762/// interval [0.75, 1.5)
41763#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41764pub const _MM_MANT_NORM_P75_1P5: _MM_MANTISSA_NORM_ENUM = 0x03;
41765
41766/// sign = sign(SRC)
41767#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41768pub const _MM_MANT_SIGN_SRC: _MM_MANTISSA_SIGN_ENUM = 0x00;
41769/// sign = 0
41770#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41771pub const _MM_MANT_SIGN_ZERO: _MM_MANTISSA_SIGN_ENUM = 0x01;
41772/// DEST = NaN if sign(SRC) = 1
41773#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41774pub const _MM_MANT_SIGN_NAN: _MM_MANTISSA_SIGN_ENUM = 0x02;
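
// A minimal usage sketch (illustrative helper, not part of this module; it
// assumes the two-const-generic signature _mm512_getmant_ps has elsewhere in
// this file): the NORM and SIGN enums pick the mantissa interval and sign rule.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_getmant_norm_1_2() {
    let a = _mm512_set1_ps(10.0);
    // With _MM_MANT_NORM_1_2 each mantissa is scaled into [1, 2): 10.0 = 1.25 * 2^3.
    let r = _mm512_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a);
    assert_eq!(_mm512_cvtss_f32(r), 1.25);
}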
41775
41776#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41777pub const _MM_PERM_AAAA: _MM_PERM_ENUM = 0x00;
41778#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41779pub const _MM_PERM_AAAB: _MM_PERM_ENUM = 0x01;
41780#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41781pub const _MM_PERM_AAAC: _MM_PERM_ENUM = 0x02;
41782#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41783pub const _MM_PERM_AAAD: _MM_PERM_ENUM = 0x03;
41784#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41785pub const _MM_PERM_AABA: _MM_PERM_ENUM = 0x04;
41786#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41787pub const _MM_PERM_AABB: _MM_PERM_ENUM = 0x05;
41788#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41789pub const _MM_PERM_AABC: _MM_PERM_ENUM = 0x06;
41790#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41791pub const _MM_PERM_AABD: _MM_PERM_ENUM = 0x07;
41792#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41793pub const _MM_PERM_AACA: _MM_PERM_ENUM = 0x08;
41794#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41795pub const _MM_PERM_AACB: _MM_PERM_ENUM = 0x09;
41796#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41797pub const _MM_PERM_AACC: _MM_PERM_ENUM = 0x0A;
41798#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41799pub const _MM_PERM_AACD: _MM_PERM_ENUM = 0x0B;
41800#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41801pub const _MM_PERM_AADA: _MM_PERM_ENUM = 0x0C;
41802#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41803pub const _MM_PERM_AADB: _MM_PERM_ENUM = 0x0D;
41804#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41805pub const _MM_PERM_AADC: _MM_PERM_ENUM = 0x0E;
41806#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41807pub const _MM_PERM_AADD: _MM_PERM_ENUM = 0x0F;
41808#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41809pub const _MM_PERM_ABAA: _MM_PERM_ENUM = 0x10;
41810#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41811pub const _MM_PERM_ABAB: _MM_PERM_ENUM = 0x11;
41812#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41813pub const _MM_PERM_ABAC: _MM_PERM_ENUM = 0x12;
41814#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41815pub const _MM_PERM_ABAD: _MM_PERM_ENUM = 0x13;
41816#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41817pub const _MM_PERM_ABBA: _MM_PERM_ENUM = 0x14;
41818#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41819pub const _MM_PERM_ABBB: _MM_PERM_ENUM = 0x15;
41820#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41821pub const _MM_PERM_ABBC: _MM_PERM_ENUM = 0x16;
41822#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41823pub const _MM_PERM_ABBD: _MM_PERM_ENUM = 0x17;
41824#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41825pub const _MM_PERM_ABCA: _MM_PERM_ENUM = 0x18;
41826#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41827pub const _MM_PERM_ABCB: _MM_PERM_ENUM = 0x19;
41828#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41829pub const _MM_PERM_ABCC: _MM_PERM_ENUM = 0x1A;
41830#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41831pub const _MM_PERM_ABCD: _MM_PERM_ENUM = 0x1B;
41832#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41833pub const _MM_PERM_ABDA: _MM_PERM_ENUM = 0x1C;
41834#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41835pub const _MM_PERM_ABDB: _MM_PERM_ENUM = 0x1D;
41836#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41837pub const _MM_PERM_ABDC: _MM_PERM_ENUM = 0x1E;
41838#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41839pub const _MM_PERM_ABDD: _MM_PERM_ENUM = 0x1F;
41840#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41841pub const _MM_PERM_ACAA: _MM_PERM_ENUM = 0x20;
41842#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41843pub const _MM_PERM_ACAB: _MM_PERM_ENUM = 0x21;
41844#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41845pub const _MM_PERM_ACAC: _MM_PERM_ENUM = 0x22;
41846#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41847pub const _MM_PERM_ACAD: _MM_PERM_ENUM = 0x23;
41848#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41849pub const _MM_PERM_ACBA: _MM_PERM_ENUM = 0x24;
41850#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41851pub const _MM_PERM_ACBB: _MM_PERM_ENUM = 0x25;
41852#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41853pub const _MM_PERM_ACBC: _MM_PERM_ENUM = 0x26;
41854#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41855pub const _MM_PERM_ACBD: _MM_PERM_ENUM = 0x27;
41856#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41857pub const _MM_PERM_ACCA: _MM_PERM_ENUM = 0x28;
41858#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41859pub const _MM_PERM_ACCB: _MM_PERM_ENUM = 0x29;
41860#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41861pub const _MM_PERM_ACCC: _MM_PERM_ENUM = 0x2A;
41862#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41863pub const _MM_PERM_ACCD: _MM_PERM_ENUM = 0x2B;
41864#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41865pub const _MM_PERM_ACDA: _MM_PERM_ENUM = 0x2C;
41866#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41867pub const _MM_PERM_ACDB: _MM_PERM_ENUM = 0x2D;
41868#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41869pub const _MM_PERM_ACDC: _MM_PERM_ENUM = 0x2E;
41870#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41871pub const _MM_PERM_ACDD: _MM_PERM_ENUM = 0x2F;
41872#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41873pub const _MM_PERM_ADAA: _MM_PERM_ENUM = 0x30;
41874#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41875pub const _MM_PERM_ADAB: _MM_PERM_ENUM = 0x31;
41876#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41877pub const _MM_PERM_ADAC: _MM_PERM_ENUM = 0x32;
41878#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41879pub const _MM_PERM_ADAD: _MM_PERM_ENUM = 0x33;
41880#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41881pub const _MM_PERM_ADBA: _MM_PERM_ENUM = 0x34;
41882#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41883pub const _MM_PERM_ADBB: _MM_PERM_ENUM = 0x35;
41884#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41885pub const _MM_PERM_ADBC: _MM_PERM_ENUM = 0x36;
41886#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41887pub const _MM_PERM_ADBD: _MM_PERM_ENUM = 0x37;
41888#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41889pub const _MM_PERM_ADCA: _MM_PERM_ENUM = 0x38;
41890#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41891pub const _MM_PERM_ADCB: _MM_PERM_ENUM = 0x39;
41892#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41893pub const _MM_PERM_ADCC: _MM_PERM_ENUM = 0x3A;
41894#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41895pub const _MM_PERM_ADCD: _MM_PERM_ENUM = 0x3B;
41896#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41897pub const _MM_PERM_ADDA: _MM_PERM_ENUM = 0x3C;
41898#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41899pub const _MM_PERM_ADDB: _MM_PERM_ENUM = 0x3D;
41900#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41901pub const _MM_PERM_ADDC: _MM_PERM_ENUM = 0x3E;
41902#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41903pub const _MM_PERM_ADDD: _MM_PERM_ENUM = 0x3F;
41904#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41905pub const _MM_PERM_BAAA: _MM_PERM_ENUM = 0x40;
41906#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41907pub const _MM_PERM_BAAB: _MM_PERM_ENUM = 0x41;
41908#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41909pub const _MM_PERM_BAAC: _MM_PERM_ENUM = 0x42;
41910#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41911pub const _MM_PERM_BAAD: _MM_PERM_ENUM = 0x43;
41912#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41913pub const _MM_PERM_BABA: _MM_PERM_ENUM = 0x44;
41914#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41915pub const _MM_PERM_BABB: _MM_PERM_ENUM = 0x45;
41916#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41917pub const _MM_PERM_BABC: _MM_PERM_ENUM = 0x46;
41918#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41919pub const _MM_PERM_BABD: _MM_PERM_ENUM = 0x47;
41920#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41921pub const _MM_PERM_BACA: _MM_PERM_ENUM = 0x48;
41922#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41923pub const _MM_PERM_BACB: _MM_PERM_ENUM = 0x49;
41924#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41925pub const _MM_PERM_BACC: _MM_PERM_ENUM = 0x4A;
41926#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41927pub const _MM_PERM_BACD: _MM_PERM_ENUM = 0x4B;
41928#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41929pub const _MM_PERM_BADA: _MM_PERM_ENUM = 0x4C;
41930#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41931pub const _MM_PERM_BADB: _MM_PERM_ENUM = 0x4D;
41932#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41933pub const _MM_PERM_BADC: _MM_PERM_ENUM = 0x4E;
41934#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41935pub const _MM_PERM_BADD: _MM_PERM_ENUM = 0x4F;
41936#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41937pub const _MM_PERM_BBAA: _MM_PERM_ENUM = 0x50;
41938#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41939pub const _MM_PERM_BBAB: _MM_PERM_ENUM = 0x51;
41940#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41941pub const _MM_PERM_BBAC: _MM_PERM_ENUM = 0x52;
41942#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41943pub const _MM_PERM_BBAD: _MM_PERM_ENUM = 0x53;
41944#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41945pub const _MM_PERM_BBBA: _MM_PERM_ENUM = 0x54;
41946#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41947pub const _MM_PERM_BBBB: _MM_PERM_ENUM = 0x55;
41948#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41949pub const _MM_PERM_BBBC: _MM_PERM_ENUM = 0x56;
41950#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41951pub const _MM_PERM_BBBD: _MM_PERM_ENUM = 0x57;
41952#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41953pub const _MM_PERM_BBCA: _MM_PERM_ENUM = 0x58;
41954#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41955pub const _MM_PERM_BBCB: _MM_PERM_ENUM = 0x59;
41956#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41957pub const _MM_PERM_BBCC: _MM_PERM_ENUM = 0x5A;
41958#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41959pub const _MM_PERM_BBCD: _MM_PERM_ENUM = 0x5B;
41960#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41961pub const _MM_PERM_BBDA: _MM_PERM_ENUM = 0x5C;
41962#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41963pub const _MM_PERM_BBDB: _MM_PERM_ENUM = 0x5D;
41964#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41965pub const _MM_PERM_BBDC: _MM_PERM_ENUM = 0x5E;
41966#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41967pub const _MM_PERM_BBDD: _MM_PERM_ENUM = 0x5F;
41968#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41969pub const _MM_PERM_BCAA: _MM_PERM_ENUM = 0x60;
41970#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41971pub const _MM_PERM_BCAB: _MM_PERM_ENUM = 0x61;
41972#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41973pub const _MM_PERM_BCAC: _MM_PERM_ENUM = 0x62;
41974#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41975pub const _MM_PERM_BCAD: _MM_PERM_ENUM = 0x63;
41976#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41977pub const _MM_PERM_BCBA: _MM_PERM_ENUM = 0x64;
41978#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41979pub const _MM_PERM_BCBB: _MM_PERM_ENUM = 0x65;
41980#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41981pub const _MM_PERM_BCBC: _MM_PERM_ENUM = 0x66;
41982#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41983pub const _MM_PERM_BCBD: _MM_PERM_ENUM = 0x67;
41984#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41985pub const _MM_PERM_BCCA: _MM_PERM_ENUM = 0x68;
41986#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41987pub const _MM_PERM_BCCB: _MM_PERM_ENUM = 0x69;
41988#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41989pub const _MM_PERM_BCCC: _MM_PERM_ENUM = 0x6A;
41990#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41991pub const _MM_PERM_BCCD: _MM_PERM_ENUM = 0x6B;
41992#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41993pub const _MM_PERM_BCDA: _MM_PERM_ENUM = 0x6C;
41994#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41995pub const _MM_PERM_BCDB: _MM_PERM_ENUM = 0x6D;
41996#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41997pub const _MM_PERM_BCDC: _MM_PERM_ENUM = 0x6E;
41998#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41999pub const _MM_PERM_BCDD: _MM_PERM_ENUM = 0x6F;
42000#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42001pub const _MM_PERM_BDAA: _MM_PERM_ENUM = 0x70;
42002#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42003pub const _MM_PERM_BDAB: _MM_PERM_ENUM = 0x71;
42004#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42005pub const _MM_PERM_BDAC: _MM_PERM_ENUM = 0x72;
42006#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42007pub const _MM_PERM_BDAD: _MM_PERM_ENUM = 0x73;
42008#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42009pub const _MM_PERM_BDBA: _MM_PERM_ENUM = 0x74;
42010#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42011pub const _MM_PERM_BDBB: _MM_PERM_ENUM = 0x75;
42012#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42013pub const _MM_PERM_BDBC: _MM_PERM_ENUM = 0x76;
42014#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42015pub const _MM_PERM_BDBD: _MM_PERM_ENUM = 0x77;
42016#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42017pub const _MM_PERM_BDCA: _MM_PERM_ENUM = 0x78;
42018#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42019pub const _MM_PERM_BDCB: _MM_PERM_ENUM = 0x79;
42020#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42021pub const _MM_PERM_BDCC: _MM_PERM_ENUM = 0x7A;
42022#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42023pub const _MM_PERM_BDCD: _MM_PERM_ENUM = 0x7B;
42024#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42025pub const _MM_PERM_BDDA: _MM_PERM_ENUM = 0x7C;
42026#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42027pub const _MM_PERM_BDDB: _MM_PERM_ENUM = 0x7D;
42028#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42029pub const _MM_PERM_BDDC: _MM_PERM_ENUM = 0x7E;
42030#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42031pub const _MM_PERM_BDDD: _MM_PERM_ENUM = 0x7F;
42032#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42033pub const _MM_PERM_CAAA: _MM_PERM_ENUM = 0x80;
42034#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42035pub const _MM_PERM_CAAB: _MM_PERM_ENUM = 0x81;
42036#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42037pub const _MM_PERM_CAAC: _MM_PERM_ENUM = 0x82;
42038#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42039pub const _MM_PERM_CAAD: _MM_PERM_ENUM = 0x83;
42040#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42041pub const _MM_PERM_CABA: _MM_PERM_ENUM = 0x84;
42042#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42043pub const _MM_PERM_CABB: _MM_PERM_ENUM = 0x85;
42044#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42045pub const _MM_PERM_CABC: _MM_PERM_ENUM = 0x86;
42046#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42047pub const _MM_PERM_CABD: _MM_PERM_ENUM = 0x87;
42048#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42049pub const _MM_PERM_CACA: _MM_PERM_ENUM = 0x88;
42050#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42051pub const _MM_PERM_CACB: _MM_PERM_ENUM = 0x89;
42052#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42053pub const _MM_PERM_CACC: _MM_PERM_ENUM = 0x8A;
42054#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42055pub const _MM_PERM_CACD: _MM_PERM_ENUM = 0x8B;
42056#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42057pub const _MM_PERM_CADA: _MM_PERM_ENUM = 0x8C;
42058#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42059pub const _MM_PERM_CADB: _MM_PERM_ENUM = 0x8D;
42060#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42061pub const _MM_PERM_CADC: _MM_PERM_ENUM = 0x8E;
42062#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42063pub const _MM_PERM_CADD: _MM_PERM_ENUM = 0x8F;
42064#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42065pub const _MM_PERM_CBAA: _MM_PERM_ENUM = 0x90;
42066#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42067pub const _MM_PERM_CBAB: _MM_PERM_ENUM = 0x91;
42068#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42069pub const _MM_PERM_CBAC: _MM_PERM_ENUM = 0x92;
42070#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42071pub const _MM_PERM_CBAD: _MM_PERM_ENUM = 0x93;
42072#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42073pub const _MM_PERM_CBBA: _MM_PERM_ENUM = 0x94;
42074#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42075pub const _MM_PERM_CBBB: _MM_PERM_ENUM = 0x95;
42076#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42077pub const _MM_PERM_CBBC: _MM_PERM_ENUM = 0x96;
42078#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42079pub const _MM_PERM_CBBD: _MM_PERM_ENUM = 0x97;
42080#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42081pub const _MM_PERM_CBCA: _MM_PERM_ENUM = 0x98;
42082#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42083pub const _MM_PERM_CBCB: _MM_PERM_ENUM = 0x99;
42084#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42085pub const _MM_PERM_CBCC: _MM_PERM_ENUM = 0x9A;
42086#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42087pub const _MM_PERM_CBCD: _MM_PERM_ENUM = 0x9B;
42088#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42089pub const _MM_PERM_CBDA: _MM_PERM_ENUM = 0x9C;
42090#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42091pub const _MM_PERM_CBDB: _MM_PERM_ENUM = 0x9D;
42092#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42093pub const _MM_PERM_CBDC: _MM_PERM_ENUM = 0x9E;
42094#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42095pub const _MM_PERM_CBDD: _MM_PERM_ENUM = 0x9F;
42096#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42097pub const _MM_PERM_CCAA: _MM_PERM_ENUM = 0xA0;
42098#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42099pub const _MM_PERM_CCAB: _MM_PERM_ENUM = 0xA1;
42100#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42101pub const _MM_PERM_CCAC: _MM_PERM_ENUM = 0xA2;
42102#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42103pub const _MM_PERM_CCAD: _MM_PERM_ENUM = 0xA3;
42104#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42105pub const _MM_PERM_CCBA: _MM_PERM_ENUM = 0xA4;
42106#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42107pub const _MM_PERM_CCBB: _MM_PERM_ENUM = 0xA5;
42108#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42109pub const _MM_PERM_CCBC: _MM_PERM_ENUM = 0xA6;
42110#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42111pub const _MM_PERM_CCBD: _MM_PERM_ENUM = 0xA7;
42112#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42113pub const _MM_PERM_CCCA: _MM_PERM_ENUM = 0xA8;
42114#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42115pub const _MM_PERM_CCCB: _MM_PERM_ENUM = 0xA9;
42116#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42117pub const _MM_PERM_CCCC: _MM_PERM_ENUM = 0xAA;
42118#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42119pub const _MM_PERM_CCCD: _MM_PERM_ENUM = 0xAB;
42120#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42121pub const _MM_PERM_CCDA: _MM_PERM_ENUM = 0xAC;
42122#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42123pub const _MM_PERM_CCDB: _MM_PERM_ENUM = 0xAD;
42124#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42125pub const _MM_PERM_CCDC: _MM_PERM_ENUM = 0xAE;
42126#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42127pub const _MM_PERM_CCDD: _MM_PERM_ENUM = 0xAF;
42128#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42129pub const _MM_PERM_CDAA: _MM_PERM_ENUM = 0xB0;
42130#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42131pub const _MM_PERM_CDAB: _MM_PERM_ENUM = 0xB1;
42132#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42133pub const _MM_PERM_CDAC: _MM_PERM_ENUM = 0xB2;
42134#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42135pub const _MM_PERM_CDAD: _MM_PERM_ENUM = 0xB3;
42136#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42137pub const _MM_PERM_CDBA: _MM_PERM_ENUM = 0xB4;
42138#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42139pub const _MM_PERM_CDBB: _MM_PERM_ENUM = 0xB5;
42140#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42141pub const _MM_PERM_CDBC: _MM_PERM_ENUM = 0xB6;
42142#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42143pub const _MM_PERM_CDBD: _MM_PERM_ENUM = 0xB7;
42144#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42145pub const _MM_PERM_CDCA: _MM_PERM_ENUM = 0xB8;
42146#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42147pub const _MM_PERM_CDCB: _MM_PERM_ENUM = 0xB9;
42148#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42149pub const _MM_PERM_CDCC: _MM_PERM_ENUM = 0xBA;
42150#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42151pub const _MM_PERM_CDCD: _MM_PERM_ENUM = 0xBB;
42152#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42153pub const _MM_PERM_CDDA: _MM_PERM_ENUM = 0xBC;
42154#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42155pub const _MM_PERM_CDDB: _MM_PERM_ENUM = 0xBD;
42156#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42157pub const _MM_PERM_CDDC: _MM_PERM_ENUM = 0xBE;
42158#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42159pub const _MM_PERM_CDDD: _MM_PERM_ENUM = 0xBF;
42160#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42161pub const _MM_PERM_DAAA: _MM_PERM_ENUM = 0xC0;
42162#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42163pub const _MM_PERM_DAAB: _MM_PERM_ENUM = 0xC1;
42164#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42165pub const _MM_PERM_DAAC: _MM_PERM_ENUM = 0xC2;
42166#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42167pub const _MM_PERM_DAAD: _MM_PERM_ENUM = 0xC3;
42168#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42169pub const _MM_PERM_DABA: _MM_PERM_ENUM = 0xC4;
42170#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42171pub const _MM_PERM_DABB: _MM_PERM_ENUM = 0xC5;
42172#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42173pub const _MM_PERM_DABC: _MM_PERM_ENUM = 0xC6;
42174#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42175pub const _MM_PERM_DABD: _MM_PERM_ENUM = 0xC7;
42176#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42177pub const _MM_PERM_DACA: _MM_PERM_ENUM = 0xC8;
42178#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42179pub const _MM_PERM_DACB: _MM_PERM_ENUM = 0xC9;
42180#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42181pub const _MM_PERM_DACC: _MM_PERM_ENUM = 0xCA;
42182#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42183pub const _MM_PERM_DACD: _MM_PERM_ENUM = 0xCB;
42184#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42185pub const _MM_PERM_DADA: _MM_PERM_ENUM = 0xCC;
42186#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42187pub const _MM_PERM_DADB: _MM_PERM_ENUM = 0xCD;
42188#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42189pub const _MM_PERM_DADC: _MM_PERM_ENUM = 0xCE;
42190#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42191pub const _MM_PERM_DADD: _MM_PERM_ENUM = 0xCF;
42192#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42193pub const _MM_PERM_DBAA: _MM_PERM_ENUM = 0xD0;
42194#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42195pub const _MM_PERM_DBAB: _MM_PERM_ENUM = 0xD1;
42196#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42197pub const _MM_PERM_DBAC: _MM_PERM_ENUM = 0xD2;
42198#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42199pub const _MM_PERM_DBAD: _MM_PERM_ENUM = 0xD3;
42200#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42201pub const _MM_PERM_DBBA: _MM_PERM_ENUM = 0xD4;
42202#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42203pub const _MM_PERM_DBBB: _MM_PERM_ENUM = 0xD5;
42204#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42205pub const _MM_PERM_DBBC: _MM_PERM_ENUM = 0xD6;
42206#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42207pub const _MM_PERM_DBBD: _MM_PERM_ENUM = 0xD7;
42208#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42209pub const _MM_PERM_DBCA: _MM_PERM_ENUM = 0xD8;
42210#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42211pub const _MM_PERM_DBCB: _MM_PERM_ENUM = 0xD9;
42212#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42213pub const _MM_PERM_DBCC: _MM_PERM_ENUM = 0xDA;
42214#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42215pub const _MM_PERM_DBCD: _MM_PERM_ENUM = 0xDB;
42216#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42217pub const _MM_PERM_DBDA: _MM_PERM_ENUM = 0xDC;
42218#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42219pub const _MM_PERM_DBDB: _MM_PERM_ENUM = 0xDD;
42220#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42221pub const _MM_PERM_DBDC: _MM_PERM_ENUM = 0xDE;
42222#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42223pub const _MM_PERM_DBDD: _MM_PERM_ENUM = 0xDF;
42224#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42225pub const _MM_PERM_DCAA: _MM_PERM_ENUM = 0xE0;
42226#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42227pub const _MM_PERM_DCAB: _MM_PERM_ENUM = 0xE1;
42228#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42229pub const _MM_PERM_DCAC: _MM_PERM_ENUM = 0xE2;
42230#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42231pub const _MM_PERM_DCAD: _MM_PERM_ENUM = 0xE3;
42232#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42233pub const _MM_PERM_DCBA: _MM_PERM_ENUM = 0xE4;
42234#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42235pub const _MM_PERM_DCBB: _MM_PERM_ENUM = 0xE5;
42236#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42237pub const _MM_PERM_DCBC: _MM_PERM_ENUM = 0xE6;
42238#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42239pub const _MM_PERM_DCBD: _MM_PERM_ENUM = 0xE7;
42240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42241pub const _MM_PERM_DCCA: _MM_PERM_ENUM = 0xE8;
42242#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42243pub const _MM_PERM_DCCB: _MM_PERM_ENUM = 0xE9;
42244#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42245pub const _MM_PERM_DCCC: _MM_PERM_ENUM = 0xEA;
42246#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42247pub const _MM_PERM_DCCD: _MM_PERM_ENUM = 0xEB;
42248#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42249pub const _MM_PERM_DCDA: _MM_PERM_ENUM = 0xEC;
42250#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42251pub const _MM_PERM_DCDB: _MM_PERM_ENUM = 0xED;
42252#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42253pub const _MM_PERM_DCDC: _MM_PERM_ENUM = 0xEE;
42254#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42255pub const _MM_PERM_DCDD: _MM_PERM_ENUM = 0xEF;
42256#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42257pub const _MM_PERM_DDAA: _MM_PERM_ENUM = 0xF0;
42258#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42259pub const _MM_PERM_DDAB: _MM_PERM_ENUM = 0xF1;
42260#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42261pub const _MM_PERM_DDAC: _MM_PERM_ENUM = 0xF2;
42262#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42263pub const _MM_PERM_DDAD: _MM_PERM_ENUM = 0xF3;
42264#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42265pub const _MM_PERM_DDBA: _MM_PERM_ENUM = 0xF4;
42266#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42267pub const _MM_PERM_DDBB: _MM_PERM_ENUM = 0xF5;
42268#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42269pub const _MM_PERM_DDBC: _MM_PERM_ENUM = 0xF6;
42270#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42271pub const _MM_PERM_DDBD: _MM_PERM_ENUM = 0xF7;
42272#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42273pub const _MM_PERM_DDCA: _MM_PERM_ENUM = 0xF8;
42274#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42275pub const _MM_PERM_DDCB: _MM_PERM_ENUM = 0xF9;
42276#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42277pub const _MM_PERM_DDCC: _MM_PERM_ENUM = 0xFA;
42278#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42279pub const _MM_PERM_DDCD: _MM_PERM_ENUM = 0xFB;
42280#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42281pub const _MM_PERM_DDDA: _MM_PERM_ENUM = 0xFC;
42282#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42283pub const _MM_PERM_DDDB: _MM_PERM_ENUM = 0xFD;
42284#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42285pub const _MM_PERM_DDDC: _MM_PERM_ENUM = 0xFE;
42286#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42287pub const _MM_PERM_DDDD: _MM_PERM_ENUM = 0xFF;
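
// A minimal usage sketch (illustrative helper, not part of this module; it
// assumes the const-generic signature _mm512_shuffle_epi32 has elsewhere in
// this file): reading a _MM_PERM_* name left to right gives the source lane
// (A = 0 .. D = 3) for result lanes 3 down to 0 in each 128-bit group, so
// _MM_PERM_BADC swaps the two 64-bit halves of every group.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_perm_badc_swaps_halves() {
    let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    let r = _mm512_shuffle_epi32::<_MM_PERM_BADC>(a);
    let e = _mm512_setr_epi32(2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13);
    assert_eq!(_mm512_cmpeq_epi32_mask(r, e), 0xFFFF);
}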
42288
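// The declarations below bind the LLVM builtins (`llvm.x86.avx512.*`) that
// implement the rounding, SAE and write-masked forms of the intrinsics in
// this module; each `link_name` names the corresponding builtin.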
42289#[allow(improper_ctypes)]
42290unsafe extern "C" {
42291    #[link_name = "llvm.x86.avx512.sqrt.ps.512"]
42292    fn vsqrtps(a: f32x16, rounding: i32) -> f32x16;
42293    #[link_name = "llvm.x86.avx512.sqrt.pd.512"]
42294    fn vsqrtpd(a: f64x8, rounding: i32) -> f64x8;
42295
42296    #[link_name = "llvm.x86.avx512.vfmadd.ps.512"]
42297    fn vfmadd132psround(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512;
42298    #[link_name = "llvm.x86.avx512.vfmadd.pd.512"]
42299    fn vfmadd132pdround(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d;
42300
42301    #[link_name = "llvm.x86.avx512.vfmaddsub.ps.512"]
42302    fn vfmaddsubpsround(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512; //from clang
42303    #[link_name = "llvm.x86.avx512.vfmaddsub.pd.512"]
42304    fn vfmaddsubpdround(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d; //from clang
42305
42306    #[link_name = "llvm.x86.avx512.add.ps.512"]
42307    fn vaddps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
42308    #[link_name = "llvm.x86.avx512.add.pd.512"]
42309    fn vaddpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
42310    #[link_name = "llvm.x86.avx512.sub.ps.512"]
42311    fn vsubps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
42312    #[link_name = "llvm.x86.avx512.sub.pd.512"]
42313    fn vsubpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
42314    #[link_name = "llvm.x86.avx512.mul.ps.512"]
42315    fn vmulps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
42316    #[link_name = "llvm.x86.avx512.mul.pd.512"]
42317    fn vmulpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
42318    #[link_name = "llvm.x86.avx512.div.ps.512"]
42319    fn vdivps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
42320    #[link_name = "llvm.x86.avx512.div.pd.512"]
42321    fn vdivpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
42322
42323    #[link_name = "llvm.x86.avx512.max.ps.512"]
42324    fn vmaxps(a: f32x16, b: f32x16, sae: i32) -> f32x16;
42325    #[link_name = "llvm.x86.avx512.max.pd.512"]
42326    fn vmaxpd(a: f64x8, b: f64x8, sae: i32) -> f64x8;
42327    #[link_name = "llvm.x86.avx512.min.ps.512"]
42328    fn vminps(a: f32x16, b: f32x16, sae: i32) -> f32x16;
42329    #[link_name = "llvm.x86.avx512.min.pd.512"]
42330    fn vminpd(a: f64x8, b: f64x8, sae: i32) -> f64x8;
42331
42332    #[link_name = "llvm.x86.avx512.mask.getexp.ps.512"]
42333    fn vgetexpps(a: f32x16, src: f32x16, m: u16, sae: i32) -> f32x16;
42334
42335    #[link_name = "llvm.x86.avx512.mask.getexp.ps.256"]
42336    fn vgetexpps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
42337    #[link_name = "llvm.x86.avx512.mask.getexp.ps.128"]
42338    fn vgetexpps128(a: f32x4, src: f32x4, m: u8) -> f32x4;
42339
42340    #[link_name = "llvm.x86.avx512.mask.getexp.pd.512"]
42341    fn vgetexppd(a: f64x8, src: f64x8, m: u8, sae: i32) -> f64x8;
42342    #[link_name = "llvm.x86.avx512.mask.getexp.pd.256"]
42343    fn vgetexppd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
42344    #[link_name = "llvm.x86.avx512.mask.getexp.pd.128"]
42345    fn vgetexppd128(a: f64x2, src: f64x2, m: u8) -> f64x2;
42346
42347    #[link_name = "llvm.x86.avx512.mask.rndscale.ps.512"]
42348    fn vrndscaleps(a: f32x16, imm8: i32, src: f32x16, mask: u16, sae: i32) -> f32x16;
42349    #[link_name = "llvm.x86.avx512.mask.rndscale.ps.256"]
42350    fn vrndscaleps256(a: f32x8, imm8: i32, src: f32x8, mask: u8) -> f32x8;
42351    #[link_name = "llvm.x86.avx512.mask.rndscale.ps.128"]
42352    fn vrndscaleps128(a: f32x4, imm8: i32, src: f32x4, mask: u8) -> f32x4;
42353
42354    #[link_name = "llvm.x86.avx512.mask.rndscale.pd.512"]
42355    fn vrndscalepd(a: f64x8, imm8: i32, src: f64x8, mask: u8, sae: i32) -> f64x8;
42356    #[link_name = "llvm.x86.avx512.mask.rndscale.pd.256"]
42357    fn vrndscalepd256(a: f64x4, imm8: i32, src: f64x4, mask: u8) -> f64x4;
42358    #[link_name = "llvm.x86.avx512.mask.rndscale.pd.128"]
42359    fn vrndscalepd128(a: f64x2, imm8: i32, src: f64x2, mask: u8) -> f64x2;
42360
42361    #[link_name = "llvm.x86.avx512.mask.scalef.ps.512"]
42362    fn vscalefps(a: f32x16, b: f32x16, src: f32x16, mask: u16, rounding: i32) -> f32x16;
42363    #[link_name = "llvm.x86.avx512.mask.scalef.ps.256"]
42364    fn vscalefps256(a: f32x8, b: f32x8, src: f32x8, mask: u8) -> f32x8;
42365    #[link_name = "llvm.x86.avx512.mask.scalef.ps.128"]
42366    fn vscalefps128(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
42367
42368    #[link_name = "llvm.x86.avx512.mask.scalef.pd.512"]
42369    fn vscalefpd(a: f64x8, b: f64x8, src: f64x8, mask: u8, rounding: i32) -> f64x8;
42370    #[link_name = "llvm.x86.avx512.mask.scalef.pd.256"]
42371    fn vscalefpd256(a: f64x4, b: f64x4, src: f64x4, mask: u8) -> f64x4;
42372    #[link_name = "llvm.x86.avx512.mask.scalef.pd.128"]
42373    fn vscalefpd128(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;
42374
42375    #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.512"]
42376    fn vfixupimmps(a: f32x16, b: f32x16, c: i32x16, imm8: i32, mask: u16, sae: i32) -> f32x16;
42377    #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.256"]
42378    fn vfixupimmps256(a: f32x8, b: f32x8, c: i32x8, imm8: i32, mask: u8) -> f32x8;
42379    #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.128"]
42380    fn vfixupimmps128(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8) -> f32x4;
42381
42382    #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.512"]
42383    fn vfixupimmpd(a: f64x8, b: f64x8, c: i64x8, imm8: i32, mask: u8, sae: i32) -> f64x8;
42384    #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.256"]
42385    fn vfixupimmpd256(a: f64x4, b: f64x4, c: i64x4, imm8: i32, mask: u8) -> f64x4;
42386    #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.128"]
42387    fn vfixupimmpd128(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8) -> f64x2;
42388
42389    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.512"]
42390    fn vfixupimmpsz(a: f32x16, b: f32x16, c: i32x16, imm8: i32, mask: u16, sae: i32) -> f32x16;
42391    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.256"]
42392    fn vfixupimmpsz256(a: f32x8, b: f32x8, c: i32x8, imm8: i32, mask: u8) -> f32x8;
42393    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.128"]
42394    fn vfixupimmpsz128(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8) -> f32x4;
42395
42396    #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.512"]
42397    fn vfixupimmpdz(a: f64x8, b: f64x8, c: i64x8, imm8: i32, mask: u8, sae: i32) -> f64x8;
42398    #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.256"]
42399    fn vfixupimmpdz256(a: f64x4, b: f64x4, c: i64x4, imm8: i32, mask: u8) -> f64x4;
42400    #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.128"]
42401    fn vfixupimmpdz128(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8) -> f64x2;
42402
42403    #[link_name = "llvm.x86.avx512.pternlog.d.512"]
42404    fn vpternlogd(a: i32x16, b: i32x16, c: i32x16, imm8: i32) -> i32x16;
42405    #[link_name = "llvm.x86.avx512.pternlog.d.256"]
42406    fn vpternlogd256(a: i32x8, b: i32x8, c: i32x8, imm8: i32) -> i32x8;
42407    #[link_name = "llvm.x86.avx512.pternlog.d.128"]
42408    fn vpternlogd128(a: i32x4, b: i32x4, c: i32x4, imm8: i32) -> i32x4;
42409
42410    #[link_name = "llvm.x86.avx512.pternlog.q.512"]
42411    fn vpternlogq(a: i64x8, b: i64x8, c: i64x8, imm8: i32) -> i64x8;
42412    #[link_name = "llvm.x86.avx512.pternlog.q.256"]
42413    fn vpternlogq256(a: i64x4, b: i64x4, c: i64x4, imm8: i32) -> i64x4;
42414    #[link_name = "llvm.x86.avx512.pternlog.q.128"]
42415    fn vpternlogq128(a: i64x2, b: i64x2, c: i64x2, imm8: i32) -> i64x2;
42416
42417    #[link_name = "llvm.x86.avx512.mask.getmant.ps.512"]
42418    fn vgetmantps(a: f32x16, mantissas: i32, src: f32x16, m: u16, sae: i32) -> f32x16;
42419    #[link_name = "llvm.x86.avx512.mask.getmant.ps.256"]
42420    fn vgetmantps256(a: f32x8, mantissas: i32, src: f32x8, m: u8) -> f32x8;
42421    #[link_name = "llvm.x86.avx512.mask.getmant.ps.128"]
42422    fn vgetmantps128(a: f32x4, mantissas: i32, src: f32x4, m: u8) -> f32x4;
42423
42424    #[link_name = "llvm.x86.avx512.mask.getmant.pd.512"]
42425    fn vgetmantpd(a: f64x8, mantissas: i32, src: f64x8, m: u8, sae: i32) -> f64x8;
42426    #[link_name = "llvm.x86.avx512.mask.getmant.pd.256"]
42427    fn vgetmantpd256(a: f64x4, mantissas: i32, src: f64x4, m: u8) -> f64x4;
42428    #[link_name = "llvm.x86.avx512.mask.getmant.pd.128"]
42429    fn vgetmantpd128(a: f64x2, mantissas: i32, src: f64x2, m: u8) -> f64x2;
42430
42431    #[link_name = "llvm.x86.avx512.rcp14.ps.512"]
42432    fn vrcp14ps(a: f32x16, src: f32x16, m: u16) -> f32x16;
42433    #[link_name = "llvm.x86.avx512.rcp14.ps.256"]
42434    fn vrcp14ps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
42435    #[link_name = "llvm.x86.avx512.rcp14.ps.128"]
42436    fn vrcp14ps128(a: f32x4, src: f32x4, m: u8) -> f32x4;
42437
42438    #[link_name = "llvm.x86.avx512.rcp14.pd.512"]
42439    fn vrcp14pd(a: f64x8, src: f64x8, m: u8) -> f64x8;
42440    #[link_name = "llvm.x86.avx512.rcp14.pd.256"]
42441    fn vrcp14pd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
42442    #[link_name = "llvm.x86.avx512.rcp14.pd.128"]
42443    fn vrcp14pd128(a: f64x2, src: f64x2, m: u8) -> f64x2;
42444
42445    #[link_name = "llvm.x86.avx512.rsqrt14.ps.512"]
42446    fn vrsqrt14ps(a: f32x16, src: f32x16, m: u16) -> f32x16;
42447    #[link_name = "llvm.x86.avx512.rsqrt14.ps.256"]
42448    fn vrsqrt14ps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
42449    #[link_name = "llvm.x86.avx512.rsqrt14.ps.128"]
42450    fn vrsqrt14ps128(a: f32x4, src: f32x4, m: u8) -> f32x4;
42451
42452    #[link_name = "llvm.x86.avx512.rsqrt14.pd.512"]
42453    fn vrsqrt14pd(a: f64x8, src: f64x8, m: u8) -> f64x8;
42454    #[link_name = "llvm.x86.avx512.rsqrt14.pd.256"]
42455    fn vrsqrt14pd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
42456    #[link_name = "llvm.x86.avx512.rsqrt14.pd.128"]
42457    fn vrsqrt14pd128(a: f64x2, src: f64x2, m: u8) -> f64x2;
42458
42459    #[link_name = "llvm.x86.avx512.mask.cvtps2dq.512"]
42460    fn vcvtps2dq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> i32x16;
42461
42462    #[link_name = "llvm.x86.avx512.mask.cvtps2udq.512"]
42463    fn vcvtps2udq(a: f32x16, src: u32x16, mask: u16, rounding: i32) -> u32x16;
42464    #[link_name = "llvm.x86.avx512.mask.cvtps2udq.256"]
42465    fn vcvtps2udq256(a: f32x8, src: u32x8, mask: u8) -> u32x8;
42466    #[link_name = "llvm.x86.avx512.mask.cvtps2udq.128"]
42467    fn vcvtps2udq128(a: f32x4, src: u32x4, mask: u8) -> u32x4;
42468
42469    #[link_name = "llvm.x86.avx512.mask.cvtps2pd.512"]
42470    fn vcvtps2pd(a: f32x8, src: f64x8, mask: u8, sae: i32) -> f64x8;
42471    #[link_name = "llvm.x86.avx512.mask.cvtpd2ps.512"]
42472    fn vcvtpd2ps(a: f64x8, src: f32x8, mask: u8, rounding: i32) -> f32x8;
42473
42474    #[link_name = "llvm.x86.avx512.mask.cvtpd2dq.512"]
42475    fn vcvtpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8;
42476
42477    #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.512"]
42478    fn vcvtpd2udq(a: f64x8, src: u32x8, mask: u8, rounding: i32) -> u32x8;
42479    #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.256"]
42480    fn vcvtpd2udq256(a: f64x4, src: u32x4, mask: u8) -> u32x4;
42481    #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.128"]
42482    fn vcvtpd2udq128(a: f64x2, src: u32x4, mask: u8) -> u32x4;
42483
42484    #[link_name = "llvm.x86.avx512.sitofp.round.v16f32.v16i32"]
42485    fn vcvtdq2ps(a: i32x16, rounding: i32) -> f32x16;
42486    #[link_name = "llvm.x86.avx512.uitofp.round.v16f32.v16i32"]
42487    fn vcvtudq2ps(a: u32x16, rounding: i32) -> f32x16;
42488
42489    #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.512"]
42490    fn vcvtps2ph(a: f32x16, sae: i32, src: i16x16, mask: u16) -> i16x16;
42491    #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.256"]
42492    fn vcvtps2ph256(a: f32x8, sae: i32, src: i16x8, mask: u8) -> i16x8;
42493    #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.128"]
42494    fn vcvtps2ph128(a: f32x4, sae: i32, src: i16x8, mask: u8) -> i16x8;
42495
42496    #[link_name = "llvm.x86.avx512.mask.vcvtph2ps.512"]
42497    fn vcvtph2ps(a: i16x16, src: f32x16, mask: u16, sae: i32) -> f32x16;
42498
42499    #[link_name = "llvm.x86.avx512.mask.cvttps2dq.512"]
42500    fn vcvttps2dq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> i32x16;
42501    #[link_name = "llvm.x86.avx512.mask.cvttps2dq.256"]
42502    fn vcvttps2dq256(a: f32x8, src: i32x8, mask: u8) -> i32x8;
42503    #[link_name = "llvm.x86.avx512.mask.cvttps2dq.128"]
42504    fn vcvttps2dq128(a: f32x4, src: i32x4, mask: u8) -> i32x4;
42505
42506    #[link_name = "llvm.x86.avx512.mask.cvttps2udq.512"]
42507    fn vcvttps2udq(a: f32x16, src: u32x16, mask: u16, rounding: i32) -> u32x16;
42508    #[link_name = "llvm.x86.avx512.mask.cvttps2udq.256"]
42509    fn vcvttps2udq256(a: f32x8, src: u32x8, mask: u8) -> u32x8;
42510    #[link_name = "llvm.x86.avx512.mask.cvttps2udq.128"]
42511    fn vcvttps2udq128(a: f32x4, src: u32x4, mask: u8) -> u32x4;
42512
42513    #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.512"]
42514    fn vcvttpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8;
42515    #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.256"]
42516    fn vcvttpd2dq256(a: f64x4, src: i32x4, mask: u8) -> i32x4;
42517    #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.128"]
42518    fn vcvttpd2dq128(a: f64x2, src: i32x4, mask: u8) -> i32x4;
42519
42520    #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.512"]
42521    fn vcvttpd2udq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> u32x8;
42522    #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.256"]
42523    fn vcvttpd2udq256(a: f64x4, src: i32x4, mask: u8) -> u32x4;
42524    #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.128"]
42525    fn vcvttpd2udq128(a: f64x2, src: i32x4, mask: u8) -> u32x4;
42526
42527    #[link_name = "llvm.x86.avx512.mask.pmov.dw.128"]
42528    fn vpmovdw128(a: i32x4, src: i16x8, mask: u8) -> i16x8;
42529    #[link_name = "llvm.x86.avx512.mask.pmov.db.256"]
42530    fn vpmovdb256(a: i32x8, src: i8x16, mask: u8) -> i8x16;
42531    #[link_name = "llvm.x86.avx512.mask.pmov.db.128"]
42532    fn vpmovdb128(a: i32x4, src: i8x16, mask: u8) -> i8x16;
42533
42534    #[link_name = "llvm.x86.avx512.mask.pmov.qw.256"]
42535    fn vpmovqw256(a: i64x4, src: i16x8, mask: u8) -> i16x8;
42536    #[link_name = "llvm.x86.avx512.mask.pmov.qw.128"]
42537    fn vpmovqw128(a: i64x2, src: i16x8, mask: u8) -> i16x8;
42538    #[link_name = "llvm.x86.avx512.mask.pmov.qb.256"]
42539    fn vpmovqb256(a: i64x4, src: i8x16, mask: u8) -> i8x16;
42540    #[link_name = "llvm.x86.avx512.mask.pmov.qb.128"]
42541    fn vpmovqb128(a: i64x2, src: i8x16, mask: u8) -> i8x16;
42542    #[link_name = "llvm.x86.avx512.mask.pmov.qd.128"]
42543    fn vpmovqd128(a: i64x2, src: i32x4, mask: u8) -> i32x4;
42544
42545    #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.512"]
42546    fn vpmovdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
42547    #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.256"]
42548    fn vpmovdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
42549    #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.128"]
42550    fn vpmovdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
42551
42552    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.512"]
42553    fn vpmovsdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
42554    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.256"]
42555    fn vpmovsdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
42556    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.128"]
42557    fn vpmovsdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
42558
42559    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.512"]
42560    fn vpmovusdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
42561    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.256"]
42562    fn vpmovusdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
42563    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.128"]
42564    fn vpmovusdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
42565
42566    #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.512"]
42567    fn vpmovdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
42568    #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.256"]
42569    fn vpmovdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
42570    #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.128"]
42571    fn vpmovdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
42572
42573    #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.512"]
42574    fn vpmovsdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
42575    #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.256"]
42576    fn vpmovsdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
42577    #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.128"]
42578    fn vpmovsdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
42579
42580    #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.512"]
42581    fn vpmovusdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
42582    #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.256"]
42583    fn vpmovusdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
42584    #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.128"]
42585    fn vpmovusdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
42586
42587    #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.512"]
42588    fn vpmovqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42589    #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.256"]
42590    fn vpmovqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42591    #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.128"]
42592    fn vpmovqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42593
42594    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.512"]
42595    fn vpmovsqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42596    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.256"]
42597    fn vpmovsqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42598    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.128"]
42599    fn vpmovsqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42600
42601    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.512"]
42602    fn vpmovusqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42603    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.256"]
42604    fn vpmovusqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42605    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.128"]
42606    fn vpmovusqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42607
42608    #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.512"]
42609    fn vpmovqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42610    #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.256"]
42611    fn vpmovqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42612    #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.128"]
42613    fn vpmovqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42614
42615    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.512"]
42616    fn vpmovsqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42617    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.256"]
42618    fn vpmovsqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42619    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.128"]
42620    fn vpmovsqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42621
42622    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.512"]
42623    fn vpmovusqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42624    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.256"]
42625    fn vpmovusqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42626    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.128"]
42627    fn vpmovusqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42628
42629    #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.512"]
42630    fn vpmovqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42631    #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.256"]
42632    fn vpmovqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42633    #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.128"]
42634    fn vpmovqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42635
42636    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.512"]
42637    fn vpmovsqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42638    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.256"]
42639    fn vpmovsqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42640    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.128"]
42641    fn vpmovsqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42642
42643    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.512"]
42644    fn vpmovusqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42645    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.256"]
42646    fn vpmovusqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42647    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.128"]
42648    fn vpmovusqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42649
42650    #[link_name = "llvm.x86.avx512.mask.pmov.qb.512"]
42651    fn vpmovqb(a: i64x8, src: i8x16, mask: u8) -> i8x16;
42652
42653    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.512"]
42654    fn vpmovsdw(a: i32x16, src: i16x16, mask: u16) -> i16x16;
42655    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.256"]
42656    fn vpmovsdw256(a: i32x8, src: i16x8, mask: u8) -> i16x8;
42657    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.128"]
42658    fn vpmovsdw128(a: i32x4, src: i16x8, mask: u8) -> i16x8;
42659
42660    #[link_name = "llvm.x86.avx512.mask.pmovs.db.512"]
42661    fn vpmovsdb(a: i32x16, src: i8x16, mask: u16) -> i8x16;
42662    #[link_name = "llvm.x86.avx512.mask.pmovs.db.256"]
42663    fn vpmovsdb256(a: i32x8, src: i8x16, mask: u8) -> i8x16;
42664    #[link_name = "llvm.x86.avx512.mask.pmovs.db.128"]
42665    fn vpmovsdb128(a: i32x4, src: i8x16, mask: u8) -> i8x16;
42666
42667    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.512"]
42668    fn vpmovsqd(a: i64x8, src: i32x8, mask: u8) -> i32x8;
42669    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.256"]
42670    fn vpmovsqd256(a: i64x4, src: i32x4, mask: u8) -> i32x4;
42671    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.128"]
42672    fn vpmovsqd128(a: i64x2, src: i32x4, mask: u8) -> i32x4;
42673
42674    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.512"]
42675    fn vpmovsqw(a: i64x8, src: i16x8, mask: u8) -> i16x8;
42676    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.256"]
42677    fn vpmovsqw256(a: i64x4, src: i16x8, mask: u8) -> i16x8;
42678    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.128"]
42679    fn vpmovsqw128(a: i64x2, src: i16x8, mask: u8) -> i16x8;
42680
42681    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.512"]
42682    fn vpmovsqb(a: i64x8, src: i8x16, mask: u8) -> i8x16;
42683    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.256"]
42684    fn vpmovsqb256(a: i64x4, src: i8x16, mask: u8) -> i8x16;
42685    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.128"]
42686    fn vpmovsqb128(a: i64x2, src: i8x16, mask: u8) -> i8x16;
42687
42688    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.512"]
42689    fn vpmovusdw(a: u32x16, src: u16x16, mask: u16) -> u16x16;
42690    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.256"]
42691    fn vpmovusdw256(a: u32x8, src: u16x8, mask: u8) -> u16x8;
42692    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.128"]
42693    fn vpmovusdw128(a: u32x4, src: u16x8, mask: u8) -> u16x8;
42694
42695    #[link_name = "llvm.x86.avx512.mask.pmovus.db.512"]
42696    fn vpmovusdb(a: u32x16, src: u8x16, mask: u16) -> u8x16;
42697    #[link_name = "llvm.x86.avx512.mask.pmovus.db.256"]
42698    fn vpmovusdb256(a: u32x8, src: u8x16, mask: u8) -> u8x16;
42699    #[link_name = "llvm.x86.avx512.mask.pmovus.db.128"]
42700    fn vpmovusdb128(a: u32x4, src: u8x16, mask: u8) -> u8x16;
42701
42702    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.512"]
42703    fn vpmovusqd(a: u64x8, src: u32x8, mask: u8) -> u32x8;
42704    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.256"]
42705    fn vpmovusqd256(a: u64x4, src: u32x4, mask: u8) -> u32x4;
42706    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.128"]
42707    fn vpmovusqd128(a: u64x2, src: u32x4, mask: u8) -> u32x4;
42708
42709    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.512"]
42710    fn vpmovusqw(a: u64x8, src: u16x8, mask: u8) -> u16x8;
42711    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.256"]
42712    fn vpmovusqw256(a: u64x4, src: u16x8, mask: u8) -> u16x8;
42713    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.128"]
42714    fn vpmovusqw128(a: u64x2, src: u16x8, mask: u8) -> u16x8;
42715
42716    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.512"]
42717    fn vpmovusqb(a: u64x8, src: u8x16, mask: u8) -> u8x16;
42718    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.256"]
42719    fn vpmovusqb256(a: u64x4, src: u8x16, mask: u8) -> u8x16;
42720    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.128"]
42721    fn vpmovusqb128(a: u64x2, src: u8x16, mask: u8) -> u8x16;
42722
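    // Gather intrinsics: `src` supplies the pass-through value for inactive lanes,
    // `slice` is the base pointer, `offsets` holds the per-lane indices, `mask`
    // selects the active lanes, and `scale` is the byte multiplier (1, 2, 4 or 8)
    // applied to each index.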
42723    #[link_name = "llvm.x86.avx512.gather.dpd.512"]
42724    fn vgatherdpd(src: f64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> f64x8;
42725    #[link_name = "llvm.x86.avx512.gather.dps.512"]
42726    fn vgatherdps(src: f32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> f32x16;
42727    #[link_name = "llvm.x86.avx512.gather.qpd.512"]
42728    fn vgatherqpd(src: f64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f64x8;
42729    #[link_name = "llvm.x86.avx512.gather.qps.512"]
42730    fn vgatherqps(src: f32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f32x8;
42731    #[link_name = "llvm.x86.avx512.gather.dpq.512"]
42732    fn vpgatherdq(src: i64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> i64x8;
42733    #[link_name = "llvm.x86.avx512.gather.dpi.512"]
42734    fn vpgatherdd(src: i32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> i32x16;
42735    #[link_name = "llvm.x86.avx512.gather.qpq.512"]
42736    fn vpgatherqq(src: i64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i64x8;
42737    #[link_name = "llvm.x86.avx512.gather.qpi.512"]
42738    fn vpgatherqd(src: i32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i32x8;
42739
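    // Scatter intrinsics mirror the gathers: only the lanes whose mask bit is set
    // are written to memory at `slice + offset * scale`.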
42740    #[link_name = "llvm.x86.avx512.scatter.dpd.512"]
42741    fn vscatterdpd(slice: *mut i8, mask: i8, offsets: i32x8, src: f64x8, scale: i32);
42742    #[link_name = "llvm.x86.avx512.scatter.dps.512"]
42743    fn vscatterdps(slice: *mut i8, mask: i16, offsets: i32x16, src: f32x16, scale: i32);
42744    #[link_name = "llvm.x86.avx512.scatter.qpd.512"]
42745    fn vscatterqpd(slice: *mut i8, mask: i8, offsets: i64x8, src: f64x8, scale: i32);
42746    #[link_name = "llvm.x86.avx512.scatter.qps.512"]
42747    fn vscatterqps(slice: *mut i8, mask: i8, offsets: i64x8, src: f32x8, scale: i32);
42748    #[link_name = "llvm.x86.avx512.scatter.dpq.512"]
42749    fn vpscatterdq(slice: *mut i8, mask: i8, offsets: i32x8, src: i64x8, scale: i32);
42750
42751    #[link_name = "llvm.x86.avx512.scatter.dpi.512"]
42752    fn vpscatterdd(slice: *mut i8, mask: i16, offsets: i32x16, src: i32x16, scale: i32);
42753    #[link_name = "llvm.x86.avx512.scatter.qpq.512"]
42754    fn vpscatterqq(slice: *mut i8, mask: i8, offsets: i64x8, src: i64x8, scale: i32);
42755    #[link_name = "llvm.x86.avx512.scatter.qpi.512"]
42756    fn vpscatterqd(slice: *mut i8, mask: i8, offsets: i64x8, src: i32x8, scale: i32);
42757
42758    #[link_name = "llvm.x86.avx512.scattersiv4.si"]
42759    fn vpscatterdd_128(slice: *mut i8, k: u8, offsets: i32x4, src: i32x4, scale: i32);
42760    #[link_name = "llvm.x86.avx512.scattersiv2.di"]
42761    fn vpscatterdq_128(slice: *mut i8, k: u8, offsets: i32x4, src: i64x2, scale: i32);
42762    #[link_name = "llvm.x86.avx512.scattersiv2.df"]
42763    fn vscatterdpd_128(slice: *mut i8, k: u8, offsets: i32x4, src: f64x2, scale: i32);
42764    #[link_name = "llvm.x86.avx512.scattersiv4.sf"]
42765    fn vscatterdps_128(slice: *mut i8, k: u8, offsets: i32x4, src: f32x4, scale: i32);
42766    #[link_name = "llvm.x86.avx512.scatterdiv4.si"]
42767    fn vpscatterqd_128(slice: *mut i8, k: u8, offsets: i64x2, src: i32x4, scale: i32);
42768    #[link_name = "llvm.x86.avx512.scatterdiv2.di"]
42769    fn vpscatterqq_128(slice: *mut i8, k: u8, offsets: i64x2, src: i64x2, scale: i32);
42770    #[link_name = "llvm.x86.avx512.scatterdiv2.df"]
42771    fn vscatterqpd_128(slice: *mut i8, k: u8, offsets: i64x2, src: f64x2, scale: i32);
42772    #[link_name = "llvm.x86.avx512.scatterdiv4.sf"]
42773    fn vscatterqps_128(slice: *mut i8, k: u8, offsets: i64x2, src: f32x4, scale: i32);
42774
42775    #[link_name = "llvm.x86.avx512.scattersiv8.si"]
42776    fn vpscatterdd_256(slice: *mut i8, k: u8, offsets: i32x8, src: i32x8, scale: i32);
42777    #[link_name = "llvm.x86.avx512.scattersiv4.di"]
42778    fn vpscatterdq_256(slice: *mut i8, k: u8, offsets: i32x4, src: i64x4, scale: i32);
42779    #[link_name = "llvm.x86.avx512.scattersiv4.df"]
42780    fn vscatterdpd_256(slice: *mut i8, k: u8, offsets: i32x4, src: f64x4, scale: i32);
42781    #[link_name = "llvm.x86.avx512.scattersiv8.sf"]
42782    fn vscatterdps_256(slice: *mut i8, k: u8, offsets: i32x8, src: f32x8, scale: i32);
42783    #[link_name = "llvm.x86.avx512.scatterdiv8.si"]
42784    fn vpscatterqd_256(slice: *mut i8, k: u8, offsets: i64x4, src: i32x4, scale: i32);
42785    #[link_name = "llvm.x86.avx512.scatterdiv4.di"]
42786    fn vpscatterqq_256(slice: *mut i8, k: u8, offsets: i64x4, src: i64x4, scale: i32);
42787    #[link_name = "llvm.x86.avx512.scatterdiv4.df"]
42788    fn vscatterqpd_256(slice: *mut i8, k: u8, offsets: i64x4, src: f64x4, scale: i32);
42789    #[link_name = "llvm.x86.avx512.scatterdiv8.sf"]
42790    fn vscatterqps_256(slice: *mut i8, k: u8, offsets: i64x4, src: f32x4, scale: i32);
42791
42792    #[link_name = "llvm.x86.avx512.gather3siv4.si"]
42793    fn vpgatherdd_128(src: i32x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i32x4;
42794    #[link_name = "llvm.x86.avx512.gather3siv2.di"]
42795    fn vpgatherdq_128(src: i64x2, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i64x2;
42796    #[link_name = "llvm.x86.avx512.gather3siv2.df"]
42797    fn vgatherdpd_128(src: f64x2, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> f64x2;
42798    #[link_name = "llvm.x86.avx512.gather3siv4.sf"]
42799    fn vgatherdps_128(src: f32x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> f32x4;
42800    #[link_name = "llvm.x86.avx512.gather3div4.si"]
42801    fn vpgatherqd_128(src: i32x4, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> i32x4;
42802    #[link_name = "llvm.x86.avx512.gather3div2.di"]
42803    fn vpgatherqq_128(src: i64x2, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> i64x2;
42804    #[link_name = "llvm.x86.avx512.gather3div2.df"]
42805    fn vgatherqpd_128(src: f64x2, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> f64x2;
42806    #[link_name = "llvm.x86.avx512.gather3div4.sf"]
42807    fn vgatherqps_128(src: f32x4, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> f32x4;
42808
42809    #[link_name = "llvm.x86.avx512.gather3siv8.si"]
42810    fn vpgatherdd_256(src: i32x8, slice: *const i8, offsets: i32x8, k: u8, scale: i32) -> i32x8;
42811    #[link_name = "llvm.x86.avx512.gather3siv4.di"]
42812    fn vpgatherdq_256(src: i64x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i64x4;
42813    #[link_name = "llvm.x86.avx512.gather3siv4.df"]
42814    fn vgatherdpd_256(src: f64x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> f64x4;
42815    #[link_name = "llvm.x86.avx512.gather3siv8.sf"]
42816    fn vgatherdps_256(src: f32x8, slice: *const i8, offsets: i32x8, k: u8, scale: i32) -> f32x8;
42817    #[link_name = "llvm.x86.avx512.gather3div8.si"]
42818    fn vpgatherqd_256(src: i32x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> i32x4;
42819    #[link_name = "llvm.x86.avx512.gather3div4.di"]
42820    fn vpgatherqq_256(src: i64x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> i64x4;
42821    #[link_name = "llvm.x86.avx512.gather3div4.df"]
42822    fn vgatherqpd_256(src: f64x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> f64x4;
42823    #[link_name = "llvm.x86.avx512.gather3div8.sf"]
42824    fn vgatherqps_256(src: f32x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> f32x4;
42825
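    // `op` is the comparison predicate immediate (a `_CMP_*` constant) and `sae`
    // carries the exception-suppression/rounding control (`_MM_FROUND_*`).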
42826    #[link_name = "llvm.x86.avx512.mask.cmp.ss"]
42827    fn vcmpss(a: __m128, b: __m128, op: i32, m: i8, sae: i32) -> i8;
42828    #[link_name = "llvm.x86.avx512.mask.cmp.sd"]
42829    fn vcmpsd(a: __m128d, b: __m128d, op: i32, m: i8, sae: i32) -> i8;
42830
42831    #[link_name = "llvm.x86.avx512.mask.cmp.ps.512"]
42832    fn vcmpps(a: f32x16, b: f32x16, op: i32, m: i16, sae: i32) -> i16;
42833    #[link_name = "llvm.x86.avx512.mask.cmp.ps.256"]
42834    fn vcmpps256(a: f32x8, b: f32x8, op: i32, m: i8) -> i8;
42835    #[link_name = "llvm.x86.avx512.mask.cmp.ps.128"]
42836    fn vcmpps128(a: f32x4, b: f32x4, op: i32, m: i8) -> i8;
42837
42838    #[link_name = "llvm.x86.avx512.mask.cmp.pd.512"]
42839    fn vcmppd(a: f64x8, b: f64x8, op: i32, m: i8, sae: i32) -> i8;
42840    #[link_name = "llvm.x86.avx512.mask.cmp.pd.256"]
42841    fn vcmppd256(a: f64x4, b: f64x4, op: i32, m: i8) -> i8;
42842    #[link_name = "llvm.x86.avx512.mask.cmp.pd.128"]
42843    fn vcmppd128(a: f64x2, b: f64x2, op: i32, m: i8) -> i8;
42844
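    // Rotate-by-immediate intrinsics; the second argument is the rotate count.
    // The `prolv`/`prorv` variants further below take a per-lane count vector instead.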
42845    #[link_name = "llvm.x86.avx512.mask.prol.d.512"]
42846    fn vprold(a: i32x16, imm8: i32) -> i32x16;
42847    #[link_name = "llvm.x86.avx512.mask.prol.d.256"]
42848    fn vprold256(a: i32x8, imm8: i32) -> i32x8;
42849    #[link_name = "llvm.x86.avx512.mask.prol.d.128"]
42850    fn vprold128(a: i32x4, imm8: i32) -> i32x4;
42851
42852    #[link_name = "llvm.x86.avx512.mask.pror.d.512"]
42853    fn vprord(a: i32x16, imm8: i32) -> i32x16;
42854    #[link_name = "llvm.x86.avx512.mask.pror.d.256"]
42855    fn vprord256(a: i32x8, imm8: i32) -> i32x8;
42856    #[link_name = "llvm.x86.avx512.mask.pror.d.128"]
42857    fn vprord128(a: i32x4, imm8: i32) -> i32x4;
42858
42859    #[link_name = "llvm.x86.avx512.mask.prol.q.512"]
42860    fn vprolq(a: i64x8, imm8: i32) -> i64x8;
42861    #[link_name = "llvm.x86.avx512.mask.prol.q.256"]
42862    fn vprolq256(a: i64x4, imm8: i32) -> i64x4;
42863    #[link_name = "llvm.x86.avx512.mask.prol.q.128"]
42864    fn vprolq128(a: i64x2, imm8: i32) -> i64x2;
42865
42866    #[link_name = "llvm.x86.avx512.mask.pror.q.512"]
42867    fn vprorq(a: i64x8, imm8: i32) -> i64x8;
42868    #[link_name = "llvm.x86.avx512.mask.pror.q.256"]
42869    fn vprorq256(a: i64x4, imm8: i32) -> i64x4;
42870    #[link_name = "llvm.x86.avx512.mask.pror.q.128"]
42871    fn vprorq128(a: i64x2, imm8: i32) -> i64x2;
42872
42873    #[link_name = "llvm.x86.avx512.mask.prolv.d.512"]
42874    fn vprolvd(a: i32x16, b: i32x16) -> i32x16;
42875    #[link_name = "llvm.x86.avx512.mask.prolv.d.256"]
42876    fn vprolvd256(a: i32x8, b: i32x8) -> i32x8;
42877    #[link_name = "llvm.x86.avx512.mask.prolv.d.128"]
42878    fn vprolvd128(a: i32x4, b: i32x4) -> i32x4;
42879
42880    #[link_name = "llvm.x86.avx512.mask.prorv.d.512"]
42881    fn vprorvd(a: i32x16, b: i32x16) -> i32x16;
42882    #[link_name = "llvm.x86.avx512.mask.prorv.d.256"]
42883    fn vprorvd256(a: i32x8, b: i32x8) -> i32x8;
42884    #[link_name = "llvm.x86.avx512.mask.prorv.d.128"]
42885    fn vprorvd128(a: i32x4, b: i32x4) -> i32x4;
42886
42887    #[link_name = "llvm.x86.avx512.mask.prolv.q.512"]
42888    fn vprolvq(a: i64x8, b: i64x8) -> i64x8;
42889    #[link_name = "llvm.x86.avx512.mask.prolv.q.256"]
42890    fn vprolvq256(a: i64x4, b: i64x4) -> i64x4;
42891    #[link_name = "llvm.x86.avx512.mask.prolv.q.128"]
42892    fn vprolvq128(a: i64x2, b: i64x2) -> i64x2;
42893
42894    #[link_name = "llvm.x86.avx512.mask.prorv.q.512"]
42895    fn vprorvq(a: i64x8, b: i64x8) -> i64x8;
42896    #[link_name = "llvm.x86.avx512.mask.prorv.q.256"]
42897    fn vprorvq256(a: i64x4, b: i64x4) -> i64x4;
42898    #[link_name = "llvm.x86.avx512.mask.prorv.q.128"]
42899    fn vprorvq128(a: i64x2, b: i64x2) -> i64x2;
42900
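    // `psllv`/`psrlv` shift each lane by the count in the corresponding lane of `b`;
    // the plain `psll`/`psrl`/`psra` forms shift every lane by the count held in the
    // low element of a 128-bit `count` vector.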
42901    #[link_name = "llvm.x86.avx512.psllv.d.512"]
42902    fn vpsllvd(a: i32x16, b: i32x16) -> i32x16;
42903    #[link_name = "llvm.x86.avx512.psrlv.d.512"]
42904    fn vpsrlvd(a: i32x16, b: i32x16) -> i32x16;
42905    #[link_name = "llvm.x86.avx512.psllv.q.512"]
42906    fn vpsllvq(a: i64x8, b: i64x8) -> i64x8;
42907    #[link_name = "llvm.x86.avx512.psrlv.q.512"]
42908    fn vpsrlvq(a: i64x8, b: i64x8) -> i64x8;
42909
42910    #[link_name = "llvm.x86.avx512.psll.d.512"]
42911    fn vpslld(a: i32x16, count: i32x4) -> i32x16;
42912    #[link_name = "llvm.x86.avx512.psrl.d.512"]
42913    fn vpsrld(a: i32x16, count: i32x4) -> i32x16;
42914    #[link_name = "llvm.x86.avx512.psll.q.512"]
42915    fn vpsllq(a: i64x8, count: i64x2) -> i64x8;
42916    #[link_name = "llvm.x86.avx512.psrl.q.512"]
42917    fn vpsrlq(a: i64x8, count: i64x2) -> i64x8;
42918
42919    #[link_name = "llvm.x86.avx512.psra.d.512"]
42920    fn vpsrad(a: i32x16, count: i32x4) -> i32x16;
42921
42922    #[link_name = "llvm.x86.avx512.psra.q.512"]
42923    fn vpsraq(a: i64x8, count: i64x2) -> i64x8;
42924    #[link_name = "llvm.x86.avx512.psra.q.256"]
42925    fn vpsraq256(a: i64x4, count: i64x2) -> i64x4;
42926    #[link_name = "llvm.x86.avx512.psra.q.128"]
42927    fn vpsraq128(a: i64x2, count: i64x2) -> i64x2;
42928
42929    #[link_name = "llvm.x86.avx512.psrav.d.512"]
42930    fn vpsravd(a: i32x16, count: i32x16) -> i32x16;
42931
42932    #[link_name = "llvm.x86.avx512.psrav.q.512"]
42933    fn vpsravq(a: i64x8, count: i64x8) -> i64x8;
42934    #[link_name = "llvm.x86.avx512.psrav.q.256"]
42935    fn vpsravq256(a: i64x4, count: i64x4) -> i64x4;
42936    #[link_name = "llvm.x86.avx512.psrav.q.128"]
42937    fn vpsravq128(a: i64x2, count: i64x2) -> i64x2;
42938
42939    #[link_name = "llvm.x86.avx512.vpermilvar.ps.512"]
42940    fn vpermilps(a: f32x16, b: i32x16) -> f32x16;
42941    #[link_name = "llvm.x86.avx512.vpermilvar.pd.512"]
42942    fn vpermilpd(a: f64x8, b: i64x8) -> f64x8;
42943
42944    #[link_name = "llvm.x86.avx512.permvar.si.512"]
42945    fn vpermd(a: i32x16, idx: i32x16) -> i32x16;
42946
42947    #[link_name = "llvm.x86.avx512.permvar.di.512"]
42948    fn vpermq(a: i64x8, idx: i64x8) -> i64x8;
42949    #[link_name = "llvm.x86.avx512.permvar.di.256"]
42950    fn vpermq256(a: i64x4, idx: i64x4) -> i64x4;
42951
42952    #[link_name = "llvm.x86.avx512.permvar.sf.512"]
42953    fn vpermps(a: f32x16, idx: i32x16) -> f32x16;
42954
42955    #[link_name = "llvm.x86.avx512.permvar.df.512"]
42956    fn vpermpd(a: f64x8, idx: i64x8) -> f64x8;
42957    #[link_name = "llvm.x86.avx512.permvar.df.256"]
42958    fn vpermpd256(a: f64x4, idx: i64x4) -> f64x4;
42959
42960    #[link_name = "llvm.x86.avx512.vpermi2var.d.512"]
42961    fn vpermi2d(a: i32x16, idx: i32x16, b: i32x16) -> i32x16;
42962    #[link_name = "llvm.x86.avx512.vpermi2var.d.256"]
42963    fn vpermi2d256(a: i32x8, idx: i32x8, b: i32x8) -> i32x8;
42964    #[link_name = "llvm.x86.avx512.vpermi2var.d.128"]
42965    fn vpermi2d128(a: i32x4, idx: i32x4, b: i32x4) -> i32x4;
42966
42967    #[link_name = "llvm.x86.avx512.vpermi2var.q.512"]
42968    fn vpermi2q(a: i64x8, idx: i64x8, b: i64x8) -> i64x8;
42969    #[link_name = "llvm.x86.avx512.vpermi2var.q.256"]
42970    fn vpermi2q256(a: i64x4, idx: i64x4, b: i64x4) -> i64x4;
42971    #[link_name = "llvm.x86.avx512.vpermi2var.q.128"]
42972    fn vpermi2q128(a: i64x2, idx: i64x2, b: i64x2) -> i64x2;
42973
42974    #[link_name = "llvm.x86.avx512.vpermi2var.ps.512"]
42975    fn vpermi2ps(a: f32x16, idx: i32x16, b: f32x16) -> f32x16;
42976    #[link_name = "llvm.x86.avx512.vpermi2var.ps.256"]
42977    fn vpermi2ps256(a: f32x8, idx: i32x8, b: f32x8) -> f32x8;
42978    #[link_name = "llvm.x86.avx512.vpermi2var.ps.128"]
42979    fn vpermi2ps128(a: f32x4, idx: i32x4, b: f32x4) -> f32x4;
42980
42981    #[link_name = "llvm.x86.avx512.vpermi2var.pd.512"]
42982    fn vpermi2pd(a: f64x8, idx: i64x8, b: f64x8) -> f64x8;
42983    #[link_name = "llvm.x86.avx512.vpermi2var.pd.256"]
42984    fn vpermi2pd256(a: f64x4, idx: i64x4, b: f64x4) -> f64x4;
42985    #[link_name = "llvm.x86.avx512.vpermi2var.pd.128"]
42986    fn vpermi2pd128(a: f64x2, idx: i64x2, b: f64x2) -> f64x2;
42987
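    // Compress packs the lanes selected by `mask` contiguously into the low lanes of
    // the result (the remaining lanes come from `src`); expand, further below, performs
    // the inverse distribution.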
42988    #[link_name = "llvm.x86.avx512.mask.compress.d.512"]
42989    fn vpcompressd(a: i32x16, src: i32x16, mask: u16) -> i32x16;
42990    #[link_name = "llvm.x86.avx512.mask.compress.d.256"]
42991    fn vpcompressd256(a: i32x8, src: i32x8, mask: u8) -> i32x8;
42992    #[link_name = "llvm.x86.avx512.mask.compress.d.128"]
42993    fn vpcompressd128(a: i32x4, src: i32x4, mask: u8) -> i32x4;
42994
42995    #[link_name = "llvm.x86.avx512.mask.compress.q.512"]
42996    fn vpcompressq(a: i64x8, src: i64x8, mask: u8) -> i64x8;
42997    #[link_name = "llvm.x86.avx512.mask.compress.q.256"]
42998    fn vpcompressq256(a: i64x4, src: i64x4, mask: u8) -> i64x4;
42999    #[link_name = "llvm.x86.avx512.mask.compress.q.128"]
43000    fn vpcompressq128(a: i64x2, src: i64x2, mask: u8) -> i64x2;
43001
43002    #[link_name = "llvm.x86.avx512.mask.compress.ps.512"]
43003    fn vcompressps(a: f32x16, src: f32x16, mask: u16) -> f32x16;
43004    #[link_name = "llvm.x86.avx512.mask.compress.ps.256"]
43005    fn vcompressps256(a: f32x8, src: f32x8, mask: u8) -> f32x8;
43006    #[link_name = "llvm.x86.avx512.mask.compress.ps.128"]
43007    fn vcompressps128(a: f32x4, src: f32x4, mask: u8) -> f32x4;
43008
43009    #[link_name = "llvm.x86.avx512.mask.compress.pd.512"]
43010    fn vcompresspd(a: f64x8, src: f64x8, mask: u8) -> f64x8;
43011    #[link_name = "llvm.x86.avx512.mask.compress.pd.256"]
43012    fn vcompresspd256(a: f64x4, src: f64x4, mask: u8) -> f64x4;
43013    #[link_name = "llvm.x86.avx512.mask.compress.pd.128"]
43014    fn vcompresspd128(a: f64x2, src: f64x2, mask: u8) -> f64x2;
43015
43016    #[link_name = "llvm.x86.avx512.mask.compress.store.d.512"]
43017    fn vcompressstored(mem: *mut i8, data: i32x16, mask: u16);
43018    #[link_name = "llvm.x86.avx512.mask.compress.store.d.256"]
43019    fn vcompressstored256(mem: *mut i8, data: i32x8, mask: u8);
43020    #[link_name = "llvm.x86.avx512.mask.compress.store.d.128"]
43021    fn vcompressstored128(mem: *mut i8, data: i32x4, mask: u8);
43022
43023    #[link_name = "llvm.x86.avx512.mask.compress.store.q.512"]
43024    fn vcompressstoreq(mem: *mut i8, data: i64x8, mask: u8);
43025    #[link_name = "llvm.x86.avx512.mask.compress.store.q.256"]
43026    fn vcompressstoreq256(mem: *mut i8, data: i64x4, mask: u8);
43027    #[link_name = "llvm.x86.avx512.mask.compress.store.q.128"]
43028    fn vcompressstoreq128(mem: *mut i8, data: i64x2, mask: u8);
43029
43030    #[link_name = "llvm.x86.avx512.mask.compress.store.ps.512"]
43031    fn vcompressstoreps(mem: *mut i8, data: f32x16, mask: u16);
43032    #[link_name = "llvm.x86.avx512.mask.compress.store.ps.256"]
43033    fn vcompressstoreps256(mem: *mut i8, data: f32x8, mask: u8);
43034    #[link_name = "llvm.x86.avx512.mask.compress.store.ps.128"]
43035    fn vcompressstoreps128(mem: *mut i8, data: f32x4, mask: u8);
43036
43037    #[link_name = "llvm.x86.avx512.mask.compress.store.pd.512"]
43038    fn vcompressstorepd(mem: *mut i8, data: f64x8, mask: u8);
43039    #[link_name = "llvm.x86.avx512.mask.compress.store.pd.256"]
43040    fn vcompressstorepd256(mem: *mut i8, data: f64x4, mask: u8);
43041    #[link_name = "llvm.x86.avx512.mask.compress.store.pd.128"]
43042    fn vcompressstorepd128(mem: *mut i8, data: f64x2, mask: u8);
43043
43044    #[link_name = "llvm.x86.avx512.mask.expand.d.512"]
43045    fn vpexpandd(a: i32x16, src: i32x16, mask: u16) -> i32x16;
43046    #[link_name = "llvm.x86.avx512.mask.expand.d.256"]
43047    fn vpexpandd256(a: i32x8, src: i32x8, mask: u8) -> i32x8;
43048    #[link_name = "llvm.x86.avx512.mask.expand.d.128"]
43049    fn vpexpandd128(a: i32x4, src: i32x4, mask: u8) -> i32x4;
43050
43051    #[link_name = "llvm.x86.avx512.mask.expand.q.512"]
43052    fn vpexpandq(a: i64x8, src: i64x8, mask: u8) -> i64x8;
43053    #[link_name = "llvm.x86.avx512.mask.expand.q.256"]
43054    fn vpexpandq256(a: i64x4, src: i64x4, mask: u8) -> i64x4;
43055    #[link_name = "llvm.x86.avx512.mask.expand.q.128"]
43056    fn vpexpandq128(a: i64x2, src: i64x2, mask: u8) -> i64x2;
43057
43058    #[link_name = "llvm.x86.avx512.mask.expand.ps.512"]
43059    fn vexpandps(a: f32x16, src: f32x16, mask: u16) -> f32x16;
43060    #[link_name = "llvm.x86.avx512.mask.expand.ps.256"]
43061    fn vexpandps256(a: f32x8, src: f32x8, mask: u8) -> f32x8;
43062    #[link_name = "llvm.x86.avx512.mask.expand.ps.128"]
43063    fn vexpandps128(a: f32x4, src: f32x4, mask: u8) -> f32x4;
43064
43065    #[link_name = "llvm.x86.avx512.mask.expand.pd.512"]
43066    fn vexpandpd(a: f64x8, src: f64x8, mask: u8) -> f64x8;
43067    #[link_name = "llvm.x86.avx512.mask.expand.pd.256"]
43068    fn vexpandpd256(a: f64x4, src: f64x4, mask: u8) -> f64x4;
43069    #[link_name = "llvm.x86.avx512.mask.expand.pd.128"]
43070    fn vexpandpd128(a: f64x2, src: f64x2, mask: u8) -> f64x2;
43071
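    // Scalar arithmetic with explicit rounding: the trailing `rounding`/`sae` argument
    // takes the `_MM_FROUND_*` constants (e.g. `_MM_FROUND_CUR_DIRECTION`).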
43072    #[link_name = "llvm.x86.avx512.mask.add.ss.round"]
43073    fn vaddss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
43074    #[link_name = "llvm.x86.avx512.mask.add.sd.round"]
43075    fn vaddsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
43076    #[link_name = "llvm.x86.avx512.mask.sub.ss.round"]
43077    fn vsubss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
43078    #[link_name = "llvm.x86.avx512.mask.sub.sd.round"]
43079    fn vsubsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
43080    #[link_name = "llvm.x86.avx512.mask.mul.ss.round"]
43081    fn vmulss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
43082    #[link_name = "llvm.x86.avx512.mask.mul.sd.round"]
43083    fn vmulsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
43084    #[link_name = "llvm.x86.avx512.mask.div.ss.round"]
43085    fn vdivss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
43086    #[link_name = "llvm.x86.avx512.mask.div.sd.round"]
43087    fn vdivsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
43088    #[link_name = "llvm.x86.avx512.mask.max.ss.round"]
43089    fn vmaxss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
43090    #[link_name = "llvm.x86.avx512.mask.max.sd.round"]
43091    fn vmaxsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
43092    #[link_name = "llvm.x86.avx512.mask.min.ss.round"]
43093    fn vminss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
43094    #[link_name = "llvm.x86.avx512.mask.min.sd.round"]
43095    fn vminsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
43096    #[link_name = "llvm.x86.avx512.mask.sqrt.ss"]
43097    fn vsqrtss(a: __m128, b: __m128, src: __m128, mask: u8, rounding: i32) -> __m128;
43098    #[link_name = "llvm.x86.avx512.mask.sqrt.sd"]
43099    fn vsqrtsd(a: __m128d, b: __m128d, src: __m128d, mask: u8, rounding: i32) -> __m128d;
43100    #[link_name = "llvm.x86.avx512.mask.getexp.ss"]
43101    fn vgetexpss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
43102    #[link_name = "llvm.x86.avx512.mask.getexp.sd"]
43103    fn vgetexpsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
43104    #[link_name = "llvm.x86.avx512.mask.getmant.ss"]
43105    fn vgetmantss(a: f32x4, b: f32x4, mantissas: i32, src: f32x4, m: u8, sae: i32) -> f32x4;
43106    #[link_name = "llvm.x86.avx512.mask.getmant.sd"]
43107    fn vgetmantsd(a: f64x2, b: f64x2, mantissas: i32, src: f64x2, m: u8, sae: i32) -> f64x2;
43108
43109    #[link_name = "llvm.x86.avx512.rsqrt14.ss"]
43110    fn vrsqrt14ss(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
43111    #[link_name = "llvm.x86.avx512.rsqrt14.sd"]
43112    fn vrsqrt14sd(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;
43113    #[link_name = "llvm.x86.avx512.rcp14.ss"]
43114    fn vrcp14ss(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
43115    #[link_name = "llvm.x86.avx512.rcp14.sd"]
43116    fn vrcp14sd(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;
43117
43118    #[link_name = "llvm.x86.avx512.mask.rndscale.ss"]
43119    fn vrndscaless(a: f32x4, b: f32x4, src: f32x4, mask: u8, imm8: i32, sae: i32) -> f32x4;
43120    #[link_name = "llvm.x86.avx512.mask.rndscale.sd"]
43121    fn vrndscalesd(a: f64x2, b: f64x2, src: f64x2, mask: u8, imm8: i32, sae: i32) -> f64x2;
43122    #[link_name = "llvm.x86.avx512.mask.scalef.ss"]
43123    fn vscalefss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
43124    #[link_name = "llvm.x86.avx512.mask.scalef.sd"]
43125    fn vscalefsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
43126
43127    #[link_name = "llvm.x86.avx512.vfmadd.f32"]
43128    fn vfmaddssround(a: f32, b: f32, c: f32, rounding: i32) -> f32;
43129    #[link_name = "llvm.x86.avx512.vfmadd.f64"]
43130    fn vfmaddsdround(a: f64, b: f64, c: f64, rounding: i32) -> f64;
43131
43132    #[link_name = "llvm.x86.avx512.mask.fixupimm.ss"]
43133    fn vfixupimmss(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8, sae: i32) -> f32x4;
43134    #[link_name = "llvm.x86.avx512.mask.fixupimm.sd"]
43135    fn vfixupimmsd(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8, sae: i32) -> f64x2;
43136    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ss"]
43137    fn vfixupimmssz(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8, sae: i32) -> f32x4;
43138    #[link_name = "llvm.x86.avx512.maskz.fixupimm.sd"]
43139    fn vfixupimmsdz(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8, sae: i32) -> f64x2;
43140
43141    #[link_name = "llvm.x86.avx512.mask.cvtss2sd.round"]
43142    fn vcvtss2sd(a: f64x2, b: f32x4, src: f64x2, mask: u8, sae: i32) -> f64x2;
43143    #[link_name = "llvm.x86.avx512.mask.cvtsd2ss.round"]
43144    fn vcvtsd2ss(a: f32x4, b: f64x2, src: f32x4, mask: u8, rounding: i32) -> f32x4;
43145
43146    #[link_name = "llvm.x86.avx512.vcvtss2si32"]
43147    fn vcvtss2si(a: f32x4, rounding: i32) -> i32;
43148    #[link_name = "llvm.x86.avx512.vcvtss2usi32"]
43149    fn vcvtss2usi(a: f32x4, rounding: i32) -> u32;
43150
43151    #[link_name = "llvm.x86.avx512.vcvtsd2si32"]
43152    fn vcvtsd2si(a: f64x2, rounding: i32) -> i32;
43153    #[link_name = "llvm.x86.avx512.vcvtsd2usi32"]
43154    fn vcvtsd2usi(a: f64x2, rounding: i32) -> u32;
43155
43156    #[link_name = "llvm.x86.avx512.cvtsi2ss32"]
43157    fn vcvtsi2ss(a: f32x4, b: i32, rounding: i32) -> f32x4;
43158
43159    #[link_name = "llvm.x86.avx512.cvtusi2ss"]
43160    fn vcvtusi2ss(a: f32x4, b: u32, rounding: i32) -> f32x4;
43161
43162    #[link_name = "llvm.x86.avx512.cvttss2si"]
43163    fn vcvttss2si(a: f32x4, rounding: i32) -> i32;
43164    #[link_name = "llvm.x86.avx512.cvttss2usi"]
43165    fn vcvttss2usi(a: f32x4, rounding: i32) -> u32;
43166
43167    #[link_name = "llvm.x86.avx512.cvttsd2si"]
43168    fn vcvttsd2si(a: f64x2, rounding: i32) -> i32;
43169    #[link_name = "llvm.x86.avx512.cvttsd2usi"]
43170    fn vcvttsd2usi(a: f64x2, rounding: i32) -> u32;
43171
43172    #[link_name = "llvm.x86.avx512.vcomi.ss"]
43173    fn vcomiss(a: f32x4, b: f32x4, imm8: i32, sae: i32) -> i32;
43174    #[link_name = "llvm.x86.avx512.vcomi.sd"]
43175    fn vcomisd(a: f64x2, b: f64x2, imm8: i32, sae: i32) -> i32;
43176
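    // Masked loads/stores: the `loadu`/`storeu` forms accept unaligned addresses, while
    // the `load`/`store` forms require the address to be aligned to the vector width.
    // Lanes whose mask bit is clear are taken from `a` (loads) or left unwritten (stores).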
43177    #[link_name = "llvm.x86.avx512.mask.loadu.d.128"]
43178    fn loaddqu32_128(mem_addr: *const i32, a: i32x4, mask: u8) -> i32x4;
43179    #[link_name = "llvm.x86.avx512.mask.loadu.q.128"]
43180    fn loaddqu64_128(mem_addr: *const i64, a: i64x2, mask: u8) -> i64x2;
43181    #[link_name = "llvm.x86.avx512.mask.loadu.ps.128"]
43182    fn loadups_128(mem_addr: *const f32, a: f32x4, mask: u8) -> f32x4;
43183    #[link_name = "llvm.x86.avx512.mask.loadu.pd.128"]
43184    fn loadupd_128(mem_addr: *const f64, a: f64x2, mask: u8) -> f64x2;
43185    #[link_name = "llvm.x86.avx512.mask.loadu.d.256"]
43186    fn loaddqu32_256(mem_addr: *const i32, a: i32x8, mask: u8) -> i32x8;
43187    #[link_name = "llvm.x86.avx512.mask.loadu.q.256"]
43188    fn loaddqu64_256(mem_addr: *const i64, a: i64x4, mask: u8) -> i64x4;
43189    #[link_name = "llvm.x86.avx512.mask.loadu.ps.256"]
43190    fn loadups_256(mem_addr: *const f32, a: f32x8, mask: u8) -> f32x8;
43191    #[link_name = "llvm.x86.avx512.mask.loadu.pd.256"]
43192    fn loadupd_256(mem_addr: *const f64, a: f64x4, mask: u8) -> f64x4;
43193    #[link_name = "llvm.x86.avx512.mask.loadu.d.512"]
43194    fn loaddqu32_512(mem_addr: *const i32, a: i32x16, mask: u16) -> i32x16;
43195    #[link_name = "llvm.x86.avx512.mask.loadu.q.512"]
43196    fn loaddqu64_512(mem_addr: *const i64, a: i64x8, mask: u8) -> i64x8;
43197    #[link_name = "llvm.x86.avx512.mask.loadu.ps.512"]
43198    fn loadups_512(mem_addr: *const f32, a: f32x16, mask: u16) -> f32x16;
43199    #[link_name = "llvm.x86.avx512.mask.loadu.pd.512"]
43200    fn loadupd_512(mem_addr: *const f64, a: f64x8, mask: u8) -> f64x8;
43201
43202    #[link_name = "llvm.x86.avx512.mask.load.d.128"]
43203    fn loaddqa32_128(mem_addr: *const i32, a: i32x4, mask: u8) -> i32x4;
43204    #[link_name = "llvm.x86.avx512.mask.load.q.128"]
43205    fn loaddqa64_128(mem_addr: *const i64, a: i64x2, mask: u8) -> i64x2;
43206    #[link_name = "llvm.x86.avx512.mask.load.ps.128"]
43207    fn loadaps_128(mem_addr: *const f32, a: f32x4, mask: u8) -> f32x4;
43208    #[link_name = "llvm.x86.avx512.mask.load.pd.128"]
43209    fn loadapd_128(mem_addr: *const f64, a: f64x2, mask: u8) -> f64x2;
43210    #[link_name = "llvm.x86.avx512.mask.load.d.256"]
43211    fn loaddqa32_256(mem_addr: *const i32, a: i32x8, mask: u8) -> i32x8;
43212    #[link_name = "llvm.x86.avx512.mask.load.q.256"]
43213    fn loaddqa64_256(mem_addr: *const i64, a: i64x4, mask: u8) -> i64x4;
43214    #[link_name = "llvm.x86.avx512.mask.load.ps.256"]
43215    fn loadaps_256(mem_addr: *const f32, a: f32x8, mask: u8) -> f32x8;
43216    #[link_name = "llvm.x86.avx512.mask.load.pd.256"]
43217    fn loadapd_256(mem_addr: *const f64, a: f64x4, mask: u8) -> f64x4;
43218    #[link_name = "llvm.x86.avx512.mask.load.d.512"]
43219    fn loaddqa32_512(mem_addr: *const i32, a: i32x16, mask: u16) -> i32x16;
43220    #[link_name = "llvm.x86.avx512.mask.load.q.512"]
43221    fn loaddqa64_512(mem_addr: *const i64, a: i64x8, mask: u8) -> i64x8;
43222    #[link_name = "llvm.x86.avx512.mask.load.ps.512"]
43223    fn loadaps_512(mem_addr: *const f32, a: f32x16, mask: u16) -> f32x16;
43224    #[link_name = "llvm.x86.avx512.mask.load.pd.512"]
43225    fn loadapd_512(mem_addr: *const f64, a: f64x8, mask: u8) -> f64x8;
43226
43227    #[link_name = "llvm.x86.avx512.mask.storeu.d.128"]
43228    fn storedqu32_128(mem_addr: *mut i32, a: i32x4, mask: u8);
43229    #[link_name = "llvm.x86.avx512.mask.storeu.q.128"]
43230    fn storedqu64_128(mem_addr: *mut i64, a: i64x2, mask: u8);
43231    #[link_name = "llvm.x86.avx512.mask.storeu.ps.128"]
43232    fn storeups_128(mem_addr: *mut f32, a: f32x4, mask: u8);
43233    #[link_name = "llvm.x86.avx512.mask.storeu.pd.128"]
43234    fn storeupd_128(mem_addr: *mut f64, a: f64x2, mask: u8);
43235    #[link_name = "llvm.x86.avx512.mask.storeu.d.256"]
43236    fn storedqu32_256(mem_addr: *mut i32, a: i32x8, mask: u8);
43237    #[link_name = "llvm.x86.avx512.mask.storeu.q.256"]
43238    fn storedqu64_256(mem_addr: *mut i64, a: i64x4, mask: u8);
43239    #[link_name = "llvm.x86.avx512.mask.storeu.ps.256"]
43240    fn storeups_256(mem_addr: *mut f32, a: f32x8, mask: u8);
43241    #[link_name = "llvm.x86.avx512.mask.storeu.pd.256"]
43242    fn storeupd_256(mem_addr: *mut f64, a: f64x4, mask: u8);
43243    #[link_name = "llvm.x86.avx512.mask.storeu.d.512"]
43244    fn storedqu32_512(mem_addr: *mut i32, a: i32x16, mask: u16);
43245    #[link_name = "llvm.x86.avx512.mask.storeu.q.512"]
43246    fn storedqu64_512(mem_addr: *mut i64, a: i64x8, mask: u8);
43247    #[link_name = "llvm.x86.avx512.mask.storeu.ps.512"]
43248    fn storeups_512(mem_addr: *mut f32, a: f32x16, mask: u16);
43249    #[link_name = "llvm.x86.avx512.mask.storeu.pd.512"]
43250    fn storeupd_512(mem_addr: *mut f64, a: f64x8, mask: u8);
43251
43252    #[link_name = "llvm.x86.avx512.mask.store.d.128"]
43253    fn storedqa32_128(mem_addr: *mut i32, a: i32x4, mask: u8);
43254    #[link_name = "llvm.x86.avx512.mask.store.q.128"]
43255    fn storedqa64_128(mem_addr: *mut i64, a: i64x2, mask: u8);
43256    #[link_name = "llvm.x86.avx512.mask.store.ps.128"]
43257    fn storeaps_128(mem_addr: *mut f32, a: f32x4, mask: u8);
43258    #[link_name = "llvm.x86.avx512.mask.store.pd.128"]
43259    fn storeapd_128(mem_addr: *mut f64, a: f64x2, mask: u8);
43260    #[link_name = "llvm.x86.avx512.mask.store.d.256"]
43261    fn storedqa32_256(mem_addr: *mut i32, a: i32x8, mask: u8);
43262    #[link_name = "llvm.x86.avx512.mask.store.q.256"]
43263    fn storedqa64_256(mem_addr: *mut i64, a: i64x4, mask: u8);
43264    #[link_name = "llvm.x86.avx512.mask.store.ps.256"]
43265    fn storeaps_256(mem_addr: *mut f32, a: f32x8, mask: u8);
43266    #[link_name = "llvm.x86.avx512.mask.store.pd.256"]
43267    fn storeapd_256(mem_addr: *mut f64, a: f64x4, mask: u8);
43268    #[link_name = "llvm.x86.avx512.mask.store.d.512"]
43269    fn storedqa32_512(mem_addr: *mut i32, a: i32x16, mask: u16);
43270    #[link_name = "llvm.x86.avx512.mask.store.q.512"]
43271    fn storedqa64_512(mem_addr: *mut i64, a: i64x8, mask: u8);
43272    #[link_name = "llvm.x86.avx512.mask.store.ps.512"]
43273    fn storeaps_512(mem_addr: *mut f32, a: f32x16, mask: u16);
43274    #[link_name = "llvm.x86.avx512.mask.store.pd.512"]
43275    fn storeapd_512(mem_addr: *mut f64, a: f64x8, mask: u8);
43276
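    // Expand-loads read only as many contiguous elements from memory as there are set
    // mask bits and distribute them into the selected lanes; unselected lanes come from `a`.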
43277    #[link_name = "llvm.x86.avx512.mask.expand.load.d.128"]
43278    fn expandloadd_128(mem_addr: *const i32, a: i32x4, mask: u8) -> i32x4;
43279    #[link_name = "llvm.x86.avx512.mask.expand.load.q.128"]
43280    fn expandloadq_128(mem_addr: *const i64, a: i64x2, mask: u8) -> i64x2;
43281    #[link_name = "llvm.x86.avx512.mask.expand.load.ps.128"]
43282    fn expandloadps_128(mem_addr: *const f32, a: f32x4, mask: u8) -> f32x4;
43283    #[link_name = "llvm.x86.avx512.mask.expand.load.pd.128"]
43284    fn expandloadpd_128(mem_addr: *const f64, a: f64x2, mask: u8) -> f64x2;
43285    #[link_name = "llvm.x86.avx512.mask.expand.load.d.256"]
43286    fn expandloadd_256(mem_addr: *const i32, a: i32x8, mask: u8) -> i32x8;
43287    #[link_name = "llvm.x86.avx512.mask.expand.load.q.256"]
43288    fn expandloadq_256(mem_addr: *const i64, a: i64x4, mask: u8) -> i64x4;
43289    #[link_name = "llvm.x86.avx512.mask.expand.load.ps.256"]
43290    fn expandloadps_256(mem_addr: *const f32, a: f32x8, mask: u8) -> f32x8;
43291    #[link_name = "llvm.x86.avx512.mask.expand.load.pd.256"]
43292    fn expandloadpd_256(mem_addr: *const f64, a: f64x4, mask: u8) -> f64x4;
43293    #[link_name = "llvm.x86.avx512.mask.expand.load.d.512"]
43294    fn expandloadd_512(mem_addr: *const i32, a: i32x16, mask: u16) -> i32x16;
43295    #[link_name = "llvm.x86.avx512.mask.expand.load.q.512"]
43296    fn expandloadq_512(mem_addr: *const i64, a: i64x8, mask: u8) -> i64x8;
43297    #[link_name = "llvm.x86.avx512.mask.expand.load.ps.512"]
43298    fn expandloadps_512(mem_addr: *const f32, a: f32x16, mask: u16) -> f32x16;
43299    #[link_name = "llvm.x86.avx512.mask.expand.load.pd.512"]
43300    fn expandloadpd_512(mem_addr: *const f64, a: f64x8, mask: u8) -> f64x8;
43301
43302}
43303
43304#[cfg(test)]
43305mod tests {
43306
43307    use stdarch_test::simd_test;
43308
43309    use crate::core_arch::x86::*;
43310    use crate::hint::black_box;
43311    use crate::mem;
43312
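    // Mask convention used by these tests: bit `i` of the mask controls element `i`,
    // and the `_mm*_setr_*` constructors place their first argument in element 0, so a
    // mask such as `0b00000000_11111111` selects the first eight lanes.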
43313    #[simd_test(enable = "avx512f")]
43314    unsafe fn test_mm512_abs_epi32() {
43315        #[rustfmt::skip]
43316        let a = _mm512_setr_epi32(
43317            0, 1, -1, i32::MAX,
43318            i32::MIN, 100, -100, -32,
43319            0, 1, -1, i32::MAX,
43320            i32::MIN, 100, -100, -32,
43321        );
43322        let r = _mm512_abs_epi32(a);
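        // `abs` of `i32::MIN` is not representable and wraps back to `i32::MIN`,
        // which the expected values spell as `i32::MAX.wrapping_add(1)`.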
43323        #[rustfmt::skip]
43324        let e = _mm512_setr_epi32(
43325            0, 1, 1, i32::MAX,
43326            i32::MAX.wrapping_add(1), 100, 100, 32,
43327            0, 1, 1, i32::MAX,
43328            i32::MAX.wrapping_add(1), 100, 100, 32,
43329        );
43330        assert_eq_m512i(r, e);
43331    }
43332
43333    #[simd_test(enable = "avx512f")]
43334    unsafe fn test_mm512_mask_abs_epi32() {
43335        #[rustfmt::skip]
43336        let a = _mm512_setr_epi32(
43337            0, 1, -1, i32::MAX,
43338            i32::MIN, 100, -100, -32,
43339            0, 1, -1, i32::MAX,
43340            i32::MIN, 100, -100, -32,
43341        );
43342        let r = _mm512_mask_abs_epi32(a, 0, a);
43343        assert_eq_m512i(r, a);
43344        let r = _mm512_mask_abs_epi32(a, 0b00000000_11111111, a);
43345        #[rustfmt::skip]
43346        let e = _mm512_setr_epi32(
43347            0, 1, 1, i32::MAX,
43348            i32::MAX.wrapping_add(1), 100, 100, 32,
43349            0, 1, -1, i32::MAX,
43350            i32::MIN, 100, -100, -32,
43351        );
43352        assert_eq_m512i(r, e);
43353    }
43354
43355    #[simd_test(enable = "avx512f")]
43356    unsafe fn test_mm512_maskz_abs_epi32() {
43357        #[rustfmt::skip]
43358        let a = _mm512_setr_epi32(
43359            0, 1, -1, i32::MAX,
43360            i32::MIN, 100, -100, -32,
43361            0, 1, -1, i32::MAX,
43362            i32::MIN, 100, -100, -32,
43363        );
43364        let r = _mm512_maskz_abs_epi32(0, a);
43365        assert_eq_m512i(r, _mm512_setzero_si512());
43366        let r = _mm512_maskz_abs_epi32(0b00000000_11111111, a);
43367        #[rustfmt::skip]
43368        let e = _mm512_setr_epi32(
43369            0, 1, 1, i32::MAX,
43370            i32::MAX.wrapping_add(1), 100, 100, 32,
43371            0, 0, 0, 0,
43372            0, 0, 0, 0,
43373        );
43374        assert_eq_m512i(r, e);
43375    }
43376
43377    #[simd_test(enable = "avx512f,avx512vl")]
43378    unsafe fn test_mm256_mask_abs_epi32() {
43379        #[rustfmt::skip]
43380        let a = _mm256_setr_epi32(
43381            0, 1, -1, i32::MAX,
43382            i32::MIN, 100, -100, -32,
43383        );
43384        let r = _mm256_mask_abs_epi32(a, 0, a);
43385        assert_eq_m256i(r, a);
43386        let r = _mm256_mask_abs_epi32(a, 0b00001111, a);
43387        #[rustfmt::skip]
43388        let e = _mm256_setr_epi32(
43389            0, 1, 1, i32::MAX,
43390            i32::MIN, 100, -100, -32,
43391        );
43392        assert_eq_m256i(r, e);
43393    }
43394
43395    #[simd_test(enable = "avx512f,avx512vl")]
43396    unsafe fn test_mm256_maskz_abs_epi32() {
43397        #[rustfmt::skip]
43398        let a = _mm256_setr_epi32(
43399            0, 1, -1, i32::MAX,
43400            i32::MIN, 100, -100, -32,
43401        );
43402        let r = _mm256_maskz_abs_epi32(0, a);
43403        assert_eq_m256i(r, _mm256_setzero_si256());
43404        let r = _mm256_maskz_abs_epi32(0b00001111, a);
43405        #[rustfmt::skip]
43406        let e = _mm256_setr_epi32(
43407            0, 1, 1, i32::MAX,
43408            0, 0, 0, 0,
43409        );
43410        assert_eq_m256i(r, e);
43411    }
43412
43413    #[simd_test(enable = "avx512f,avx512vl")]
43414    unsafe fn test_mm_mask_abs_epi32() {
43415        let a = _mm_setr_epi32(i32::MIN, 100, -100, -32);
43416        let r = _mm_mask_abs_epi32(a, 0, a);
43417        assert_eq_m128i(r, a);
43418        let r = _mm_mask_abs_epi32(a, 0b00001111, a);
43419        let e = _mm_setr_epi32(i32::MAX.wrapping_add(1), 100, 100, 32);
43420        assert_eq_m128i(r, e);
43421    }
43422
43423    #[simd_test(enable = "avx512f,avx512vl")]
43424    unsafe fn test_mm_maskz_abs_epi32() {
43425        let a = _mm_setr_epi32(i32::MIN, 100, -100, -32);
43426        let r = _mm_maskz_abs_epi32(0, a);
43427        assert_eq_m128i(r, _mm_setzero_si128());
43428        let r = _mm_maskz_abs_epi32(0b00001111, a);
43429        let e = _mm_setr_epi32(i32::MAX.wrapping_add(1), 100, 100, 32);
43430        assert_eq_m128i(r, e);
43431    }
43432
43433    #[simd_test(enable = "avx512f")]
43434    unsafe fn test_mm512_abs_ps() {
43435        #[rustfmt::skip]
43436        let a = _mm512_setr_ps(
43437            0., 1., -1., f32::MAX,
43438            f32::MIN, 100., -100., -32.,
43439            0., 1., -1., f32::MAX,
43440            f32::MIN, 100., -100., -32.,
43441        );
43442        let r = _mm512_abs_ps(a);
43443        #[rustfmt::skip]
43444        let e = _mm512_setr_ps(
43445            0., 1., 1., f32::MAX,
43446            f32::MAX, 100., 100., 32.,
43447            0., 1., 1., f32::MAX,
43448            f32::MAX, 100., 100., 32.,
43449        );
43450        assert_eq_m512(r, e);
43451    }
43452
43453    #[simd_test(enable = "avx512f")]
43454    unsafe fn test_mm512_mask_abs_ps() {
43455        #[rustfmt::skip]
43456        let a = _mm512_setr_ps(
43457            0., 1., -1., f32::MAX,
43458            f32::MIN, 100., -100., -32.,
43459            0., 1., -1., f32::MAX,
43460            f32::MIN, 100., -100., -32.,
43461        );
43462        let r = _mm512_mask_abs_ps(a, 0, a);
43463        assert_eq_m512(r, a);
43464        let r = _mm512_mask_abs_ps(a, 0b00000000_11111111, a);
43465        #[rustfmt::skip]
43466        let e = _mm512_setr_ps(
43467            0., 1., 1., f32::MAX,
43468            f32::MAX, 100., 100., 32.,
43469            0., 1., -1., f32::MAX,
43470            f32::MIN, 100., -100., -32.,
43471        );
43472        assert_eq_m512(r, e);
43473    }
43474
43475    #[simd_test(enable = "avx512f")]
43476    unsafe fn test_mm512_mask_mov_epi32() {
43477        let src = _mm512_set1_epi32(1);
43478        let a = _mm512_set1_epi32(2);
43479        let r = _mm512_mask_mov_epi32(src, 0, a);
43480        assert_eq_m512i(r, src);
43481        let r = _mm512_mask_mov_epi32(src, 0b11111111_11111111, a);
43482        assert_eq_m512i(r, a);
43483    }
43484
43485    #[simd_test(enable = "avx512f")]
43486    unsafe fn test_mm512_maskz_mov_epi32() {
43487        let a = _mm512_set1_epi32(2);
43488        let r = _mm512_maskz_mov_epi32(0, a);
43489        assert_eq_m512i(r, _mm512_setzero_si512());
43490        let r = _mm512_maskz_mov_epi32(0b11111111_11111111, a);
43491        assert_eq_m512i(r, a);
43492    }
43493
43494    #[simd_test(enable = "avx512f,avx512vl")]
43495    unsafe fn test_mm256_mask_mov_epi32() {
43496        let src = _mm256_set1_epi32(1);
43497        let a = _mm256_set1_epi32(2);
43498        let r = _mm256_mask_mov_epi32(src, 0, a);
43499        assert_eq_m256i(r, src);
43500        let r = _mm256_mask_mov_epi32(src, 0b11111111, a);
43501        assert_eq_m256i(r, a);
43502    }
43503
43504    #[simd_test(enable = "avx512f,avx512vl")]
43505    unsafe fn test_mm256_maskz_mov_epi32() {
43506        let a = _mm256_set1_epi32(2);
43507        let r = _mm256_maskz_mov_epi32(0, a);
43508        assert_eq_m256i(r, _mm256_setzero_si256());
43509        let r = _mm256_maskz_mov_epi32(0b11111111, a);
43510        assert_eq_m256i(r, a);
43511    }
43512
43513    #[simd_test(enable = "avx512f,avx512vl")]
43514    unsafe fn test_mm_mask_mov_epi32() {
43515        let src = _mm_set1_epi32(1);
43516        let a = _mm_set1_epi32(2);
43517        let r = _mm_mask_mov_epi32(src, 0, a);
43518        assert_eq_m128i(r, src);
43519        let r = _mm_mask_mov_epi32(src, 0b00001111, a);
43520        assert_eq_m128i(r, a);
43521    }
43522
43523    #[simd_test(enable = "avx512f,avx512vl")]
43524    unsafe fn test_mm_maskz_mov_epi32() {
43525        let a = _mm_set1_epi32(2);
43526        let r = _mm_maskz_mov_epi32(0, a);
43527        assert_eq_m128i(r, _mm_setzero_si128());
43528        let r = _mm_maskz_mov_epi32(0b00001111, a);
43529        assert_eq_m128i(r, a);
43530    }
43531
43532    #[simd_test(enable = "avx512f")]
43533    unsafe fn test_mm512_mask_mov_ps() {
43534        let src = _mm512_set1_ps(1.);
43535        let a = _mm512_set1_ps(2.);
43536        let r = _mm512_mask_mov_ps(src, 0, a);
43537        assert_eq_m512(r, src);
43538        let r = _mm512_mask_mov_ps(src, 0b11111111_11111111, a);
43539        assert_eq_m512(r, a);
43540    }
43541
43542    #[simd_test(enable = "avx512f")]
43543    unsafe fn test_mm512_maskz_mov_ps() {
43544        let a = _mm512_set1_ps(2.);
43545        let r = _mm512_maskz_mov_ps(0, a);
43546        assert_eq_m512(r, _mm512_setzero_ps());
43547        let r = _mm512_maskz_mov_ps(0b11111111_11111111, a);
43548        assert_eq_m512(r, a);
43549    }
43550
43551    #[simd_test(enable = "avx512f,avx512vl")]
43552    unsafe fn test_mm256_mask_mov_ps() {
43553        let src = _mm256_set1_ps(1.);
43554        let a = _mm256_set1_ps(2.);
43555        let r = _mm256_mask_mov_ps(src, 0, a);
43556        assert_eq_m256(r, src);
43557        let r = _mm256_mask_mov_ps(src, 0b11111111, a);
43558        assert_eq_m256(r, a);
43559    }
43560
43561    #[simd_test(enable = "avx512f,avx512vl")]
43562    unsafe fn test_mm256_maskz_mov_ps() {
43563        let a = _mm256_set1_ps(2.);
43564        let r = _mm256_maskz_mov_ps(0, a);
43565        assert_eq_m256(r, _mm256_setzero_ps());
43566        let r = _mm256_maskz_mov_ps(0b11111111, a);
43567        assert_eq_m256(r, a);
43568    }
43569
43570    #[simd_test(enable = "avx512f,avx512vl")]
43571    unsafe fn test_mm_mask_mov_ps() {
43572        let src = _mm_set1_ps(1.);
43573        let a = _mm_set1_ps(2.);
43574        let r = _mm_mask_mov_ps(src, 0, a);
43575        assert_eq_m128(r, src);
43576        let r = _mm_mask_mov_ps(src, 0b00001111, a);
43577        assert_eq_m128(r, a);
43578    }
43579
43580    #[simd_test(enable = "avx512f,avx512vl")]
43581    unsafe fn test_mm_maskz_mov_ps() {
43582        let a = _mm_set1_ps(2.);
43583        let r = _mm_maskz_mov_ps(0, a);
43584        assert_eq_m128(r, _mm_setzero_ps());
43585        let r = _mm_maskz_mov_ps(0b00001111, a);
43586        assert_eq_m128(r, a);
43587    }
43588
43589    #[simd_test(enable = "avx512f")]
43590    unsafe fn test_mm512_add_epi32() {
43591        #[rustfmt::skip]
43592        let a = _mm512_setr_epi32(
43593            0, 1, -1, i32::MAX,
43594            i32::MIN, 100, -100, -32,
43595            0, 1, -1, i32::MAX,
43596            i32::MIN, 100, -100, -32,
43597        );
43598        let b = _mm512_set1_epi32(1);
43599        let r = _mm512_add_epi32(a, b);
43600        #[rustfmt::skip]
43601        let e = _mm512_setr_epi32(
43602            1, 2, 0, i32::MIN,
43603            i32::MIN + 1, 101, -99, -31,
43604            1, 2, 0, i32::MIN,
43605            i32::MIN + 1, 101, -99, -31,
43606        );
43607        assert_eq_m512i(r, e);
43608    }
43609
43610    #[simd_test(enable = "avx512f")]
43611    unsafe fn test_mm512_mask_add_epi32() {
43612        #[rustfmt::skip]
43613        let a = _mm512_setr_epi32(
43614            0, 1, -1, i32::MAX,
43615            i32::MIN, 100, -100, -32,
43616            0, 1, -1, i32::MAX,
43617            i32::MIN, 100, -100, -32,
43618        );
43619        let b = _mm512_set1_epi32(1);
43620        let r = _mm512_mask_add_epi32(a, 0, a, b);
43621        assert_eq_m512i(r, a);
43622        let r = _mm512_mask_add_epi32(a, 0b00000000_11111111, a, b);
43623        #[rustfmt::skip]
43624        let e = _mm512_setr_epi32(
43625            1, 2, 0, i32::MIN,
43626            i32::MIN + 1, 101, -99, -31,
43627            0, 1, -1, i32::MAX,
43628            i32::MIN, 100, -100, -32,
43629        );
43630        assert_eq_m512i(r, e);
43631    }
43632
43633    #[simd_test(enable = "avx512f")]
43634    unsafe fn test_mm512_maskz_add_epi32() {
43635        #[rustfmt::skip]
43636        let a = _mm512_setr_epi32(
43637            0, 1, -1, i32::MAX,
43638            i32::MIN, 100, -100, -32,
43639            0, 1, -1, i32::MAX,
43640            i32::MIN, 100, -100, -32,
43641        );
43642        let b = _mm512_set1_epi32(1);
43643        let r = _mm512_maskz_add_epi32(0, a, b);
43644        assert_eq_m512i(r, _mm512_setzero_si512());
43645        let r = _mm512_maskz_add_epi32(0b00000000_11111111, a, b);
43646        #[rustfmt::skip]
43647        let e = _mm512_setr_epi32(
43648            1, 2, 0, i32::MIN,
43649            i32::MIN + 1, 101, -99, -31,
43650            0, 0, 0, 0,
43651            0, 0, 0, 0,
43652        );
43653        assert_eq_m512i(r, e);
43654    }
43655
43656    #[simd_test(enable = "avx512f,avx512vl")]
43657    unsafe fn test_mm256_mask_add_epi32() {
43658        let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
43659        let b = _mm256_set1_epi32(1);
43660        let r = _mm256_mask_add_epi32(a, 0, a, b);
43661        assert_eq_m256i(r, a);
43662        let r = _mm256_mask_add_epi32(a, 0b11111111, a, b);
43663        let e = _mm256_set_epi32(1, 2, 0, i32::MIN, i32::MIN + 1, 101, -99, -31);
43664        assert_eq_m256i(r, e);
43665    }
43666
43667    #[simd_test(enable = "avx512f,avx512vl")]
43668    unsafe fn test_mm256_maskz_add_epi32() {
43669        let a = _mm256_setr_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
43670        let b = _mm256_set1_epi32(1);
43671        let r = _mm256_maskz_add_epi32(0, a, b);
43672        assert_eq_m256i(r, _mm256_setzero_si256());
43673        let r = _mm256_maskz_add_epi32(0b11111111, a, b);
43674        let e = _mm256_setr_epi32(1, 2, 0, i32::MIN, i32::MIN + 1, 101, -99, -31);
43675        assert_eq_m256i(r, e);
43676    }
43677
43678    #[simd_test(enable = "avx512f,avx512vl")]
43679    unsafe fn test_mm_mask_add_epi32() {
43680        let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
43681        let b = _mm_set1_epi32(1);
43682        let r = _mm_mask_add_epi32(a, 0, a, b);
43683        assert_eq_m128i(r, a);
43684        let r = _mm_mask_add_epi32(a, 0b00001111, a, b);
43685        let e = _mm_set_epi32(2, 0, i32::MIN, i32::MIN + 1);
43686        assert_eq_m128i(r, e);
43687    }
43688
43689    #[simd_test(enable = "avx512f,avx512vl")]
43690    unsafe fn test_mm_maskz_add_epi32() {
43691        let a = _mm_setr_epi32(1, -1, i32::MAX, i32::MIN);
43692        let b = _mm_set1_epi32(1);
43693        let r = _mm_maskz_add_epi32(0, a, b);
43694        assert_eq_m128i(r, _mm_setzero_si128());
43695        let r = _mm_maskz_add_epi32(0b00001111, a, b);
43696        let e = _mm_setr_epi32(2, 0, i32::MIN, i32::MIN + 1);
43697        assert_eq_m128i(r, e);
43698    }
43699
43700    #[simd_test(enable = "avx512f")]
43701    unsafe fn test_mm512_add_ps() {
43702        #[rustfmt::skip]
43703        let a = _mm512_setr_ps(
43704            0., 1., -1., f32::MAX,
43705            f32::MIN, 100., -100., -32.,
43706            0., 1., -1., f32::MAX,
43707            f32::MIN, 100., -100., -32.,
43708        );
43709        let b = _mm512_set1_ps(1.);
43710        let r = _mm512_add_ps(a, b);
43711        #[rustfmt::skip]
43712        let e = _mm512_setr_ps(
43713            1., 2., 0., f32::MAX,
43714            f32::MIN + 1., 101., -99., -31.,
43715            1., 2., 0., f32::MAX,
43716            f32::MIN + 1., 101., -99., -31.,
43717        );
43718        assert_eq_m512(r, e);
43719    }
43720
43721    #[simd_test(enable = "avx512f")]
43722    unsafe fn test_mm512_mask_add_ps() {
43723        #[rustfmt::skip]
43724        let a = _mm512_setr_ps(
43725            0., 1., -1., f32::MAX,
43726            f32::MIN, 100., -100., -32.,
43727            0., 1., -1., f32::MAX,
43728            f32::MIN, 100., -100., -32.,
43729        );
43730        let b = _mm512_set1_ps(1.);
43731        let r = _mm512_mask_add_ps(a, 0, a, b);
43732        assert_eq_m512(r, a);
43733        let r = _mm512_mask_add_ps(a, 0b00000000_11111111, a, b);
43734        #[rustfmt::skip]
43735        let e = _mm512_setr_ps(
43736            1., 2., 0., f32::MAX,
43737            f32::MIN + 1., 101., -99., -31.,
43738            0., 1., -1., f32::MAX,
43739            f32::MIN, 100., -100., -32.,
43740        );
43741        assert_eq_m512(r, e);
43742    }
43743
43744    #[simd_test(enable = "avx512f")]
43745    unsafe fn test_mm512_maskz_add_ps() {
43746        #[rustfmt::skip]
43747        let a = _mm512_setr_ps(
43748            0., 1., -1., f32::MAX,
43749            f32::MIN, 100., -100., -32.,
43750            0., 1., -1., f32::MAX,
43751            f32::MIN, 100., -100., -32.,
43752        );
43753        let b = _mm512_set1_ps(1.);
43754        let r = _mm512_maskz_add_ps(0, a, b);
43755        assert_eq_m512(r, _mm512_setzero_ps());
43756        let r = _mm512_maskz_add_ps(0b00000000_11111111, a, b);
43757        #[rustfmt::skip]
43758        let e = _mm512_setr_ps(
43759            1., 2., 0., f32::MAX,
43760            f32::MIN + 1., 101., -99., -31.,
43761            0., 0., 0., 0.,
43762            0., 0., 0., 0.,
43763        );
43764        assert_eq_m512(r, e);
43765    }
43766
43767    #[simd_test(enable = "avx512f,avx512vl")]
43768    unsafe fn test_mm256_mask_add_ps() {
43769        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
43770        let b = _mm256_set1_ps(1.);
43771        let r = _mm256_mask_add_ps(a, 0, a, b);
43772        assert_eq_m256(r, a);
43773        let r = _mm256_mask_add_ps(a, 0b11111111, a, b);
43774        let e = _mm256_set_ps(1., 2., 0., f32::MAX, f32::MIN + 1., 101., -99., -31.);
43775        assert_eq_m256(r, e);
43776    }
43777
43778    #[simd_test(enable = "avx512f,avx512vl")]
43779    unsafe fn test_mm256_maskz_add_ps() {
43780        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
43781        let b = _mm256_set1_ps(1.);
43782        let r = _mm256_maskz_add_ps(0, a, b);
43783        assert_eq_m256(r, _mm256_setzero_ps());
43784        let r = _mm256_maskz_add_ps(0b11111111, a, b);
43785        let e = _mm256_set_ps(1., 2., 0., f32::MAX, f32::MIN + 1., 101., -99., -31.);
43786        assert_eq_m256(r, e);
43787    }
43788
43789    #[simd_test(enable = "avx512f,avx512vl")]
43790    unsafe fn test_mm_mask_add_ps() {
43791        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
43792        let b = _mm_set1_ps(1.);
43793        let r = _mm_mask_add_ps(a, 0, a, b);
43794        assert_eq_m128(r, a);
43795        let r = _mm_mask_add_ps(a, 0b00001111, a, b);
43796        let e = _mm_set_ps(2., 0., f32::MAX, f32::MIN + 1.);
43797        assert_eq_m128(r, e);
43798    }
43799
43800    #[simd_test(enable = "avx512f,avx512vl")]
43801    unsafe fn test_mm_maskz_add_ps() {
43802        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
43803        let b = _mm_set1_ps(1.);
43804        let r = _mm_maskz_add_ps(0, a, b);
43805        assert_eq_m128(r, _mm_setzero_ps());
43806        let r = _mm_maskz_add_ps(0b00001111, a, b);
43807        let e = _mm_set_ps(2., 0., f32::MAX, f32::MIN + 1.);
43808        assert_eq_m128(r, e);
43809    }
43810
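    // The packed 32-bit integer add/sub intrinsics wrap on overflow, which is why the
    // expectations above read `i32::MAX + 1 == i32::MIN` and the ones below read
    // `i32::MIN - 1 == i32::MAX`.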
43811    #[simd_test(enable = "avx512f")]
43812    unsafe fn test_mm512_sub_epi32() {
43813        #[rustfmt::skip]
43814        let a = _mm512_setr_epi32(
43815            0, 1, -1, i32::MAX,
43816            i32::MIN, 100, -100, -32,
43817            0, 1, -1, i32::MAX,
43818            i32::MIN, 100, -100, -32,
43819        );
43820        let b = _mm512_set1_epi32(1);
43821        let r = _mm512_sub_epi32(a, b);
43822        #[rustfmt::skip]
43823        let e = _mm512_setr_epi32(
43824            -1, 0, -2, i32::MAX - 1,
43825            i32::MAX, 99, -101, -33,
43826            -1, 0, -2, i32::MAX - 1,
43827            i32::MAX, 99, -101, -33,
43828        );
43829        assert_eq_m512i(r, e);
43830    }
43831
43832    #[simd_test(enable = "avx512f")]
43833    unsafe fn test_mm512_mask_sub_epi32() {
43834        #[rustfmt::skip]
43835        let a = _mm512_setr_epi32(
43836            0, 1, -1, i32::MAX,
43837            i32::MIN, 100, -100, -32,
43838            0, 1, -1, i32::MAX,
43839            i32::MIN, 100, -100, -32,
43840        );
43841        let b = _mm512_set1_epi32(1);
43842        let r = _mm512_mask_sub_epi32(a, 0, a, b);
43843        assert_eq_m512i(r, a);
43844        let r = _mm512_mask_sub_epi32(a, 0b00000000_11111111, a, b);
43845        #[rustfmt::skip]
43846        let e = _mm512_setr_epi32(
43847            -1, 0, -2, i32::MAX - 1,
43848            i32::MAX, 99, -101, -33,
43849            0, 1, -1, i32::MAX,
43850            i32::MIN, 100, -100, -32,
43851        );
43852        assert_eq_m512i(r, e);
43853    }
43854
43855    #[simd_test(enable = "avx512f")]
43856    unsafe fn test_mm512_maskz_sub_epi32() {
43857        #[rustfmt::skip]
43858        let a = _mm512_setr_epi32(
43859            0, 1, -1, i32::MAX,
43860            i32::MIN, 100, -100, -32,
43861            0, 1, -1, i32::MAX,
43862            i32::MIN, 100, -100, -32,
43863        );
43864        let b = _mm512_set1_epi32(1);
43865        let r = _mm512_maskz_sub_epi32(0, a, b);
43866        assert_eq_m512i(r, _mm512_setzero_si512());
43867        let r = _mm512_maskz_sub_epi32(0b00000000_11111111, a, b);
43868        #[rustfmt::skip]
43869        let e = _mm512_setr_epi32(
43870            -1, 0, -2, i32::MAX - 1,
43871            i32::MAX, 99, -101, -33,
43872            0, 0, 0, 0,
43873            0, 0, 0, 0,
43874        );
43875        assert_eq_m512i(r, e);
43876    }
43877
43878    #[simd_test(enable = "avx512f,avx512vl")]
43879    unsafe fn test_mm256_mask_sub_epi32() {
43880        let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
43881        let b = _mm256_set1_epi32(1);
43882        let r = _mm256_mask_sub_epi32(a, 0, a, b);
43883        assert_eq_m256i(r, a);
43884        let r = _mm256_mask_sub_epi32(a, 0b11111111, a, b);
43885        let e = _mm256_set_epi32(-1, 0, -2, i32::MAX - 1, i32::MAX, 99, -101, -33);
43886        assert_eq_m256i(r, e);
43887    }
43888
43889    #[simd_test(enable = "avx512f,avx512vl")]
43890    unsafe fn test_mm256_maskz_sub_epi32() {
43891        let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
43892        let b = _mm256_set1_epi32(1);
43893        let r = _mm256_maskz_sub_epi32(0, a, b);
43894        assert_eq_m256i(r, _mm256_setzero_si256());
43895        let r = _mm256_maskz_sub_epi32(0b11111111, a, b);
43896        let e = _mm256_set_epi32(-1, 0, -2, i32::MAX - 1, i32::MAX, 99, -101, -33);
43897        assert_eq_m256i(r, e);
43898    }
43899
43900    #[simd_test(enable = "avx512f,avx512vl")]
43901    unsafe fn test_mm_mask_sub_epi32() {
43902        let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
43903        let b = _mm_set1_epi32(1);
43904        let r = _mm_mask_sub_epi32(a, 0, a, b);
43905        assert_eq_m128i(r, a);
43906        let r = _mm_mask_sub_epi32(a, 0b00001111, a, b);
43907        let e = _mm_set_epi32(0, -2, i32::MAX - 1, i32::MAX);
43908        assert_eq_m128i(r, e);
43909    }
43910
43911    #[simd_test(enable = "avx512f,avx512vl")]
43912    unsafe fn test_mm_maskz_sub_epi32() {
43913        let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
43914        let b = _mm_set1_epi32(1);
43915        let r = _mm_maskz_sub_epi32(0, a, b);
43916        assert_eq_m128i(r, _mm_setzero_si128());
43917        let r = _mm_maskz_sub_epi32(0b00001111, a, b);
43918        let e = _mm_set_epi32(0, -2, i32::MAX - 1, i32::MAX);
43919        assert_eq_m128i(r, e);
43920    }
43921
43922    #[simd_test(enable = "avx512f")]
43923    unsafe fn test_mm512_sub_ps() {
43924        #[rustfmt::skip]
43925        let a = _mm512_setr_ps(
43926            0., 1., -1., f32::MAX,
43927            f32::MIN, 100., -100., -32.,
43928            0., 1., -1., f32::MAX,
43929            f32::MIN, 100., -100., -32.,
43930        );
43931        let b = _mm512_set1_ps(1.);
43932        let r = _mm512_sub_ps(a, b);
43933        #[rustfmt::skip]
43934        let e = _mm512_setr_ps(
43935            -1., 0., -2., f32::MAX - 1.,
43936            f32::MIN, 99., -101., -33.,
43937            -1., 0., -2., f32::MAX - 1.,
43938            f32::MIN, 99., -101., -33.,
43939        );
43940        assert_eq_m512(r, e);
43941    }
43942
43943    #[simd_test(enable = "avx512f")]
43944    unsafe fn test_mm512_mask_sub_ps() {
43945        #[rustfmt::skip]
43946        let a = _mm512_setr_ps(
43947            0., 1., -1., f32::MAX,
43948            f32::MIN, 100., -100., -32.,
43949            0., 1., -1., f32::MAX,
43950            f32::MIN, 100., -100., -32.,
43951        );
43952        let b = _mm512_set1_ps(1.);
43953        let r = _mm512_mask_sub_ps(a, 0, a, b);
43954        assert_eq_m512(r, a);
43955        let r = _mm512_mask_sub_ps(a, 0b00000000_11111111, a, b);
43956        #[rustfmt::skip]
43957        let e = _mm512_setr_ps(
43958            -1., 0., -2., f32::MAX - 1.,
43959            f32::MIN, 99., -101., -33.,
43960            0., 1., -1., f32::MAX,
43961            f32::MIN, 100., -100., -32.,
43962        );
43963        assert_eq_m512(r, e);
43964    }
43965
43966    #[simd_test(enable = "avx512f")]
43967    unsafe fn test_mm512_maskz_sub_ps() {
43968        #[rustfmt::skip]
43969        let a = _mm512_setr_ps(
43970            0., 1., -1., f32::MAX,
43971            f32::MIN, 100., -100., -32.,
43972            0., 1., -1., f32::MAX,
43973            f32::MIN, 100., -100., -32.,
43974        );
43975        let b = _mm512_set1_ps(1.);
43976        let r = _mm512_maskz_sub_ps(0, a, b);
43977        assert_eq_m512(r, _mm512_setzero_ps());
43978        let r = _mm512_maskz_sub_ps(0b00000000_11111111, a, b);
43979        #[rustfmt::skip]
43980        let e = _mm512_setr_ps(
43981            -1., 0., -2., f32::MAX - 1.,
43982            f32::MIN, 99., -101., -33.,
43983            0., 0., 0., 0.,
43984            0., 0., 0., 0.,
43985        );
43986        assert_eq_m512(r, e);
43987    }
43988
43989    #[simd_test(enable = "avx512f,avx512vl")]
43990    unsafe fn test_mm256_mask_sub_ps() {
43991        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
43992        let b = _mm256_set1_ps(1.);
43993        let r = _mm256_mask_sub_ps(a, 0, a, b);
43994        assert_eq_m256(r, a);
43995        let r = _mm256_mask_sub_ps(a, 0b11111111, a, b);
43996        let e = _mm256_set_ps(-1., 0., -2., f32::MAX - 1., f32::MIN, 99., -101., -33.);
43997        assert_eq_m256(r, e);
43998    }
43999
44000    #[simd_test(enable = "avx512f,avx512vl")]
44001    unsafe fn test_mm256_maskz_sub_ps() {
44002        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
44003        let b = _mm256_set1_ps(1.);
44004        let r = _mm256_maskz_sub_ps(0, a, b);
44005        assert_eq_m256(r, _mm256_setzero_ps());
44006        let r = _mm256_maskz_sub_ps(0b11111111, a, b);
44007        let e = _mm256_set_ps(-1., 0., -2., f32::MAX - 1., f32::MIN, 99., -101., -33.);
44008        assert_eq_m256(r, e);
44009    }
44010
44011    #[simd_test(enable = "avx512f,avx512vl")]
44012    unsafe fn test_mm_mask_sub_ps() {
44013        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
44014        let b = _mm_set1_ps(1.);
44015        let r = _mm_mask_sub_ps(a, 0, a, b);
44016        assert_eq_m128(r, a);
44017        let r = _mm_mask_sub_ps(a, 0b00001111, a, b);
44018        let e = _mm_set_ps(0., -2., f32::MAX - 1., f32::MIN);
44019        assert_eq_m128(r, e);
44020    }
44021
44022    #[simd_test(enable = "avx512f,avx512vl")]
44023    unsafe fn test_mm_maskz_sub_ps() {
44024        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
44025        let b = _mm_set1_ps(1.);
44026        let r = _mm_maskz_sub_ps(0, a, b);
44027        assert_eq_m128(r, _mm_setzero_ps());
44028        let r = _mm_maskz_sub_ps(0b00001111, a, b);
44029        let e = _mm_set_ps(0., -2., f32::MAX - 1., f32::MIN);
44030        assert_eq_m128(r, e);
44031    }
44032
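    // `mullo` keeps only the low 32 bits of each 64-bit product, so with b == 2 the
    // expectations below wrap: `i32::MAX * 2 == -2` and `i32::MIN * 2 == 0`.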
44033    #[simd_test(enable = "avx512f")]
44034    unsafe fn test_mm512_mullo_epi32() {
44035        #[rustfmt::skip]
44036        let a = _mm512_setr_epi32(
44037            0, 1, -1, i32::MAX,
44038            i32::MIN, 100, -100, -32,
44039            0, 1, -1, i32::MAX,
44040            i32::MIN, 100, -100, -32,
44041        );
44042        let b = _mm512_set1_epi32(2);
44043        let r = _mm512_mullo_epi32(a, b);
44044        let e = _mm512_setr_epi32(
44045            0, 2, -2, -2, 0, 200, -200, -64, 0, 2, -2, -2, 0, 200, -200, -64,
44046        );
44047        assert_eq_m512i(r, e);
44048    }
44049
44050    #[simd_test(enable = "avx512f")]
44051    unsafe fn test_mm512_mask_mullo_epi32() {
44052        #[rustfmt::skip]
44053        let a = _mm512_setr_epi32(
44054            0, 1, -1, i32::MAX,
44055            i32::MIN, 100, -100, -32,
44056            0, 1, -1, i32::MAX,
44057            i32::MIN, 100, -100, -32,
44058        );
44059        let b = _mm512_set1_epi32(2);
44060        let r = _mm512_mask_mullo_epi32(a, 0, a, b);
44061        assert_eq_m512i(r, a);
44062        let r = _mm512_mask_mullo_epi32(a, 0b00000000_11111111, a, b);
44063        #[rustfmt::skip]
44064        let e = _mm512_setr_epi32(
44065            0, 2, -2, -2,
44066            0, 200, -200, -64,
44067            0, 1, -1, i32::MAX,
44068            i32::MIN, 100, -100, -32,
44069        );
44070        assert_eq_m512i(r, e);
44071    }
44072
44073    #[simd_test(enable = "avx512f")]
44074    unsafe fn test_mm512_maskz_mullo_epi32() {
44075        #[rustfmt::skip]
44076        let a = _mm512_setr_epi32(
44077            0, 1, -1, i32::MAX,
44078            i32::MIN, 100, -100, -32,
44079            0, 1, -1, i32::MAX,
44080            i32::MIN, 100, -100, -32,
44081        );
44082        let b = _mm512_set1_epi32(2);
44083        let r = _mm512_maskz_mullo_epi32(0, a, b);
44084        assert_eq_m512i(r, _mm512_setzero_si512());
44085        let r = _mm512_maskz_mullo_epi32(0b00000000_11111111, a, b);
44086        let e = _mm512_setr_epi32(0, 2, -2, -2, 0, 200, -200, -64, 0, 0, 0, 0, 0, 0, 0, 0);
44087        assert_eq_m512i(r, e);
44088    }
44089
44090    #[simd_test(enable = "avx512f,avx512vl")]
44091    unsafe fn test_mm256_mask_mullo_epi32() {
44092        let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
44093        let b = _mm256_set1_epi32(2);
44094        let r = _mm256_mask_mullo_epi32(a, 0, a, b);
44095        assert_eq_m256i(r, a);
44096        let r = _mm256_mask_mullo_epi32(a, 0b11111111, a, b);
44097        let e = _mm256_set_epi32(0, 2, -2, -2, 0, 200, -200, -64);
44098        assert_eq_m256i(r, e);
44099    }
44100
44101    #[simd_test(enable = "avx512f,avx512vl")]
44102    unsafe fn test_mm256_maskz_mullo_epi32() {
44103        let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
44104        let b = _mm256_set1_epi32(2);
44105        let r = _mm256_maskz_mullo_epi32(0, a, b);
44106        assert_eq_m256i(r, _mm256_setzero_si256());
44107        let r = _mm256_maskz_mullo_epi32(0b11111111, a, b);
44108        let e = _mm256_set_epi32(0, 2, -2, -2, 0, 200, -200, -64);
44109        assert_eq_m256i(r, e);
44110    }
44111
44112    #[simd_test(enable = "avx512f,avx512vl")]
44113    unsafe fn test_mm_mask_mullo_epi32() {
44114        let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
44115        let b = _mm_set1_epi32(2);
44116        let r = _mm_mask_mullo_epi32(a, 0, a, b);
44117        assert_eq_m128i(r, a);
44118        let r = _mm_mask_mullo_epi32(a, 0b00001111, a, b);
44119        let e = _mm_set_epi32(2, -2, -2, 0);
44120        assert_eq_m128i(r, e);
44121    }
44122
44123    #[simd_test(enable = "avx512f,avx512vl")]
44124    unsafe fn test_mm_maskz_mullo_epi32() {
44125        let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
44126        let b = _mm_set1_epi32(2);
44127        let r = _mm_maskz_mullo_epi32(0, a, b);
44128        assert_eq_m128i(r, _mm_setzero_si128());
44129        let r = _mm_maskz_mullo_epi32(0b00001111, a, b);
44130        let e = _mm_set_epi32(2, -2, -2, 0);
44131        assert_eq_m128i(r, e);
44132    }
44133
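    // Doubling `f32::MAX` / `f32::MIN` overflows the f32 range, so the expected lanes
    // below are `f32::INFINITY` / `f32::NEG_INFINITY`.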
44134    #[simd_test(enable = "avx512f")]
44135    unsafe fn test_mm512_mul_ps() {
44136        #[rustfmt::skip]
44137        let a = _mm512_setr_ps(
44138            0., 1., -1., f32::MAX,
44139            f32::MIN, 100., -100., -32.,
44140            0., 1., -1., f32::MAX,
44141            f32::MIN, 100., -100., -32.,
44142        );
44143        let b = _mm512_set1_ps(2.);
44144        let r = _mm512_mul_ps(a, b);
44145        #[rustfmt::skip]
44146        let e = _mm512_setr_ps(
44147            0., 2., -2., f32::INFINITY,
44148            f32::NEG_INFINITY, 200., -200., -64.,
44149            0., 2., -2., f32::INFINITY,
44150            f32::NEG_INFINITY, 200., -200., -64.,
44152        );
44153        assert_eq_m512(r, e);
44154    }
44155
44156    #[simd_test(enable = "avx512f")]
44157    unsafe fn test_mm512_mask_mul_ps() {
44158        #[rustfmt::skip]
44159        let a = _mm512_setr_ps(
44160            0., 1., -1., f32::MAX,
44161            f32::MIN, 100., -100., -32.,
44162            0., 1., -1., f32::MAX,
44163            f32::MIN, 100., -100., -32.,
44164        );
44165        let b = _mm512_set1_ps(2.);
44166        let r = _mm512_mask_mul_ps(a, 0, a, b);
44167        assert_eq_m512(r, a);
44168        let r = _mm512_mask_mul_ps(a, 0b00000000_11111111, a, b);
44169        #[rustfmt::skip]
44170        let e = _mm512_setr_ps(
44171            0., 2., -2., f32::INFINITY,
44172            f32::NEG_INFINITY, 200., -200., -64.,
44173            0., 1., -1., f32::MAX,
44174            f32::MIN, 100., -100., -32.,
44175        );
44176        assert_eq_m512(r, e);
44177    }
44178
44179    #[simd_test(enable = "avx512f")]
44180    unsafe fn test_mm512_maskz_mul_ps() {
44181        #[rustfmt::skip]
44182        let a = _mm512_setr_ps(
44183            0., 1., -1., f32::MAX,
44184            f32::MIN, 100., -100., -32.,
44185            0., 1., -1., f32::MAX,
44186            f32::MIN, 100., -100., -32.,
44187        );
44188        let b = _mm512_set1_ps(2.);
44189        let r = _mm512_maskz_mul_ps(0, a, b);
44190        assert_eq_m512(r, _mm512_setzero_ps());
44191        let r = _mm512_maskz_mul_ps(0b00000000_11111111, a, b);
44192        #[rustfmt::skip]
44193        let e = _mm512_setr_ps(
44194            0., 2., -2., f32::INFINITY,
44195            f32::NEG_INFINITY, 200., -200., -64.,
44196            0., 0., 0., 0.,
44197            0., 0., 0., 0.,
44198        );
44199        assert_eq_m512(r, e);
44200    }
44201
44202    #[simd_test(enable = "avx512f,avx512vl")]
44203    unsafe fn test_mm256_mask_mul_ps() {
44204        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
44205        let b = _mm256_set1_ps(2.);
44206        let r = _mm256_mask_mul_ps(a, 0, a, b);
44207        assert_eq_m256(r, a);
44208        let r = _mm256_mask_mul_ps(a, 0b11111111, a, b);
44209        #[rustfmt::skip]
44210        let e = _mm256_set_ps(
44211            0., 2., -2., f32::INFINITY,
44212            f32::NEG_INFINITY, 200., -200., -64.,
44213        );
44214        assert_eq_m256(r, e);
44215    }
44216
44217    #[simd_test(enable = "avx512f,avx512vl")]
44218    unsafe fn test_mm256_maskz_mul_ps() {
44219        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
44220        let b = _mm256_set1_ps(2.);
44221        let r = _mm256_maskz_mul_ps(0, a, b);
44222        assert_eq_m256(r, _mm256_setzero_ps());
44223        let r = _mm256_maskz_mul_ps(0b11111111, a, b);
44224        #[rustfmt::skip]
44225        let e = _mm256_set_ps(
44226            0., 2., -2., f32::INFINITY,
44227            f32::NEG_INFINITY, 200., -200., -64.,
44228        );
44229        assert_eq_m256(r, e);
44230    }
44231
44232    #[simd_test(enable = "avx512f,avx512vl")]
44233    unsafe fn test_mm_mask_mul_ps() {
44234        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
44235        let b = _mm_set1_ps(2.);
44236        let r = _mm_mask_mul_ps(a, 0, a, b);
44237        assert_eq_m128(r, a);
44238        let r = _mm_mask_mul_ps(a, 0b00001111, a, b);
44239        let e = _mm_set_ps(2., -2., f32::INFINITY, f32::NEG_INFINITY);
44240        assert_eq_m128(r, e);
44241    }
44242
44243    #[simd_test(enable = "avx512f,avx512vl")]
44244    unsafe fn test_mm_maskz_mul_ps() {
44245        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
44246        let b = _mm_set1_ps(2.);
44247        let r = _mm_maskz_mul_ps(0, a, b);
44248        assert_eq_m128(r, _mm_setzero_ps());
44249        let r = _mm_maskz_mul_ps(0b00001111, a, b);
44250        let e = _mm_set_ps(2., -2., f32::INFINITY, f32::NEG_INFINITY);
44251        assert_eq_m128(r, e);
44252    }
44253
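    // Dividing a nonzero finite lane by 0.0 yields a signed infinity under IEEE 754,
    // hence the `INFINITY` and `NEG_INFINITY` lanes expected below.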
44254    #[simd_test(enable = "avx512f")]
44255    unsafe fn test_mm512_div_ps() {
44256        let a = _mm512_setr_ps(
44257            0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
44258        );
44259        let b = _mm512_setr_ps(
44260            2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
44261        );
44262        let r = _mm512_div_ps(a, b);
44263        #[rustfmt::skip]
44264        let e = _mm512_setr_ps(
44265            0., 0.5, -0.5, -1.,
44266            50., f32::INFINITY, -50., -16.,
44267            0., 0.5, -0.5, 500.,
44268            f32::NEG_INFINITY, 50., -50., -16.,
44269        );
44270        assert_eq_m512(r, e); // 100. / 0. == INFINITY, -131. / 0. == NEG_INFINITY
44271    }
44272
44273    #[simd_test(enable = "avx512f")]
44274    unsafe fn test_mm512_mask_div_ps() {
44275        let a = _mm512_setr_ps(
44276            0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
44277        );
44278        let b = _mm512_setr_ps(
44279            2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
44280        );
44281        let r = _mm512_mask_div_ps(a, 0, a, b);
44282        assert_eq_m512(r, a);
44283        let r = _mm512_mask_div_ps(a, 0b00000000_11111111, a, b);
44284        #[rustfmt::skip]
44285        let e = _mm512_setr_ps(
44286            0., 0.5, -0.5, -1.,
44287            50., f32::INFINITY, -50., -16.,
44288            0., 1., -1., 1000.,
44289            -131., 100., -100., -32.,
44290        );
44291        assert_eq_m512(r, e);
44292    }
44293
44294    #[simd_test(enable = "avx512f")]
44295    unsafe fn test_mm512_maskz_div_ps() {
44296        let a = _mm512_setr_ps(
44297            0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
44298        );
44299        let b = _mm512_setr_ps(
44300            2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
44301        );
44302        let r = _mm512_maskz_div_ps(0, a, b);
44303        assert_eq_m512(r, _mm512_setzero_ps());
44304        let r = _mm512_maskz_div_ps(0b00000000_11111111, a, b);
44305        #[rustfmt::skip]
44306        let e = _mm512_setr_ps(
44307            0., 0.5, -0.5, -1.,
44308            50., f32::INFINITY, -50., -16.,
44309            0., 0., 0., 0.,
44310            0., 0., 0., 0.,
44311        );
44312        assert_eq_m512(r, e);
44313    }
44314
44315    #[simd_test(enable = "avx512f,avx512vl")]
44316    unsafe fn test_mm256_mask_div_ps() {
44317        let a = _mm256_set_ps(0., 1., -1., -2., 100., 100., -100., -32.);
44318        let b = _mm256_set_ps(2., 2., 2., 2., 2., 0., 2., 2.);
44319        let r = _mm256_mask_div_ps(a, 0, a, b);
44320        assert_eq_m256(r, a);
44321        let r = _mm256_mask_div_ps(a, 0b11111111, a, b);
44322        let e = _mm256_set_ps(0., 0.5, -0.5, -1., 50., f32::INFINITY, -50., -16.);
44323        assert_eq_m256(r, e);
44324    }
44325
44326    #[simd_test(enable = "avx512f,avx512vl")]
44327    unsafe fn test_mm256_maskz_div_ps() {
44328        let a = _mm256_set_ps(0., 1., -1., -2., 100., 100., -100., -32.);
44329        let b = _mm256_set_ps(2., 2., 2., 2., 2., 0., 2., 2.);
44330        let r = _mm256_maskz_div_ps(0, a, b);
44331        assert_eq_m256(r, _mm256_setzero_ps());
44332        let r = _mm256_maskz_div_ps(0b11111111, a, b);
44333        let e = _mm256_set_ps(0., 0.5, -0.5, -1., 50., f32::INFINITY, -50., -16.);
44334        assert_eq_m256(r, e);
44335    }
44336
44337    #[simd_test(enable = "avx512f,avx512vl")]
44338    unsafe fn test_mm_mask_div_ps() {
44339        let a = _mm_set_ps(100., 100., -100., -32.);
44340        let b = _mm_set_ps(2., 0., 2., 2.);
44341        let r = _mm_mask_div_ps(a, 0, a, b);
44342        assert_eq_m128(r, a);
44343        let r = _mm_mask_div_ps(a, 0b00001111, a, b);
44344        let e = _mm_set_ps(50., f32::INFINITY, -50., -16.);
44345        assert_eq_m128(r, e);
44346    }
44347
44348    #[simd_test(enable = "avx512f,avx512vl")]
44349    unsafe fn test_mm_maskz_div_ps() {
44350        let a = _mm_set_ps(100., 100., -100., -32.);
44351        let b = _mm_set_ps(2., 0., 2., 2.);
44352        let r = _mm_maskz_div_ps(0, a, b);
44353        assert_eq_m128(r, _mm_setzero_ps());
44354        let r = _mm_maskz_div_ps(0b00001111, a, b);
44355        let e = _mm_set_ps(50., f32::INFINITY, -50., -16.);
44356        assert_eq_m128(r, e);
44357    }
44358
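    // `max_epi32`/`min_epi32` compare lanes as signed integers and the `epu32` variants
    // as unsigned; the inputs below are all non-negative, so both families agree here.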
44359    #[simd_test(enable = "avx512f")]
44360    unsafe fn test_mm512_max_epi32() {
44361        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44362        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44363        let r = _mm512_max_epi32(a, b);
44364        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
44365        assert_eq_m512i(r, e);
44366    }
44367
44368    #[simd_test(enable = "avx512f")]
44369    unsafe fn test_mm512_mask_max_epi32() {
44370        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44371        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44372        let r = _mm512_mask_max_epi32(a, 0, a, b);
44373        assert_eq_m512i(r, a);
44374        let r = _mm512_mask_max_epi32(a, 0b00000000_11111111, a, b);
44375        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
44376        assert_eq_m512i(r, e);
44377    }
44378
44379    #[simd_test(enable = "avx512f")]
44380    unsafe fn test_mm512_maskz_max_epi32() {
44381        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44382        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44383        let r = _mm512_maskz_max_epi32(0, a, b);
44384        assert_eq_m512i(r, _mm512_setzero_si512());
44385        let r = _mm512_maskz_max_epi32(0b00000000_11111111, a, b);
44386        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 0, 0, 0, 0, 0, 0, 0, 0);
44387        assert_eq_m512i(r, e);
44388    }
44389
44390    #[simd_test(enable = "avx512f,avx512vl")]
44391    unsafe fn test_mm256_mask_max_epi32() {
44392        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44393        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44394        let r = _mm256_mask_max_epi32(a, 0, a, b);
44395        assert_eq_m256i(r, a);
44396        let r = _mm256_mask_max_epi32(a, 0b11111111, a, b);
44397        let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
44398        assert_eq_m256i(r, e);
44399    }
44400
44401    #[simd_test(enable = "avx512f,avx512vl")]
44402    unsafe fn test_mm256_maskz_max_epi32() {
44403        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44404        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44405        let r = _mm256_maskz_max_epi32(0, a, b);
44406        assert_eq_m256i(r, _mm256_setzero_si256());
44407        let r = _mm256_maskz_max_epi32(0b11111111, a, b);
44408        let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
44409        assert_eq_m256i(r, e);
44410    }
44411
44412    #[simd_test(enable = "avx512f,avx512vl")]
44413    unsafe fn test_mm_mask_max_epi32() {
44414        let a = _mm_set_epi32(0, 1, 2, 3);
44415        let b = _mm_set_epi32(3, 2, 1, 0);
44416        let r = _mm_mask_max_epi32(a, 0, a, b);
44417        assert_eq_m128i(r, a);
44418        let r = _mm_mask_max_epi32(a, 0b00001111, a, b);
44419        let e = _mm_set_epi32(3, 2, 2, 3);
44420        assert_eq_m128i(r, e);
44421    }
44422
44423    #[simd_test(enable = "avx512f,avx512vl")]
44424    unsafe fn test_mm_maskz_max_epi32() {
44425        let a = _mm_set_epi32(0, 1, 2, 3);
44426        let b = _mm_set_epi32(3, 2, 1, 0);
44427        let r = _mm_maskz_max_epi32(0, a, b);
44428        assert_eq_m128i(r, _mm_setzero_si128());
44429        let r = _mm_maskz_max_epi32(0b00001111, a, b);
44430        let e = _mm_set_epi32(3, 2, 2, 3);
44431        assert_eq_m128i(r, e);
44432    }
44433
44434    #[simd_test(enable = "avx512f")]
44435    unsafe fn test_mm512_max_ps() {
44436        let a = _mm512_setr_ps(
44437            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44438        );
44439        let b = _mm512_setr_ps(
44440            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44441        );
44442        let r = _mm512_max_ps(a, b);
44443        let e = _mm512_setr_ps(
44444            15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
44445        );
44446        assert_eq_m512(r, e);
44447    }
44448
44449    #[simd_test(enable = "avx512f")]
44450    unsafe fn test_mm512_mask_max_ps() {
44451        let a = _mm512_setr_ps(
44452            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44453        );
44454        let b = _mm512_setr_ps(
44455            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44456        );
44457        let r = _mm512_mask_max_ps(a, 0, a, b);
44458        assert_eq_m512(r, a);
44459        let r = _mm512_mask_max_ps(a, 0b00000000_11111111, a, b);
44460        let e = _mm512_setr_ps(
44461            15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
44462        );
44463        assert_eq_m512(r, e);
44464    }
44465
44466    #[simd_test(enable = "avx512f")]
44467    unsafe fn test_mm512_maskz_max_ps() {
44468        let a = _mm512_setr_ps(
44469            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44470        );
44471        let b = _mm512_setr_ps(
44472            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44473        );
44474        let r = _mm512_maskz_max_ps(0, a, b);
44475        assert_eq_m512(r, _mm512_setzero_ps());
44476        let r = _mm512_maskz_max_ps(0b00000000_11111111, a, b);
44477        let e = _mm512_setr_ps(
44478            15., 14., 13., 12., 11., 10., 9., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
44479        );
44480        assert_eq_m512(r, e);
44481    }
44482
44483    #[simd_test(enable = "avx512f,avx512vl")]
44484    unsafe fn test_mm256_mask_max_ps() {
44485        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44486        let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
44487        let r = _mm256_mask_max_ps(a, 0, a, b);
44488        assert_eq_m256(r, a);
44489        let r = _mm256_mask_max_ps(a, 0b11111111, a, b);
44490        let e = _mm256_set_ps(7., 6., 5., 4., 4., 5., 6., 7.);
44491        assert_eq_m256(r, e);
44492    }
44493
44494    #[simd_test(enable = "avx512f,avx512vl")]
44495    unsafe fn test_mm256_maskz_max_ps() {
44496        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44497        let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
44498        let r = _mm256_maskz_max_ps(0, a, b);
44499        assert_eq_m256(r, _mm256_setzero_ps());
44500        let r = _mm256_maskz_max_ps(0b11111111, a, b);
44501        let e = _mm256_set_ps(7., 6., 5., 4., 4., 5., 6., 7.);
44502        assert_eq_m256(r, e);
44503    }
44504
44505    #[simd_test(enable = "avx512f,avx512vl")]
44506    unsafe fn test_mm_mask_max_ps() {
44507        let a = _mm_set_ps(0., 1., 2., 3.);
44508        let b = _mm_set_ps(3., 2., 1., 0.);
44509        let r = _mm_mask_max_ps(a, 0, a, b);
44510        assert_eq_m128(r, a);
44511        let r = _mm_mask_max_ps(a, 0b00001111, a, b);
44512        let e = _mm_set_ps(3., 2., 2., 3.);
44513        assert_eq_m128(r, e);
44514    }
44515
44516    #[simd_test(enable = "avx512f,avx512vl")]
44517    unsafe fn test_mm_maskz_max_ps() {
44518        let a = _mm_set_ps(0., 1., 2., 3.);
44519        let b = _mm_set_ps(3., 2., 1., 0.);
44520        let r = _mm_maskz_max_ps(0, a, b);
44521        assert_eq_m128(r, _mm_setzero_ps());
44522        let r = _mm_maskz_max_ps(0b00001111, a, b);
44523        let e = _mm_set_ps(3., 2., 2., 3.);
44524        assert_eq_m128(r, e);
44525    }
44526
44527    #[simd_test(enable = "avx512f")]
44528    unsafe fn test_mm512_max_epu32() {
44529        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44530        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44531        let r = _mm512_max_epu32(a, b);
44532        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
44533        assert_eq_m512i(r, e);
44534    }
44535
44536    #[simd_test(enable = "avx512f")]
44537    unsafe fn test_mm512_mask_max_epu32() {
44538        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44539        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44540        let r = _mm512_mask_max_epu32(a, 0, a, b);
44541        assert_eq_m512i(r, a);
44542        let r = _mm512_mask_max_epu32(a, 0b00000000_11111111, a, b);
44543        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
44544        assert_eq_m512i(r, e);
44545    }
44546
44547    #[simd_test(enable = "avx512f")]
44548    unsafe fn test_mm512_maskz_max_epu32() {
44549        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44550        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44551        let r = _mm512_maskz_max_epu32(0, a, b);
44552        assert_eq_m512i(r, _mm512_setzero_si512());
44553        let r = _mm512_maskz_max_epu32(0b00000000_11111111, a, b);
44554        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 0, 0, 0, 0, 0, 0, 0, 0);
44555        assert_eq_m512i(r, e);
44556    }
44557
44558    #[simd_test(enable = "avx512f,avx512vl")]
44559    unsafe fn test_mm256_mask_max_epu32() {
44560        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44561        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44562        let r = _mm256_mask_max_epu32(a, 0, a, b);
44563        assert_eq_m256i(r, a);
44564        let r = _mm256_mask_max_epu32(a, 0b11111111, a, b);
44565        let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
44566        assert_eq_m256i(r, e);
44567    }
44568
44569    #[simd_test(enable = "avx512f,avx512vl")]
44570    unsafe fn test_mm256_maskz_max_epu32() {
44571        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44572        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44573        let r = _mm256_maskz_max_epu32(0, a, b);
44574        assert_eq_m256i(r, _mm256_setzero_si256());
44575        let r = _mm256_maskz_max_epu32(0b11111111, a, b);
44576        let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
44577        assert_eq_m256i(r, e);
44578    }
44579
44580    #[simd_test(enable = "avx512f,avx512vl")]
44581    unsafe fn test_mm_mask_max_epu32() {
44582        let a = _mm_set_epi32(0, 1, 2, 3);
44583        let b = _mm_set_epi32(3, 2, 1, 0);
44584        let r = _mm_mask_max_epu32(a, 0, a, b);
44585        assert_eq_m128i(r, a);
44586        let r = _mm_mask_max_epu32(a, 0b00001111, a, b);
44587        let e = _mm_set_epi32(3, 2, 2, 3);
44588        assert_eq_m128i(r, e);
44589    }
44590
44591    #[simd_test(enable = "avx512f,avx512vl")]
44592    unsafe fn test_mm_maskz_max_epu32() {
44593        let a = _mm_set_epi32(0, 1, 2, 3);
44594        let b = _mm_set_epi32(3, 2, 1, 0);
44595        let r = _mm_maskz_max_epu32(0, a, b);
44596        assert_eq_m128i(r, _mm_setzero_si128());
44597        let r = _mm_maskz_max_epu32(0b00001111, a, b);
44598        let e = _mm_set_epi32(3, 2, 2, 3);
44599        assert_eq_m128i(r, e);
44600    }
44601
44602    #[simd_test(enable = "avx512f")]
44603    unsafe fn test_mm512_min_epi32() {
44604        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44605        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44606        let r = _mm512_min_epi32(a, b);
44607        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
44608        assert_eq_m512i(r, e);
44609    }
44610
44611    #[simd_test(enable = "avx512f")]
44612    unsafe fn test_mm512_mask_min_epi32() {
44613        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44614        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44615        let r = _mm512_mask_min_epi32(a, 0, a, b);
44616        assert_eq_m512i(r, a);
44617        let r = _mm512_mask_min_epi32(a, 0b00000000_11111111, a, b);
44618        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44619        assert_eq_m512i(r, e);
44620    }
44621
44622    #[simd_test(enable = "avx512f")]
44623    unsafe fn test_mm512_maskz_min_epi32() {
44624        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44625        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44626        let r = _mm512_maskz_min_epi32(0, a, b);
44627        assert_eq_m512i(r, _mm512_setzero_si512());
44628        let r = _mm512_maskz_min_epi32(0b00000000_11111111, a, b);
44629        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
44630        assert_eq_m512i(r, e);
44631    }
44632
44633    #[simd_test(enable = "avx512f,avx512vl")]
44634    unsafe fn test_mm256_mask_min_epi32() {
44635        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44636        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44637        let r = _mm256_mask_min_epi32(a, 0, a, b);
44638        assert_eq_m256i(r, a);
44639        let r = _mm256_mask_min_epi32(a, 0b11111111, a, b);
44640        let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
44641        assert_eq_m256i(r, e);
44642    }
44643
44644    #[simd_test(enable = "avx512f,avx512vl")]
44645    unsafe fn test_mm256_maskz_min_epi32() {
44646        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44647        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44648        let r = _mm256_maskz_min_epi32(0, a, b);
44649        assert_eq_m256i(r, _mm256_setzero_si256());
44650        let r = _mm256_maskz_min_epi32(0b11111111, a, b);
44651        let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
44652        assert_eq_m256i(r, e);
44653    }
44654
44655    #[simd_test(enable = "avx512f,avx512vl")]
44656    unsafe fn test_mm_mask_min_epi32() {
44657        let a = _mm_set_epi32(0, 1, 2, 3);
44658        let b = _mm_set_epi32(3, 2, 1, 0);
44659        let r = _mm_mask_min_epi32(a, 0, a, b);
44660        assert_eq_m128i(r, a);
44661        let r = _mm_mask_min_epi32(a, 0b00001111, a, b);
44662        let e = _mm_set_epi32(0, 1, 1, 0);
44663        assert_eq_m128i(r, e);
44664    }
44665
44666    #[simd_test(enable = "avx512f,avx512vl")]
44667    unsafe fn test_mm_maskz_min_epi32() {
44668        let a = _mm_set_epi32(0, 1, 2, 3);
44669        let b = _mm_set_epi32(3, 2, 1, 0);
44670        let r = _mm_maskz_min_epi32(0, a, b);
44671        assert_eq_m128i(r, _mm_setzero_si128());
44672        let r = _mm_maskz_min_epi32(0b00001111, a, b);
44673        let e = _mm_set_epi32(0, 1, 1, 0);
44674        assert_eq_m128i(r, e);
44675    }
44676
44677    #[simd_test(enable = "avx512f")]
44678    unsafe fn test_mm512_min_ps() {
44679        let a = _mm512_setr_ps(
44680            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44681        );
44682        let b = _mm512_setr_ps(
44683            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44684        );
44685        let r = _mm512_min_ps(a, b);
44686        let e = _mm512_setr_ps(
44687            0., 1., 2., 3., 4., 5., 6., 7., 7., 6., 5., 4., 3., 2., 1., 0.,
44688        );
44689        assert_eq_m512(r, e);
44690    }
44691
44692    #[simd_test(enable = "avx512f")]
44693    unsafe fn test_mm512_mask_min_ps() {
44694        let a = _mm512_setr_ps(
44695            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44696        );
44697        let b = _mm512_setr_ps(
44698            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44699        );
44700        let r = _mm512_mask_min_ps(a, 0, a, b);
44701        assert_eq_m512(r, a);
44702        let r = _mm512_mask_min_ps(a, 0b00000000_11111111, a, b);
44703        let e = _mm512_setr_ps(
44704            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44705        );
44706        assert_eq_m512(r, e);
44707    }
44708
44709    #[simd_test(enable = "avx512f")]
44710    unsafe fn test_mm512_maskz_min_ps() {
44711        let a = _mm512_setr_ps(
44712            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44713        );
44714        let b = _mm512_setr_ps(
44715            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44716        );
44717        let r = _mm512_maskz_min_ps(0, a, b);
44718        assert_eq_m512(r, _mm512_setzero_ps());
44719        let r = _mm512_maskz_min_ps(0b00000000_11111111, a, b);
44720        let e = _mm512_setr_ps(
44721            0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
44722        );
44723        assert_eq_m512(r, e);
44724    }
44725
44726    #[simd_test(enable = "avx512f,avx512vl")]
44727    unsafe fn test_mm256_mask_min_ps() {
44728        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44729        let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
44730        let r = _mm256_mask_min_ps(a, 0, a, b);
44731        assert_eq_m256(r, a);
44732        let r = _mm256_mask_min_ps(a, 0b11111111, a, b);
44733        let e = _mm256_set_ps(0., 1., 2., 3., 3., 2., 1., 0.);
44734        assert_eq_m256(r, e);
44735    }
44736
44737    #[simd_test(enable = "avx512f,avx512vl")]
44738    unsafe fn test_mm256_maskz_min_ps() {
44739        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44740        let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
44741        let r = _mm256_maskz_min_ps(0, a, b);
44742        assert_eq_m256(r, _mm256_setzero_ps());
44743        let r = _mm256_maskz_min_ps(0b11111111, a, b);
44744        let e = _mm256_set_ps(0., 1., 2., 3., 3., 2., 1., 0.);
44745        assert_eq_m256(r, e);
44746    }
44747
44748    #[simd_test(enable = "avx512f,avx512vl")]
44749    unsafe fn test_mm_mask_min_ps() {
44750        let a = _mm_set_ps(0., 1., 2., 3.);
44751        let b = _mm_set_ps(3., 2., 1., 0.);
44752        let r = _mm_mask_min_ps(a, 0, a, b);
44753        assert_eq_m128(r, a);
44754        let r = _mm_mask_min_ps(a, 0b00001111, a, b);
44755        let e = _mm_set_ps(0., 1., 1., 0.);
44756        assert_eq_m128(r, e);
44757    }
44758
44759    #[simd_test(enable = "avx512f,avx512vl")]
44760    unsafe fn test_mm_maskz_min_ps() {
44761        let a = _mm_set_ps(0., 1., 2., 3.);
44762        let b = _mm_set_ps(3., 2., 1., 0.);
44763        let r = _mm_maskz_min_ps(0, a, b);
44764        assert_eq_m128(r, _mm_setzero_ps());
44765        let r = _mm_maskz_min_ps(0b00001111, a, b);
44766        let e = _mm_set_ps(0., 1., 1., 0.);
44767        assert_eq_m128(r, e);
44768    }
44769
44770    #[simd_test(enable = "avx512f")]
44771    unsafe fn test_mm512_min_epu32() {
44772        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44773        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44774        let r = _mm512_min_epu32(a, b);
44775        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
44776        assert_eq_m512i(r, e);
44777    }
44778
44779    #[simd_test(enable = "avx512f")]
44780    unsafe fn test_mm512_mask_min_epu32() {
44781        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44782        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44783        let r = _mm512_mask_min_epu32(a, 0, a, b);
44784        assert_eq_m512i(r, a);
44785        let r = _mm512_mask_min_epu32(a, 0b00000000_11111111, a, b);
44786        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44787        assert_eq_m512i(r, e);
44788    }
44789
44790    #[simd_test(enable = "avx512f")]
44791    unsafe fn test_mm512_maskz_min_epu32() {
44792        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44793        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44794        let r = _mm512_maskz_min_epu32(0, a, b);
44795        assert_eq_m512i(r, _mm512_setzero_si512());
44796        let r = _mm512_maskz_min_epu32(0b00000000_11111111, a, b);
44797        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
44798        assert_eq_m512i(r, e);
44799    }
44800
44801    #[simd_test(enable = "avx512f,avx512vl")]
44802    unsafe fn test_mm256_mask_min_epu32() {
44803        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44804        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44805        let r = _mm256_mask_min_epu32(a, 0, a, b);
44806        assert_eq_m256i(r, a);
44807        let r = _mm256_mask_min_epu32(a, 0b11111111, a, b);
44808        let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
44809        assert_eq_m256i(r, e);
44810    }
44811
44812    #[simd_test(enable = "avx512f,avx512vl")]
44813    unsafe fn test_mm256_maskz_min_epu32() {
44814        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44815        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44816        let r = _mm256_maskz_min_epu32(0, a, b);
44817        assert_eq_m256i(r, _mm256_setzero_si256());
44818        let r = _mm256_maskz_min_epu32(0b11111111, a, b);
44819        let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
44820        assert_eq_m256i(r, e);
44821    }
44822
44823    #[simd_test(enable = "avx512f,avx512vl")]
44824    unsafe fn test_mm_mask_min_epu32() {
44825        let a = _mm_set_epi32(0, 1, 2, 3);
44826        let b = _mm_set_epi32(3, 2, 1, 0);
44827        let r = _mm_mask_min_epu32(a, 0, a, b);
44828        assert_eq_m128i(r, a);
44829        let r = _mm_mask_min_epu32(a, 0b00001111, a, b);
44830        let e = _mm_set_epi32(0, 1, 1, 0);
44831        assert_eq_m128i(r, e);
44832    }
44833
44834    #[simd_test(enable = "avx512f,avx512vl")]
44835    unsafe fn test_mm_maskz_min_epu32() {
44836        let a = _mm_set_epi32(0, 1, 2, 3);
44837        let b = _mm_set_epi32(3, 2, 1, 0);
44838        let r = _mm_maskz_min_epu32(0, a, b);
44839        assert_eq_m128i(r, _mm_setzero_si128());
44840        let r = _mm_maskz_min_epu32(0b00001111, a, b);
44841        let e = _mm_set_epi32(0, 1, 1, 0);
44842        assert_eq_m128i(r, e);
44843    }
44844
44845    #[simd_test(enable = "avx512f")]
44846    unsafe fn test_mm512_sqrt_ps() {
44847        let a = _mm512_setr_ps(
44848            0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
44849        );
44850        let r = _mm512_sqrt_ps(a);
44851        let e = _mm512_setr_ps(
44852            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44853        );
44854        assert_eq_m512(r, e);
44855    }
44856
44857    #[simd_test(enable = "avx512f")]
44858    unsafe fn test_mm512_mask_sqrt_ps() {
44859        let a = _mm512_setr_ps(
44860            0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
44861        );
44862        let r = _mm512_mask_sqrt_ps(a, 0, a);
44863        assert_eq_m512(r, a);
44864        let r = _mm512_mask_sqrt_ps(a, 0b00000000_11111111, a);
44865        let e = _mm512_setr_ps(
44866            0., 1., 2., 3., 4., 5., 6., 7., 64., 81., 100., 121., 144., 169., 196., 225.,
44867        );
44868        assert_eq_m512(r, e);
44869    }
44870
44871    #[simd_test(enable = "avx512f")]
44872    unsafe fn test_mm512_maskz_sqrt_ps() {
44873        let a = _mm512_setr_ps(
44874            0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
44875        );
44876        let r = _mm512_maskz_sqrt_ps(0, a);
44877        assert_eq_m512(r, _mm512_setzero_ps());
44878        let r = _mm512_maskz_sqrt_ps(0b00000000_11111111, a);
44879        let e = _mm512_setr_ps(
44880            0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
44881        );
44882        assert_eq_m512(r, e);
44883    }
44884
44885    #[simd_test(enable = "avx512f,avx512vl")]
44886    unsafe fn test_mm256_mask_sqrt_ps() {
44887        let a = _mm256_set_ps(0., 1., 4., 9., 16., 25., 36., 49.);
44888        let r = _mm256_mask_sqrt_ps(a, 0, a);
44889        assert_eq_m256(r, a);
44890        let r = _mm256_mask_sqrt_ps(a, 0b11111111, a);
44891        let e = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44892        assert_eq_m256(r, e);
44893    }
44894
44895    #[simd_test(enable = "avx512f,avx512vl")]
44896    unsafe fn test_mm256_maskz_sqrt_ps() {
44897        let a = _mm256_set_ps(0., 1., 4., 9., 16., 25., 36., 49.);
44898        let r = _mm256_maskz_sqrt_ps(0, a);
44899        assert_eq_m256(r, _mm256_setzero_ps());
44900        let r = _mm256_maskz_sqrt_ps(0b11111111, a);
44901        let e = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44902        assert_eq_m256(r, e);
44903    }
44904
44905    #[simd_test(enable = "avx512f,avx512vl")]
44906    unsafe fn test_mm_mask_sqrt_ps() {
44907        let a = _mm_set_ps(0., 1., 4., 9.);
44908        let r = _mm_mask_sqrt_ps(a, 0, a);
44909        assert_eq_m128(r, a);
44910        let r = _mm_mask_sqrt_ps(a, 0b00001111, a);
44911        let e = _mm_set_ps(0., 1., 2., 3.);
44912        assert_eq_m128(r, e);
44913    }
44914
44915    #[simd_test(enable = "avx512f,avx512vl")]
44916    unsafe fn test_mm_maskz_sqrt_ps() {
44917        let a = _mm_set_ps(0., 1., 4., 9.);
44918        let r = _mm_maskz_sqrt_ps(0, a);
44919        assert_eq_m128(r, _mm_setzero_ps());
44920        let r = _mm_maskz_sqrt_ps(0b00001111, a);
44921        let e = _mm_set_ps(0., 1., 2., 3.);
44922        assert_eq_m128(r, e);
44923    }
44924
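    // The FMA tests below compute `a * b + c` per lane. For the masked variants, a clear
    // mask bit keeps `a` (`_mask`), keeps `c` (`_mask3`), or zeroes the lane (`_maskz`).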
44925    #[simd_test(enable = "avx512f")]
44926    unsafe fn test_mm512_fmadd_ps() {
44927        let a = _mm512_set1_ps(1.);
44928        let b = _mm512_setr_ps(
44929            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44930        );
44931        let c = _mm512_set1_ps(1.);
44932        let r = _mm512_fmadd_ps(a, b, c);
44933        let e = _mm512_setr_ps(
44934            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
44935        );
44936        assert_eq_m512(r, e);
44937    }
44938
44939    #[simd_test(enable = "avx512f")]
44940    unsafe fn test_mm512_mask_fmadd_ps() {
44941        let a = _mm512_set1_ps(1.);
44942        let b = _mm512_setr_ps(
44943            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44944        );
44945        let c = _mm512_set1_ps(1.);
44946        let r = _mm512_mask_fmadd_ps(a, 0, b, c);
44947        assert_eq_m512(r, a);
44948        let r = _mm512_mask_fmadd_ps(a, 0b00000000_11111111, b, c);
44949        let e = _mm512_setr_ps(
44950            1., 2., 3., 4., 5., 6., 7., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
44951        );
44952        assert_eq_m512(r, e);
44953    }
44954
44955    #[simd_test(enable = "avx512f")]
44956    unsafe fn test_mm512_maskz_fmadd_ps() {
44957        let a = _mm512_set1_ps(1.);
44958        let b = _mm512_setr_ps(
44959            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44960        );
44961        let c = _mm512_set1_ps(1.);
44962        let r = _mm512_maskz_fmadd_ps(0, a, b, c);
44963        assert_eq_m512(r, _mm512_setzero_ps());
44964        let r = _mm512_maskz_fmadd_ps(0b00000000_11111111, a, b, c);
44965        let e = _mm512_setr_ps(
44966            1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
44967        );
44968        assert_eq_m512(r, e);
44969    }
44970
44971    #[simd_test(enable = "avx512f")]
44972    unsafe fn test_mm512_mask3_fmadd_ps() {
44973        let a = _mm512_set1_ps(1.);
44974        let b = _mm512_setr_ps(
44975            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44976        );
44977        let c = _mm512_set1_ps(2.);
44978        let r = _mm512_mask3_fmadd_ps(a, b, c, 0);
44979        assert_eq_m512(r, c);
44980        let r = _mm512_mask3_fmadd_ps(a, b, c, 0b00000000_11111111);
44981        let e = _mm512_setr_ps(
44982            2., 3., 4., 5., 6., 7., 8., 9., 2., 2., 2., 2., 2., 2., 2., 2.,
44983        );
44984        assert_eq_m512(r, e);
44985    }
44986
44987    #[simd_test(enable = "avx512f,avx512vl")]
44988    unsafe fn test_mm256_mask_fmadd_ps() {
44989        let a = _mm256_set1_ps(1.);
44990        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44991        let c = _mm256_set1_ps(1.);
44992        let r = _mm256_mask_fmadd_ps(a, 0, b, c);
44993        assert_eq_m256(r, a);
44994        let r = _mm256_mask_fmadd_ps(a, 0b11111111, b, c);
44995        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
44996        assert_eq_m256(r, e);
44997    }
44998
44999    #[simd_test(enable = "avx512f,avx512vl")]
45000    unsafe fn test_mm256_maskz_fmadd_ps() {
45001        let a = _mm256_set1_ps(1.);
45002        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45003        let c = _mm256_set1_ps(1.);
45004        let r = _mm256_maskz_fmadd_ps(0, a, b, c);
45005        assert_eq_m256(r, _mm256_setzero_ps());
45006        let r = _mm256_maskz_fmadd_ps(0b11111111, a, b, c);
45007        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
45008        assert_eq_m256(r, e);
45009    }
45010
45011    #[simd_test(enable = "avx512f,avx512vl")]
45012    unsafe fn test_mm256_mask3_fmadd_ps() {
45013        let a = _mm256_set1_ps(1.);
45014        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45015        let c = _mm256_set1_ps(1.);
45016        let r = _mm256_mask3_fmadd_ps(a, b, c, 0);
45017        assert_eq_m256(r, c);
45018        let r = _mm256_mask3_fmadd_ps(a, b, c, 0b11111111);
45019        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
45020        assert_eq_m256(r, e);
45021    }
45022
45023    #[simd_test(enable = "avx512f,avx512vl")]
45024    unsafe fn test_mm_mask_fmadd_ps() {
45025        let a = _mm_set1_ps(1.);
45026        let b = _mm_set_ps(0., 1., 2., 3.);
45027        let c = _mm_set1_ps(1.);
45028        let r = _mm_mask_fmadd_ps(a, 0, b, c);
45029        assert_eq_m128(r, a);
45030        let r = _mm_mask_fmadd_ps(a, 0b00001111, b, c);
45031        let e = _mm_set_ps(1., 2., 3., 4.);
45032        assert_eq_m128(r, e);
45033    }
45034
45035    #[simd_test(enable = "avx512f,avx512vl")]
45036    unsafe fn test_mm_maskz_fmadd_ps() {
45037        let a = _mm_set1_ps(1.);
45038        let b = _mm_set_ps(0., 1., 2., 3.);
45039        let c = _mm_set1_ps(1.);
45040        let r = _mm_maskz_fmadd_ps(0, a, b, c);
45041        assert_eq_m128(r, _mm_setzero_ps());
45042        let r = _mm_maskz_fmadd_ps(0b00001111, a, b, c);
45043        let e = _mm_set_ps(1., 2., 3., 4.);
45044        assert_eq_m128(r, e);
45045    }
45046
45047    #[simd_test(enable = "avx512f,avx512vl")]
45048    unsafe fn test_mm_mask3_fmadd_ps() {
45049        let a = _mm_set1_ps(1.);
45050        let b = _mm_set_ps(0., 1., 2., 3.);
45051        let c = _mm_set1_ps(1.);
45052        let r = _mm_mask3_fmadd_ps(a, b, c, 0);
45053        assert_eq_m128(r, c);
45054        let r = _mm_mask3_fmadd_ps(a, b, c, 0b00001111);
45055        let e = _mm_set_ps(1., 2., 3., 4.);
45056        assert_eq_m128(r, e);
45057    }
45058
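    // `fmsub` computes `a * b - c` per lane; the `_mask`/`_mask3`/`_maskz` conventions
    // match the `fmadd` tests above.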
45059    #[simd_test(enable = "avx512f")]
45060    unsafe fn test_mm512_fmsub_ps() {
45061        let a = _mm512_setr_ps(
45062            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
45063        );
45064        let b = _mm512_setr_ps(
45065            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45066        );
45067        let c = _mm512_setr_ps(
45068            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
45069        );
45070        let r = _mm512_fmsub_ps(a, b, c);
45071        let e = _mm512_setr_ps(
45072            -1., 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14.,
45073        );
45074        assert_eq_m512(r, e);
45075    }
45076
45077    #[simd_test(enable = "avx512f")]
45078    unsafe fn test_mm512_mask_fmsub_ps() {
45079        let a = _mm512_set1_ps(1.);
45080        let b = _mm512_setr_ps(
45081            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45082        );
45083        let c = _mm512_set1_ps(1.);
45084        let r = _mm512_mask_fmsub_ps(a, 0, b, c);
45085        assert_eq_m512(r, a);
45086        let r = _mm512_mask_fmsub_ps(a, 0b00000000_11111111, b, c);
45087        let e = _mm512_setr_ps(
45088            -1., 0., 1., 2., 3., 4., 5., 6., 1., 1., 1., 1., 1., 1., 1., 1.,
45089        );
45090        assert_eq_m512(r, e);
45091    }
45092
45093    #[simd_test(enable = "avx512f")]
45094    unsafe fn test_mm512_maskz_fmsub_ps() {
45095        let a = _mm512_set1_ps(1.);
45096        let b = _mm512_setr_ps(
45097            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45098        );
45099        let c = _mm512_set1_ps(1.);
45100        let r = _mm512_maskz_fmsub_ps(0, a, b, c);
45101        assert_eq_m512(r, _mm512_setzero_ps());
45102        let r = _mm512_maskz_fmsub_ps(0b00000000_11111111, a, b, c);
45103        let e = _mm512_setr_ps(
45104            -1., 0., 1., 2., 3., 4., 5., 6., 0., 0., 0., 0., 0., 0., 0., 0.,
45105        );
45106        assert_eq_m512(r, e);
45107    }
45108
45109    #[simd_test(enable = "avx512f")]
45110    unsafe fn test_mm512_mask3_fmsub_ps() {
45111        let a = _mm512_set1_ps(1.);
45112        let b = _mm512_setr_ps(
45113            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45114        );
45115        let c = _mm512_setr_ps(
45116            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
45117        );
45118        let r = _mm512_mask3_fmsub_ps(a, b, c, 0);
45119        assert_eq_m512(r, c);
45120        let r = _mm512_mask3_fmsub_ps(a, b, c, 0b00000000_11111111);
45121        let e = _mm512_setr_ps(
45122            -1., 0., 1., 2., 3., 4., 5., 6., 2., 2., 2., 2., 2., 2., 2., 2.,
45123        );
45124        assert_eq_m512(r, e);
45125    }
45126
45127    #[simd_test(enable = "avx512f,avx512vl")]
45128    unsafe fn test_mm256_mask_fmsub_ps() {
45129        let a = _mm256_set1_ps(1.);
45130        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45131        let c = _mm256_set1_ps(1.);
45132        let r = _mm256_mask_fmsub_ps(a, 0, b, c);
45133        assert_eq_m256(r, a);
45134        let r = _mm256_mask_fmsub_ps(a, 0b11111111, b, c);
45135        let e = _mm256_set_ps(-1., 0., 1., 2., 3., 4., 5., 6.);
45136        assert_eq_m256(r, e);
45137    }
45138
45139    #[simd_test(enable = "avx512f,avx512vl")]
45140    unsafe fn test_mm256_maskz_fmsub_ps() {
45141        let a = _mm256_set1_ps(1.);
45142        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45143        let c = _mm256_set1_ps(1.);
45144        let r = _mm256_maskz_fmsub_ps(0, a, b, c);
45145        assert_eq_m256(r, _mm256_setzero_ps());
45146        let r = _mm256_maskz_fmsub_ps(0b11111111, a, b, c);
45147        let e = _mm256_set_ps(-1., 0., 1., 2., 3., 4., 5., 6.);
45148        assert_eq_m256(r, e);
45149    }
45150
45151    #[simd_test(enable = "avx512f,avx512vl")]
45152    unsafe fn test_mm256_mask3_fmsub_ps() {
45153        let a = _mm256_set1_ps(1.);
45154        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45155        let c = _mm256_set1_ps(1.);
45156        let r = _mm256_mask3_fmsub_ps(a, b, c, 0);
45157        assert_eq_m256(r, c);
45158        let r = _mm256_mask3_fmsub_ps(a, b, c, 0b11111111);
45159        let e = _mm256_set_ps(-1., 0., 1., 2., 3., 4., 5., 6.);
45160        assert_eq_m256(r, e);
45161    }
45162
45163    #[simd_test(enable = "avx512f,avx512vl")]
45164    unsafe fn test_mm_mask_fmsub_ps() {
45165        let a = _mm_set1_ps(1.);
45166        let b = _mm_set_ps(0., 1., 2., 3.);
45167        let c = _mm_set1_ps(1.);
45168        let r = _mm_mask_fmsub_ps(a, 0, b, c);
45169        assert_eq_m128(r, a);
45170        let r = _mm_mask_fmsub_ps(a, 0b00001111, b, c);
45171        let e = _mm_set_ps(-1., 0., 1., 2.);
45172        assert_eq_m128(r, e);
45173    }
45174
45175    #[simd_test(enable = "avx512f,avx512vl")]
45176    unsafe fn test_mm_maskz_fmsub_ps() {
45177        let a = _mm_set1_ps(1.);
45178        let b = _mm_set_ps(0., 1., 2., 3.);
45179        let c = _mm_set1_ps(1.);
45180        let r = _mm_maskz_fmsub_ps(0, a, b, c);
45181        assert_eq_m128(r, _mm_setzero_ps());
45182        let r = _mm_maskz_fmsub_ps(0b00001111, a, b, c);
45183        let e = _mm_set_ps(-1., 0., 1., 2.);
45184        assert_eq_m128(r, e);
45185    }
45186
45187    #[simd_test(enable = "avx512f,avx512vl")]
45188    unsafe fn test_mm_mask3_fmsub_ps() {
45189        let a = _mm_set1_ps(1.);
45190        let b = _mm_set_ps(0., 1., 2., 3.);
45191        let c = _mm_set1_ps(1.);
45192        let r = _mm_mask3_fmsub_ps(a, b, c, 0);
45193        assert_eq_m128(r, c);
45194        let r = _mm_mask3_fmsub_ps(a, b, c, 0b00001111);
45195        let e = _mm_set_ps(-1., 0., 1., 2.);
45196        assert_eq_m128(r, e);
45197    }
45198
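    // `fmaddsub` tests: even-indexed lanes compute `a * b - c` and odd-indexed
    // lanes compute `a * b + c`.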
45199    #[simd_test(enable = "avx512f")]
45200    unsafe fn test_mm512_fmaddsub_ps() {
45201        let a = _mm512_set1_ps(1.);
45202        let b = _mm512_setr_ps(
45203            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45204        );
45205        let c = _mm512_set1_ps(1.);
45206        let r = _mm512_fmaddsub_ps(a, b, c);
45207        let e = _mm512_setr_ps(
45208            -1., 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16.,
45209        );
45210        assert_eq_m512(r, e);
45211    }
45212
45213    #[simd_test(enable = "avx512f")]
45214    unsafe fn test_mm512_mask_fmaddsub_ps() {
45215        let a = _mm512_set1_ps(1.);
45216        let b = _mm512_setr_ps(
45217            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45218        );
45219        let c = _mm512_set1_ps(1.);
45220        let r = _mm512_mask_fmaddsub_ps(a, 0, b, c);
45221        assert_eq_m512(r, a);
45222        let r = _mm512_mask_fmaddsub_ps(a, 0b00000000_11111111, b, c);
45223        let e = _mm512_setr_ps(
45224            -1., 2., 1., 4., 3., 6., 5., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
45225        );
45226        assert_eq_m512(r, e);
45227    }
45228
45229    #[simd_test(enable = "avx512f")]
45230    unsafe fn test_mm512_maskz_fmaddsub_ps() {
45231        let a = _mm512_set1_ps(1.);
45232        let b = _mm512_setr_ps(
45233            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45234        );
45235        let c = _mm512_set1_ps(1.);
45236        let r = _mm512_maskz_fmaddsub_ps(0, a, b, c);
45237        assert_eq_m512(r, _mm512_setzero_ps());
45238        let r = _mm512_maskz_fmaddsub_ps(0b00000000_11111111, a, b, c);
45239        let e = _mm512_setr_ps(
45240            -1., 2., 1., 4., 3., 6., 5., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
45241        );
45242        assert_eq_m512(r, e);
45243    }
45244
45245    #[simd_test(enable = "avx512f")]
45246    unsafe fn test_mm512_mask3_fmaddsub_ps() {
45247        let a = _mm512_set1_ps(1.);
45248        let b = _mm512_setr_ps(
45249            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45250        );
45251        let c = _mm512_setr_ps(
45252            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
45253        );
45254        let r = _mm512_mask3_fmaddsub_ps(a, b, c, 0);
45255        assert_eq_m512(r, c);
45256        let r = _mm512_mask3_fmaddsub_ps(a, b, c, 0b00000000_11111111);
45257        let e = _mm512_setr_ps(
45258            -1., 2., 1., 4., 3., 6., 5., 8., 2., 2., 2., 2., 2., 2., 2., 2.,
45259        );
45260        assert_eq_m512(r, e);
45261    }
45262
45263    #[simd_test(enable = "avx512f,avx512vl")]
45264    unsafe fn test_mm256_mask_fmaddsub_ps() {
45265        let a = _mm256_set1_ps(1.);
45266        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45267        let c = _mm256_set1_ps(1.);
45268        let r = _mm256_mask_fmaddsub_ps(a, 0, b, c);
45269        assert_eq_m256(r, a);
45270        let r = _mm256_mask_fmaddsub_ps(a, 0b11111111, b, c);
45271        let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.);
45272        assert_eq_m256(r, e);
45273    }
45274
45275    #[simd_test(enable = "avx512f,avx512vl")]
45276    unsafe fn test_mm256_maskz_fmaddsub_ps() {
45277        let a = _mm256_set1_ps(1.);
45278        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45279        let c = _mm256_set1_ps(1.);
45280        let r = _mm256_maskz_fmaddsub_ps(0, a, b, c);
45281        assert_eq_m256(r, _mm256_setzero_ps());
45282        let r = _mm256_maskz_fmaddsub_ps(0b11111111, a, b, c);
45283        let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.);
45284        assert_eq_m256(r, e);
45285    }
45286
45287    #[simd_test(enable = "avx512f,avx512vl")]
45288    unsafe fn test_mm256_mask3_fmaddsub_ps() {
45289        let a = _mm256_set1_ps(1.);
45290        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45291        let c = _mm256_set1_ps(1.);
45292        let r = _mm256_mask3_fmaddsub_ps(a, b, c, 0);
45293        assert_eq_m256(r, c);
45294        let r = _mm256_mask3_fmaddsub_ps(a, b, c, 0b11111111);
45295        let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.);
45296        assert_eq_m256(r, e);
45297    }
45298
45299    #[simd_test(enable = "avx512f,avx512vl")]
45300    unsafe fn test_mm_mask_fmaddsub_ps() {
45301        let a = _mm_set1_ps(1.);
45302        let b = _mm_set_ps(0., 1., 2., 3.);
45303        let c = _mm_set1_ps(1.);
45304        let r = _mm_mask_fmaddsub_ps(a, 0, b, c);
45305        assert_eq_m128(r, a);
45306        let r = _mm_mask_fmaddsub_ps(a, 0b00001111, b, c);
45307        let e = _mm_set_ps(1., 0., 3., 2.);
45308        assert_eq_m128(r, e);
45309    }
45310
45311    #[simd_test(enable = "avx512f,avx512vl")]
45312    unsafe fn test_mm_maskz_fmaddsub_ps() {
45313        let a = _mm_set1_ps(1.);
45314        let b = _mm_set_ps(0., 1., 2., 3.);
45315        let c = _mm_set1_ps(1.);
45316        let r = _mm_maskz_fmaddsub_ps(0, a, b, c);
45317        assert_eq_m128(r, _mm_setzero_ps());
45318        let r = _mm_maskz_fmaddsub_ps(0b00001111, a, b, c);
45319        let e = _mm_set_ps(1., 0., 3., 2.);
45320        assert_eq_m128(r, e);
45321    }
45322
45323    #[simd_test(enable = "avx512f,avx512vl")]
45324    unsafe fn test_mm_mask3_fmaddsub_ps() {
45325        let a = _mm_set1_ps(1.);
45326        let b = _mm_set_ps(0., 1., 2., 3.);
45327        let c = _mm_set1_ps(1.);
45328        let r = _mm_mask3_fmaddsub_ps(a, b, c, 0);
45329        assert_eq_m128(r, c);
45330        let r = _mm_mask3_fmaddsub_ps(a, b, c, 0b00001111);
45331        let e = _mm_set_ps(1., 0., 3., 2.);
45332        assert_eq_m128(r, e);
45333    }
45334
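    // `fmsubadd` tests: the mirror of `fmaddsub`, so even-indexed lanes
    // compute `a * b + c` and odd-indexed lanes compute `a * b - c`.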
45335    #[simd_test(enable = "avx512f")]
45336    unsafe fn test_mm512_fmsubadd_ps() {
45337        let a = _mm512_set1_ps(1.);
45340        let b = _mm512_setr_ps(
45341            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45342        );
45343        let c = _mm512_set1_ps(1.);
45346        let r = _mm512_fmsubadd_ps(a, b, c);
45347        let e = _mm512_setr_ps(
45348            1., 0., 3., 2., 5., 4., 7., 6., 9., 8., 11., 10., 13., 12., 15., 14.,
45349        );
45350        assert_eq_m512(r, e);
45351    }
45352
45353    #[simd_test(enable = "avx512f")]
45354    unsafe fn test_mm512_mask_fmsubadd_ps() {
45355        let a = _mm512_set1_ps(1.);
45356        let b = _mm512_setr_ps(
45357            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45358        );
45359        let c = _mm512_set1_ps(1.);
45360        let r = _mm512_mask_fmsubadd_ps(a, 0, b, c);
45361        assert_eq_m512(r, a);
45362        let r = _mm512_mask_fmsubadd_ps(a, 0b00000000_11111111, b, c);
45363        let e = _mm512_setr_ps(
45364            1., 0., 3., 2., 5., 4., 7., 6., 1., 1., 1., 1., 1., 1., 1., 1.,
45365        );
45366        assert_eq_m512(r, e);
45367    }
45368
45369    #[simd_test(enable = "avx512f")]
45370    unsafe fn test_mm512_maskz_fmsubadd_ps() {
45371        let a = _mm512_set1_ps(1.);
45372        let b = _mm512_setr_ps(
45373            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45374        );
45375        let c = _mm512_set1_ps(1.);
45376        let r = _mm512_maskz_fmsubadd_ps(0, a, b, c);
45377        assert_eq_m512(r, _mm512_setzero_ps());
45378        let r = _mm512_maskz_fmsubadd_ps(0b00000000_11111111, a, b, c);
45379        let e = _mm512_setr_ps(
45380            1., 0., 3., 2., 5., 4., 7., 6., 0., 0., 0., 0., 0., 0., 0., 0.,
45381        );
45382        assert_eq_m512(r, e);
45383    }
45384
45385    #[simd_test(enable = "avx512f")]
45386    unsafe fn test_mm512_mask3_fmsubadd_ps() {
45387        let a = _mm512_set1_ps(1.);
45388        let b = _mm512_setr_ps(
45389            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45390        );
45391        let c = _mm512_setr_ps(
45392            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
45393        );
45394        let r = _mm512_mask3_fmsubadd_ps(a, b, c, 0);
45395        assert_eq_m512(r, c);
45396        let r = _mm512_mask3_fmsubadd_ps(a, b, c, 0b00000000_11111111);
45397        let e = _mm512_setr_ps(
45398            1., 0., 3., 2., 5., 4., 7., 6., 2., 2., 2., 2., 2., 2., 2., 2.,
45399        );
45400        assert_eq_m512(r, e);
45401    }
45402
45403    #[simd_test(enable = "avx512f,avx512vl")]
45404    unsafe fn test_mm256_mask_fmsubadd_ps() {
45405        let a = _mm256_set1_ps(1.);
45406        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45407        let c = _mm256_set1_ps(1.);
45408        let r = _mm256_mask_fmsubadd_ps(a, 0, b, c);
45409        assert_eq_m256(r, a);
45410        let r = _mm256_mask_fmsubadd_ps(a, 0b11111111, b, c);
45411        let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.);
45412        assert_eq_m256(r, e);
45413    }
45414
45415    #[simd_test(enable = "avx512f,avx512vl")]
45416    unsafe fn test_mm256_maskz_fmsubadd_ps() {
45417        let a = _mm256_set1_ps(1.);
45418        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45419        let c = _mm256_set1_ps(1.);
45420        let r = _mm256_maskz_fmsubadd_ps(0, a, b, c);
45421        assert_eq_m256(r, _mm256_setzero_ps());
45422        let r = _mm256_maskz_fmsubadd_ps(0b11111111, a, b, c);
45423        let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.);
45424        assert_eq_m256(r, e);
45425    }
45426
45427    #[simd_test(enable = "avx512f,avx512vl")]
45428    unsafe fn test_mm256_mask3_fmsubadd_ps() {
45429        let a = _mm256_set1_ps(1.);
45430        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45431        let c = _mm256_set1_ps(1.);
45432        let r = _mm256_mask3_fmsubadd_ps(a, b, c, 0);
45433        assert_eq_m256(r, c);
45434        let r = _mm256_mask3_fmsubadd_ps(a, b, c, 0b11111111);
45435        let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.);
45436        assert_eq_m256(r, e);
45437    }
45438
45439    #[simd_test(enable = "avx512f,avx512vl")]
45440    unsafe fn test_mm_mask_fmsubadd_ps() {
45441        let a = _mm_set1_ps(1.);
45442        let b = _mm_set_ps(0., 1., 2., 3.);
45443        let c = _mm_set1_ps(1.);
45444        let r = _mm_mask_fmsubadd_ps(a, 0, b, c);
45445        assert_eq_m128(r, a);
45446        let r = _mm_mask_fmsubadd_ps(a, 0b00001111, b, c);
45447        let e = _mm_set_ps(-1., 2., 1., 4.);
45448        assert_eq_m128(r, e);
45449    }
45450
45451    #[simd_test(enable = "avx512f,avx512vl")]
45452    unsafe fn test_mm_maskz_fmsubadd_ps() {
45453        let a = _mm_set1_ps(1.);
45454        let b = _mm_set_ps(0., 1., 2., 3.);
45455        let c = _mm_set1_ps(1.);
45456        let r = _mm_maskz_fmsubadd_ps(0, a, b, c);
45457        assert_eq_m128(r, _mm_setzero_ps());
45458        let r = _mm_maskz_fmsubadd_ps(0b00001111, a, b, c);
45459        let e = _mm_set_ps(-1., 2., 1., 4.);
45460        assert_eq_m128(r, e);
45461    }
45462
45463    #[simd_test(enable = "avx512f,avx512vl")]
45464    unsafe fn test_mm_mask3_fmsubadd_ps() {
45465        let a = _mm_set1_ps(1.);
45466        let b = _mm_set_ps(0., 1., 2., 3.);
45467        let c = _mm_set1_ps(1.);
45468        let r = _mm_mask3_fmsubadd_ps(a, b, c, 0);
45469        assert_eq_m128(r, c);
45470        let r = _mm_mask3_fmsubadd_ps(a, b, c, 0b00001111);
45471        let e = _mm_set_ps(-1., 2., 1., 4.);
45472        assert_eq_m128(r, e);
45473    }
45474
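    // `fnmadd` tests: each lane computes `-(a * b) + c`.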
45475    #[simd_test(enable = "avx512f")]
45476    unsafe fn test_mm512_fnmadd_ps() {
45477        let a = _mm512_set1_ps(1.);
45478        let b = _mm512_setr_ps(
45479            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45480        );
45481        let c = _mm512_set1_ps(1.);
45482        let r = _mm512_fnmadd_ps(a, b, c);
45483        let e = _mm512_setr_ps(
45484            1., 0., -1., -2., -3., -4., -5., -6., -7., -8., -9., -10., -11., -12., -13., -14.,
45485        );
45486        assert_eq_m512(r, e);
45487    }
45488
45489    #[simd_test(enable = "avx512f")]
45490    unsafe fn test_mm512_mask_fnmadd_ps() {
45491        let a = _mm512_set1_ps(1.);
45492        let b = _mm512_setr_ps(
45493            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45494        );
45495        let c = _mm512_set1_ps(1.);
45496        let r = _mm512_mask_fnmadd_ps(a, 0, b, c);
45497        assert_eq_m512(r, a);
45498        let r = _mm512_mask_fnmadd_ps(a, 0b00000000_11111111, b, c);
45499        let e = _mm512_setr_ps(
45500            1., 0., -1., -2., -3., -4., -5., -6., 1., 1., 1., 1., 1., 1., 1., 1.,
45501        );
45502        assert_eq_m512(r, e);
45503    }
45504
45505    #[simd_test(enable = "avx512f")]
45506    unsafe fn test_mm512_maskz_fnmadd_ps() {
45507        let a = _mm512_set1_ps(1.);
45508        let b = _mm512_setr_ps(
45509            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45510        );
45511        let c = _mm512_set1_ps(1.);
45512        let r = _mm512_maskz_fnmadd_ps(0, a, b, c);
45513        assert_eq_m512(r, _mm512_setzero_ps());
45514        let r = _mm512_maskz_fnmadd_ps(0b00000000_11111111, a, b, c);
45515        let e = _mm512_setr_ps(
45516            1., 0., -1., -2., -3., -4., -5., -6., 0., 0., 0., 0., 0., 0., 0., 0.,
45517        );
45518        assert_eq_m512(r, e);
45519    }
45520
45521    #[simd_test(enable = "avx512f")]
45522    unsafe fn test_mm512_mask3_fnmadd_ps() {
45523        let a = _mm512_set1_ps(1.);
45524        let b = _mm512_setr_ps(
45525            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45526        );
45527        let c = _mm512_setr_ps(
45528            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
45529        );
45530        let r = _mm512_mask3_fnmadd_ps(a, b, c, 0);
45531        assert_eq_m512(r, c);
45532        let r = _mm512_mask3_fnmadd_ps(a, b, c, 0b00000000_11111111);
45533        let e = _mm512_setr_ps(
45534            1., 0., -1., -2., -3., -4., -5., -6., 2., 2., 2., 2., 2., 2., 2., 2.,
45535        );
45536        assert_eq_m512(r, e);
45537    }
45538
45539    #[simd_test(enable = "avx512f,avx512vl")]
45540    unsafe fn test_mm256_mask_fnmadd_ps() {
45541        let a = _mm256_set1_ps(1.);
45542        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45543        let c = _mm256_set1_ps(1.);
45544        let r = _mm256_mask_fnmadd_ps(a, 0, b, c);
45545        assert_eq_m256(r, a);
45546        let r = _mm256_mask_fnmadd_ps(a, 0b11111111, b, c);
45547        let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.);
45548        assert_eq_m256(r, e);
45549    }
45550
45551    #[simd_test(enable = "avx512f,avx512vl")]
45552    unsafe fn test_mm256_maskz_fnmadd_ps() {
45553        let a = _mm256_set1_ps(1.);
45554        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45555        let c = _mm256_set1_ps(1.);
45556        let r = _mm256_maskz_fnmadd_ps(0, a, b, c);
45557        assert_eq_m256(r, _mm256_setzero_ps());
45558        let r = _mm256_maskz_fnmadd_ps(0b11111111, a, b, c);
45559        let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.);
45560        assert_eq_m256(r, e);
45561    }
45562
45563    #[simd_test(enable = "avx512f,avx512vl")]
45564    unsafe fn test_mm256_mask3_fnmadd_ps() {
45565        let a = _mm256_set1_ps(1.);
45566        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45567        let c = _mm256_set1_ps(1.);
45568        let r = _mm256_mask3_fnmadd_ps(a, b, c, 0);
45569        assert_eq_m256(r, c);
45570        let r = _mm256_mask3_fnmadd_ps(a, b, c, 0b11111111);
45571        let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.);
45572        assert_eq_m256(r, e);
45573    }
45574
45575    #[simd_test(enable = "avx512f,avx512vl")]
45576    unsafe fn test_mm_mask_fnmadd_ps() {
45577        let a = _mm_set1_ps(1.);
45578        let b = _mm_set_ps(0., 1., 2., 3.);
45579        let c = _mm_set1_ps(1.);
45580        let r = _mm_mask_fnmadd_ps(a, 0, b, c);
45581        assert_eq_m128(r, a);
45582        let r = _mm_mask_fnmadd_ps(a, 0b00001111, b, c);
45583        let e = _mm_set_ps(1., 0., -1., -2.);
45584        assert_eq_m128(r, e);
45585    }
45586
45587    #[simd_test(enable = "avx512f,avx512vl")]
45588    unsafe fn test_mm_maskz_fnmadd_ps() {
45589        let a = _mm_set1_ps(1.);
45590        let b = _mm_set_ps(0., 1., 2., 3.);
45591        let c = _mm_set1_ps(1.);
45592        let r = _mm_maskz_fnmadd_ps(0, a, b, c);
45593        assert_eq_m128(r, _mm_setzero_ps());
45594        let r = _mm_maskz_fnmadd_ps(0b00001111, a, b, c);
45595        let e = _mm_set_ps(1., 0., -1., -2.);
45596        assert_eq_m128(r, e);
45597    }
45598
45599    #[simd_test(enable = "avx512f,avx512vl")]
45600    unsafe fn test_mm_mask3_fnmadd_ps() {
45601        let a = _mm_set1_ps(1.);
45602        let b = _mm_set_ps(0., 1., 2., 3.);
45603        let c = _mm_set1_ps(1.);
45604        let r = _mm_mask3_fnmadd_ps(a, b, c, 0);
45605        assert_eq_m128(r, c);
45606        let r = _mm_mask3_fnmadd_ps(a, b, c, 0b00001111);
45607        let e = _mm_set_ps(1., 0., -1., -2.);
45608        assert_eq_m128(r, e);
45609    }
45610
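    // `fnmsub` tests: each lane computes `-(a * b) - c`.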
45611    #[simd_test(enable = "avx512f")]
45612    unsafe fn test_mm512_fnmsub_ps() {
45613        let a = _mm512_set1_ps(1.);
45614        let b = _mm512_setr_ps(
45615            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45616        );
45617        let c = _mm512_set1_ps(1.);
45618        let r = _mm512_fnmsub_ps(a, b, c);
45619        let e = _mm512_setr_ps(
45620            -1., -2., -3., -4., -5., -6., -7., -8., -9., -10., -11., -12., -13., -14., -15., -16.,
45621        );
45622        assert_eq_m512(r, e);
45623    }
45624
45625    #[simd_test(enable = "avx512f")]
45626    unsafe fn test_mm512_mask_fnmsub_ps() {
45627        let a = _mm512_set1_ps(1.);
45628        let b = _mm512_setr_ps(
45629            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45630        );
45631        let c = _mm512_set1_ps(1.);
45632        let r = _mm512_mask_fnmsub_ps(a, 0, b, c);
45633        assert_eq_m512(r, a);
45634        let r = _mm512_mask_fnmsub_ps(a, 0b00000000_11111111, b, c);
45635        let e = _mm512_setr_ps(
45636            -1., -2., -3., -4., -5., -6., -7., -8., 1., 1., 1., 1., 1., 1., 1., 1.,
45637        );
45638        assert_eq_m512(r, e);
45639    }
45640
45641    #[simd_test(enable = "avx512f")]
45642    unsafe fn test_mm512_maskz_fnmsub_ps() {
45643        let a = _mm512_set1_ps(1.);
45644        let b = _mm512_setr_ps(
45645            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45646        );
45647        let c = _mm512_set1_ps(1.);
45648        let r = _mm512_maskz_fnmsub_ps(0, a, b, c);
45649        assert_eq_m512(r, _mm512_setzero_ps());
45650        let r = _mm512_maskz_fnmsub_ps(0b00000000_11111111, a, b, c);
45651        let e = _mm512_setr_ps(
45652            -1., -2., -3., -4., -5., -6., -7., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
45653        );
45654        assert_eq_m512(r, e);
45655    }
45656
45657    #[simd_test(enable = "avx512f")]
45658    unsafe fn test_mm512_mask3_fnmsub_ps() {
45659        let a = _mm512_set1_ps(1.);
45660        let b = _mm512_setr_ps(
45661            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45662        );
45663        let c = _mm512_setr_ps(
45664            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
45665        );
45666        let r = _mm512_mask3_fnmsub_ps(a, b, c, 0);
45667        assert_eq_m512(r, c);
45668        let r = _mm512_mask3_fnmsub_ps(a, b, c, 0b00000000_11111111);
45669        let e = _mm512_setr_ps(
45670            -1., -2., -3., -4., -5., -6., -7., -8., 2., 2., 2., 2., 2., 2., 2., 2.,
45671        );
45672        assert_eq_m512(r, e);
45673    }
45674
45675    #[simd_test(enable = "avx512f,avx512vl")]
45676    unsafe fn test_mm256_mask_fnmsub_ps() {
45677        let a = _mm256_set1_ps(1.);
45678        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45679        let c = _mm256_set1_ps(1.);
45680        let r = _mm256_mask_fnmsub_ps(a, 0, b, c);
45681        assert_eq_m256(r, a);
45682        let r = _mm256_mask_fnmsub_ps(a, 0b11111111, b, c);
45683        let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.);
45684        assert_eq_m256(r, e);
45685    }
45686
45687    #[simd_test(enable = "avx512f,avx512vl")]
45688    unsafe fn test_mm256_maskz_fnmsub_ps() {
45689        let a = _mm256_set1_ps(1.);
45690        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45691        let c = _mm256_set1_ps(1.);
45692        let r = _mm256_maskz_fnmsub_ps(0, a, b, c);
45693        assert_eq_m256(r, _mm256_setzero_ps());
45694        let r = _mm256_maskz_fnmsub_ps(0b11111111, a, b, c);
45695        let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.);
45696        assert_eq_m256(r, e);
45697    }
45698
45699    #[simd_test(enable = "avx512f,avx512vl")]
45700    unsafe fn test_mm256_mask3_fnmsub_ps() {
45701        let a = _mm256_set1_ps(1.);
45702        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45703        let c = _mm256_set1_ps(1.);
45704        let r = _mm256_mask3_fnmsub_ps(a, b, c, 0);
45705        assert_eq_m256(r, c);
45706        let r = _mm256_mask3_fnmsub_ps(a, b, c, 0b11111111);
45707        let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.);
45708        assert_eq_m256(r, e);
45709    }
45710
45711    #[simd_test(enable = "avx512f,avx512vl")]
45712    unsafe fn test_mm_mask_fnmsub_ps() {
45713        let a = _mm_set1_ps(1.);
45714        let b = _mm_set_ps(0., 1., 2., 3.);
45715        let c = _mm_set1_ps(1.);
45716        let r = _mm_mask_fnmsub_ps(a, 0, b, c);
45717        assert_eq_m128(r, a);
45718        let r = _mm_mask_fnmsub_ps(a, 0b00001111, b, c);
45719        let e = _mm_set_ps(-1., -2., -3., -4.);
45720        assert_eq_m128(r, e);
45721    }
45722
45723    #[simd_test(enable = "avx512f,avx512vl")]
45724    unsafe fn test_mm_maskz_fnmsub_ps() {
45725        let a = _mm_set1_ps(1.);
45726        let b = _mm_set_ps(0., 1., 2., 3.);
45727        let c = _mm_set1_ps(1.);
45728        let r = _mm_maskz_fnmsub_ps(0, a, b, c);
45729        assert_eq_m128(r, _mm_setzero_ps());
45730        let r = _mm_maskz_fnmsub_ps(0b00001111, a, b, c);
45731        let e = _mm_set_ps(-1., -2., -3., -4.);
45732        assert_eq_m128(r, e);
45733    }
45734
45735    #[simd_test(enable = "avx512f,avx512vl")]
45736    unsafe fn test_mm_mask3_fnmsub_ps() {
45737        let a = _mm_set1_ps(1.);
45738        let b = _mm_set_ps(0., 1., 2., 3.);
45739        let c = _mm_set1_ps(1.);
45740        let r = _mm_mask3_fnmsub_ps(a, b, c, 0);
45741        assert_eq_m128(r, c);
45742        let r = _mm_mask3_fnmsub_ps(a, b, c, 0b00001111);
45743        let e = _mm_set_ps(-1., -2., -3., -4.);
45744        assert_eq_m128(r, e);
45745    }
45746
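    // `rcp14` tests: the hardware approximates `1.0 / a` with a maximum
    // relative error of 2^-14, so the expected value 0.33333206 is the
    // approximation rather than the exact reciprocal of 3.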
45747    #[simd_test(enable = "avx512f")]
45748    unsafe fn test_mm512_rcp14_ps() {
45749        let a = _mm512_set1_ps(3.);
45750        let r = _mm512_rcp14_ps(a);
45751        let e = _mm512_set1_ps(0.33333206);
45752        assert_eq_m512(r, e);
45753    }
45754
45755    #[simd_test(enable = "avx512f")]
45756    unsafe fn test_mm512_mask_rcp14_ps() {
45757        let a = _mm512_set1_ps(3.);
45758        let r = _mm512_mask_rcp14_ps(a, 0, a);
45759        assert_eq_m512(r, a);
45760        let r = _mm512_mask_rcp14_ps(a, 0b11111111_00000000, a);
45761        let e = _mm512_setr_ps(
45762            3., 3., 3., 3., 3., 3., 3., 3., 0.33333206, 0.33333206, 0.33333206, 0.33333206,
45763            0.33333206, 0.33333206, 0.33333206, 0.33333206,
45764        );
45765        assert_eq_m512(r, e);
45766    }
45767
45768    #[simd_test(enable = "avx512f")]
45769    unsafe fn test_mm512_maskz_rcp14_ps() {
45770        let a = _mm512_set1_ps(3.);
45771        let r = _mm512_maskz_rcp14_ps(0, a);
45772        assert_eq_m512(r, _mm512_setzero_ps());
45773        let r = _mm512_maskz_rcp14_ps(0b11111111_00000000, a);
45774        let e = _mm512_setr_ps(
45775            0., 0., 0., 0., 0., 0., 0., 0., 0.33333206, 0.33333206, 0.33333206, 0.33333206,
45776            0.33333206, 0.33333206, 0.33333206, 0.33333206,
45777        );
45778        assert_eq_m512(r, e);
45779    }
45780
45781    #[simd_test(enable = "avx512f,avx512vl")]
45782    unsafe fn test_mm256_rcp14_ps() {
45783        let a = _mm256_set1_ps(3.);
45784        let r = _mm256_rcp14_ps(a);
45785        let e = _mm256_set1_ps(0.33333206);
45786        assert_eq_m256(r, e);
45787    }
45788
45789    #[simd_test(enable = "avx512f,avx512vl")]
45790    unsafe fn test_mm256_mask_rcp14_ps() {
45791        let a = _mm256_set1_ps(3.);
45792        let r = _mm256_mask_rcp14_ps(a, 0, a);
45793        assert_eq_m256(r, a);
45794        let r = _mm256_mask_rcp14_ps(a, 0b11111111, a);
45795        let e = _mm256_set1_ps(0.33333206);
45796        assert_eq_m256(r, e);
45797    }
45798
45799    #[simd_test(enable = "avx512f,avx512vl")]
45800    unsafe fn test_mm256_maskz_rcp14_ps() {
45801        let a = _mm256_set1_ps(3.);
45802        let r = _mm256_maskz_rcp14_ps(0, a);
45803        assert_eq_m256(r, _mm256_setzero_ps());
45804        let r = _mm256_maskz_rcp14_ps(0b11111111, a);
45805        let e = _mm256_set1_ps(0.33333206);
45806        assert_eq_m256(r, e);
45807    }
45808
45809    #[simd_test(enable = "avx512f,avx512vl")]
45810    unsafe fn test_mm_rcp14_ps() {
45811        let a = _mm_set1_ps(3.);
45812        let r = _mm_rcp14_ps(a);
45813        let e = _mm_set1_ps(0.33333206);
45814        assert_eq_m128(r, e);
45815    }
45816
45817    #[simd_test(enable = "avx512f,avx512vl")]
45818    unsafe fn test_mm_mask_rcp14_ps() {
45819        let a = _mm_set1_ps(3.);
45820        let r = _mm_mask_rcp14_ps(a, 0, a);
45821        assert_eq_m128(r, a);
45822        let r = _mm_mask_rcp14_ps(a, 0b00001111, a);
45823        let e = _mm_set1_ps(0.33333206);
45824        assert_eq_m128(r, e);
45825    }
45826
45827    #[simd_test(enable = "avx512f,avx512vl")]
45828    unsafe fn test_mm_maskz_rcp14_ps() {
45829        let a = _mm_set1_ps(3.);
45830        let r = _mm_maskz_rcp14_ps(0, a);
45831        assert_eq_m128(r, _mm_setzero_ps());
45832        let r = _mm_maskz_rcp14_ps(0b00001111, a);
45833        let e = _mm_set1_ps(0.33333206);
45834        assert_eq_m128(r, e);
45835    }
45836
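    // `rsqrt14` tests: approximate `1.0 / sqrt(a)` with a maximum relative
    // error of 2^-14; 0.5773392 is the approximation of 1/sqrt(3).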
45837    #[simd_test(enable = "avx512f")]
45838    unsafe fn test_mm512_rsqrt14_ps() {
45839        let a = _mm512_set1_ps(3.);
45840        let r = _mm512_rsqrt14_ps(a);
45841        let e = _mm512_set1_ps(0.5773392);
45842        assert_eq_m512(r, e);
45843    }
45844
45845    #[simd_test(enable = "avx512f")]
45846    unsafe fn test_mm512_mask_rsqrt14_ps() {
45847        let a = _mm512_set1_ps(3.);
45848        let r = _mm512_mask_rsqrt14_ps(a, 0, a);
45849        assert_eq_m512(r, a);
45850        let r = _mm512_mask_rsqrt14_ps(a, 0b11111111_00000000, a);
45851        let e = _mm512_setr_ps(
45852            3., 3., 3., 3., 3., 3., 3., 3., 0.5773392, 0.5773392, 0.5773392, 0.5773392, 0.5773392,
45853            0.5773392, 0.5773392, 0.5773392,
45854        );
45855        assert_eq_m512(r, e);
45856    }
45857
45858    #[simd_test(enable = "avx512f")]
45859    unsafe fn test_mm512_maskz_rsqrt14_ps() {
45860        let a = _mm512_set1_ps(3.);
45861        let r = _mm512_maskz_rsqrt14_ps(0, a);
45862        assert_eq_m512(r, _mm512_setzero_ps());
45863        let r = _mm512_maskz_rsqrt14_ps(0b11111111_00000000, a);
45864        let e = _mm512_setr_ps(
45865            0., 0., 0., 0., 0., 0., 0., 0., 0.5773392, 0.5773392, 0.5773392, 0.5773392, 0.5773392,
45866            0.5773392, 0.5773392, 0.5773392,
45867        );
45868        assert_eq_m512(r, e);
45869    }
45870
45871    #[simd_test(enable = "avx512f,avx512vl")]
45872    unsafe fn test_mm256_rsqrt14_ps() {
45873        let a = _mm256_set1_ps(3.);
45874        let r = _mm256_rsqrt14_ps(a);
45875        let e = _mm256_set1_ps(0.5773392);
45876        assert_eq_m256(r, e);
45877    }
45878
45879    #[simd_test(enable = "avx512f,avx512vl")]
45880    unsafe fn test_mm256_mask_rsqrt14_ps() {
45881        let a = _mm256_set1_ps(3.);
45882        let r = _mm256_mask_rsqrt14_ps(a, 0, a);
45883        assert_eq_m256(r, a);
45884        let r = _mm256_mask_rsqrt14_ps(a, 0b11111111, a);
45885        let e = _mm256_set1_ps(0.5773392);
45886        assert_eq_m256(r, e);
45887    }
45888
45889    #[simd_test(enable = "avx512f,avx512vl")]
45890    unsafe fn test_mm256_maskz_rsqrt14_ps() {
45891        let a = _mm256_set1_ps(3.);
45892        let r = _mm256_maskz_rsqrt14_ps(0, a);
45893        assert_eq_m256(r, _mm256_setzero_ps());
45894        let r = _mm256_maskz_rsqrt14_ps(0b11111111, a);
45895        let e = _mm256_set1_ps(0.5773392);
45896        assert_eq_m256(r, e);
45897    }
45898
45899    #[simd_test(enable = "avx512f,avx512vl")]
45900    unsafe fn test_mm_rsqrt14_ps() {
45901        let a = _mm_set1_ps(3.);
45902        let r = _mm_rsqrt14_ps(a);
45903        let e = _mm_set1_ps(0.5773392);
45904        assert_eq_m128(r, e);
45905    }
45906
45907    #[simd_test(enable = "avx512f,avx512vl")]
45908    unsafe fn test_mm_mask_rsqrt14_ps() {
45909        let a = _mm_set1_ps(3.);
45910        let r = _mm_mask_rsqrt14_ps(a, 0, a);
45911        assert_eq_m128(r, a);
45912        let r = _mm_mask_rsqrt14_ps(a, 0b00001111, a);
45913        let e = _mm_set1_ps(0.5773392);
45914        assert_eq_m128(r, e);
45915    }
45916
45917    #[simd_test(enable = "avx512f,avx512vl")]
45918    unsafe fn test_mm_maskz_rsqrt14_ps() {
45919        let a = _mm_set1_ps(3.);
45920        let r = _mm_maskz_rsqrt14_ps(0, a);
45921        assert_eq_m128(r, _mm_setzero_ps());
45922        let r = _mm_maskz_rsqrt14_ps(0b00001111, a);
45923        let e = _mm_set1_ps(0.5773392);
45924        assert_eq_m128(r, e);
45925    }
45926
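    // `getexp` tests: each lane yields the unbiased exponent of the input as a
    // float, i.e. `floor(log2(|a|))`, so 3.0 maps to 1.0.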
45927    #[simd_test(enable = "avx512f")]
45928    unsafe fn test_mm512_getexp_ps() {
45929        let a = _mm512_set1_ps(3.);
45930        let r = _mm512_getexp_ps(a);
45931        let e = _mm512_set1_ps(1.);
45932        assert_eq_m512(r, e);
45933    }
45934
45935    #[simd_test(enable = "avx512f")]
45936    unsafe fn test_mm512_mask_getexp_ps() {
45937        let a = _mm512_set1_ps(3.);
45938        let r = _mm512_mask_getexp_ps(a, 0, a);
45939        assert_eq_m512(r, a);
45940        let r = _mm512_mask_getexp_ps(a, 0b11111111_00000000, a);
45941        let e = _mm512_setr_ps(
45942            3., 3., 3., 3., 3., 3., 3., 3., 1., 1., 1., 1., 1., 1., 1., 1.,
45943        );
45944        assert_eq_m512(r, e);
45945    }
45946
45947    #[simd_test(enable = "avx512f")]
45948    unsafe fn test_mm512_maskz_getexp_ps() {
45949        let a = _mm512_set1_ps(3.);
45950        let r = _mm512_maskz_getexp_ps(0, a);
45951        assert_eq_m512(r, _mm512_setzero_ps());
45952        let r = _mm512_maskz_getexp_ps(0b11111111_00000000, a);
45953        let e = _mm512_setr_ps(
45954            0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
45955        );
45956        assert_eq_m512(r, e);
45957    }
45958
45959    #[simd_test(enable = "avx512f,avx512vl")]
45960    unsafe fn test_mm256_getexp_ps() {
45961        let a = _mm256_set1_ps(3.);
45962        let r = _mm256_getexp_ps(a);
45963        let e = _mm256_set1_ps(1.);
45964        assert_eq_m256(r, e);
45965    }
45966
45967    #[simd_test(enable = "avx512f,avx512vl")]
45968    unsafe fn test_mm256_mask_getexp_ps() {
45969        let a = _mm256_set1_ps(3.);
45970        let r = _mm256_mask_getexp_ps(a, 0, a);
45971        assert_eq_m256(r, a);
45972        let r = _mm256_mask_getexp_ps(a, 0b11111111, a);
45973        let e = _mm256_set1_ps(1.);
45974        assert_eq_m256(r, e);
45975    }
45976
45977    #[simd_test(enable = "avx512f,avx512vl")]
45978    unsafe fn test_mm256_maskz_getexp_ps() {
45979        let a = _mm256_set1_ps(3.);
45980        let r = _mm256_maskz_getexp_ps(0, a);
45981        assert_eq_m256(r, _mm256_setzero_ps());
45982        let r = _mm256_maskz_getexp_ps(0b11111111, a);
45983        let e = _mm256_set1_ps(1.);
45984        assert_eq_m256(r, e);
45985    }
45986
45987    #[simd_test(enable = "avx512f,avx512vl")]
45988    unsafe fn test_mm_getexp_ps() {
45989        let a = _mm_set1_ps(3.);
45990        let r = _mm_getexp_ps(a);
45991        let e = _mm_set1_ps(1.);
45992        assert_eq_m128(r, e);
45993    }
45994
45995    #[simd_test(enable = "avx512f,avx512vl")]
45996    unsafe fn test_mm_mask_getexp_ps() {
45997        let a = _mm_set1_ps(3.);
45998        let r = _mm_mask_getexp_ps(a, 0, a);
45999        assert_eq_m128(r, a);
46000        let r = _mm_mask_getexp_ps(a, 0b00001111, a);
46001        let e = _mm_set1_ps(1.);
46002        assert_eq_m128(r, e);
46003    }
46004
46005    #[simd_test(enable = "avx512f,avx512vl")]
46006    unsafe fn test_mm_maskz_getexp_ps() {
46007        let a = _mm_set1_ps(3.);
46008        let r = _mm_maskz_getexp_ps(0, a);
46009        assert_eq_m128(r, _mm_setzero_ps());
46010        let r = _mm_maskz_getexp_ps(0b00001111, a);
46011        let e = _mm_set1_ps(1.);
46012        assert_eq_m128(r, e);
46013    }
46014
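    // `roundscale` tests: the immediate's upper nibble selects how many
    // fraction bits to keep and its low bits select the rounding mode; an
    // immediate of 0 simply rounds to the nearest integer, so 1.1 becomes 1.0.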
46015    #[simd_test(enable = "avx512f")]
46016    unsafe fn test_mm512_roundscale_ps() {
46017        let a = _mm512_set1_ps(1.1);
46018        let r = _mm512_roundscale_ps::<0b00_00_00_00>(a);
46019        let e = _mm512_set1_ps(1.0);
46020        assert_eq_m512(r, e);
46021    }
46022
46023    #[simd_test(enable = "avx512f")]
46024    unsafe fn test_mm512_mask_roundscale_ps() {
46025        let a = _mm512_set1_ps(1.1);
46026        let r = _mm512_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a);
46027        let e = _mm512_set1_ps(1.1);
46028        assert_eq_m512(r, e);
46029        let r = _mm512_mask_roundscale_ps::<0b00_00_00_00>(a, 0b11111111_11111111, a);
46030        let e = _mm512_set1_ps(1.0);
46031        assert_eq_m512(r, e);
46032    }
46033
46034    #[simd_test(enable = "avx512f")]
46035    unsafe fn test_mm512_maskz_roundscale_ps() {
46036        let a = _mm512_set1_ps(1.1);
46037        let r = _mm512_maskz_roundscale_ps::<0b00_00_00_00>(0, a);
46038        assert_eq_m512(r, _mm512_setzero_ps());
46039        let r = _mm512_maskz_roundscale_ps::<0b00_00_00_00>(0b11111111_11111111, a);
46040        let e = _mm512_set1_ps(1.0);
46041        assert_eq_m512(r, e);
46042    }
46043
46044    #[simd_test(enable = "avx512f,avx512vl")]
46045    unsafe fn test_mm256_roundscale_ps() {
46046        let a = _mm256_set1_ps(1.1);
46047        let r = _mm256_roundscale_ps::<0b00_00_00_00>(a);
46048        let e = _mm256_set1_ps(1.0);
46049        assert_eq_m256(r, e);
46050    }
46051
46052    #[simd_test(enable = "avx512f,avx512vl")]
46053    unsafe fn test_mm256_mask_roundscale_ps() {
46054        let a = _mm256_set1_ps(1.1);
46055        let r = _mm256_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a);
46056        let e = _mm256_set1_ps(1.1);
46057        assert_eq_m256(r, e);
46058        let r = _mm256_mask_roundscale_ps::<0b00_00_00_00>(a, 0b11111111, a);
46059        let e = _mm256_set1_ps(1.0);
46060        assert_eq_m256(r, e);
46061    }
46062
46063    #[simd_test(enable = "avx512f,avx512vl")]
46064    unsafe fn test_mm256_maskz_roundscale_ps() {
46065        let a = _mm256_set1_ps(1.1);
46066        let r = _mm256_maskz_roundscale_ps::<0b00_00_00_00>(0, a);
46067        assert_eq_m256(r, _mm256_setzero_ps());
46068        let r = _mm256_maskz_roundscale_ps::<0b00_00_00_00>(0b11111111, a);
46069        let e = _mm256_set1_ps(1.0);
46070        assert_eq_m256(r, e);
46071    }
46072
46073    #[simd_test(enable = "avx512f,avx512vl")]
46074    unsafe fn test_mm_roundscale_ps() {
46075        let a = _mm_set1_ps(1.1);
46076        let r = _mm_roundscale_ps::<0b00_00_00_00>(a);
46077        let e = _mm_set1_ps(1.0);
46078        assert_eq_m128(r, e);
46079    }
46080
46081    #[simd_test(enable = "avx512f,avx512vl")]
46082    unsafe fn test_mm_mask_roundscale_ps() {
46083        let a = _mm_set1_ps(1.1);
46084        let r = _mm_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a);
46085        let e = _mm_set1_ps(1.1);
46086        assert_eq_m128(r, e);
46087        let r = _mm_mask_roundscale_ps::<0b00_00_00_00>(a, 0b00001111, a);
46088        let e = _mm_set1_ps(1.0);
46089        assert_eq_m128(r, e);
46090    }
46091
46092    #[simd_test(enable = "avx512f,avx512vl")]
46093    unsafe fn test_mm_maskz_roundscale_ps() {
46094        let a = _mm_set1_ps(1.1);
46095        let r = _mm_maskz_roundscale_ps::<0b00_00_00_00>(0, a);
46096        assert_eq_m128(r, _mm_setzero_ps());
46097        let r = _mm_maskz_roundscale_ps::<0b00_00_00_00>(0b00001111, a);
46098        let e = _mm_set1_ps(1.0);
46099        assert_eq_m128(r, e);
46100    }
46101
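    // `scalef` tests: each lane computes `a * 2^floor(b)`, so 1.0 scaled by
    // 3.0 gives 8.0.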
46102    #[simd_test(enable = "avx512f")]
46103    unsafe fn test_mm512_scalef_ps() {
46104        let a = _mm512_set1_ps(1.);
46105        let b = _mm512_set1_ps(3.);
46106        let r = _mm512_scalef_ps(a, b);
46107        let e = _mm512_set1_ps(8.);
46108        assert_eq_m512(r, e);
46109    }
46110
46111    #[simd_test(enable = "avx512f")]
46112    unsafe fn test_mm512_mask_scalef_ps() {
46113        let a = _mm512_set1_ps(1.);
46114        let b = _mm512_set1_ps(3.);
46115        let r = _mm512_mask_scalef_ps(a, 0, a, b);
46116        assert_eq_m512(r, a);
46117        let r = _mm512_mask_scalef_ps(a, 0b11111111_00000000, a, b);
46118        let e = _mm512_set_ps(
46119            8., 8., 8., 8., 8., 8., 8., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
46120        );
46121        assert_eq_m512(r, e);
46122    }
46123
46124    #[simd_test(enable = "avx512f")]
46125    unsafe fn test_mm512_maskz_scalef_ps() {
46126        let a = _mm512_set1_ps(1.);
46127        let b = _mm512_set1_ps(3.);
46128        let r = _mm512_maskz_scalef_ps(0, a, b);
46129        assert_eq_m512(r, _mm512_setzero_ps());
46130        let r = _mm512_maskz_scalef_ps(0b11111111_00000000, a, b);
46131        let e = _mm512_set_ps(
46132            8., 8., 8., 8., 8., 8., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
46133        );
46134        assert_eq_m512(r, e);
46135    }
46136
46137    #[simd_test(enable = "avx512f,avx512vl")]
46138    unsafe fn test_mm256_scalef_ps() {
46139        let a = _mm256_set1_ps(1.);
46140        let b = _mm256_set1_ps(3.);
46141        let r = _mm256_scalef_ps(a, b);
46142        let e = _mm256_set1_ps(8.);
46143        assert_eq_m256(r, e);
46144    }
46145
46146    #[simd_test(enable = "avx512f,avx512vl")]
46147    unsafe fn test_mm256_mask_scalef_ps() {
46148        let a = _mm256_set1_ps(1.);
46149        let b = _mm256_set1_ps(3.);
46150        let r = _mm256_mask_scalef_ps(a, 0, a, b);
46151        assert_eq_m256(r, a);
46152        let r = _mm256_mask_scalef_ps(a, 0b11111111, a, b);
46153        let e = _mm256_set1_ps(8.);
46154        assert_eq_m256(r, e);
46155    }
46156
46157    #[simd_test(enable = "avx512f,avx512vl")]
46158    unsafe fn test_mm256_maskz_scalef_ps() {
46159        let a = _mm256_set1_ps(1.);
46160        let b = _mm256_set1_ps(3.);
46161        let r = _mm256_maskz_scalef_ps(0, a, b);
46162        assert_eq_m256(r, _mm256_setzero_ps());
46163        let r = _mm256_maskz_scalef_ps(0b11111111, a, b);
46164        let e = _mm256_set1_ps(8.);
46165        assert_eq_m256(r, e);
46166    }
46167
46168    #[simd_test(enable = "avx512f,avx512vl")]
46169    unsafe fn test_mm_scalef_ps() {
46170        let a = _mm_set1_ps(1.);
46171        let b = _mm_set1_ps(3.);
46172        let r = _mm_scalef_ps(a, b);
46173        let e = _mm_set1_ps(8.);
46174        assert_eq_m128(r, e);
46175    }
46176
46177    #[simd_test(enable = "avx512f,avx512vl")]
46178    unsafe fn test_mm_mask_scalef_ps() {
46179        let a = _mm_set1_ps(1.);
46180        let b = _mm_set1_ps(3.);
46181        let r = _mm_mask_scalef_ps(a, 0, a, b);
46182        assert_eq_m128(r, a);
46183        let r = _mm_mask_scalef_ps(a, 0b00001111, a, b);
46184        let e = _mm_set1_ps(8.);
46185        assert_eq_m128(r, e);
46186    }
46187
46188    #[simd_test(enable = "avx512f,avx512vl")]
46189    unsafe fn test_mm_maskz_scalef_ps() {
46190        let a = _mm_set1_ps(1.);
46191        let b = _mm_set1_ps(3.);
46192        let r = _mm_maskz_scalef_ps(0, a, b);
46193        assert_eq_m128(r, _mm_setzero_ps());
46194        let r = _mm_maskz_scalef_ps(0b00001111, a, b);
46195        let e = _mm_set1_ps(8.);
46196        assert_eq_m128(r, e);
46197    }
46198
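    // `fixupimm` tests: special floating-point inputs (here NaN) are patched
    // according to a per-lane lookup table supplied in the integer operand;
    // with this particular table and immediate every selected lane comes out
    // as +0.0.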
46199    #[simd_test(enable = "avx512f")]
46200    unsafe fn test_mm512_fixupimm_ps() {
46201        let a = _mm512_set1_ps(f32::NAN);
46202        let b = _mm512_set1_ps(f32::MAX);
46203        let c = _mm512_set1_epi32(i32::MAX);
46205        let r = _mm512_fixupimm_ps::<5>(a, b, c);
46206        let e = _mm512_set1_ps(0.0);
46207        assert_eq_m512(r, e);
46208    }
46209
46210    #[simd_test(enable = "avx512f")]
46211    unsafe fn test_mm512_mask_fixupimm_ps() {
46212        #[rustfmt::skip]
46213        let a = _mm512_set_ps(
46214            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
46215            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
46216            1., 1., 1., 1.,
46217            1., 1., 1., 1.,
46218        );
46219        let b = _mm512_set1_ps(f32::MAX);
46220        let c = _mm512_set1_epi32(i32::MAX);
46221        let r = _mm512_mask_fixupimm_ps::<5>(a, 0b11111111_00000000, b, c);
46222        let e = _mm512_set_ps(
46223            0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
46224        );
46225        assert_eq_m512(r, e);
46226    }
46227
46228    #[simd_test(enable = "avx512f")]
46229    unsafe fn test_mm512_maskz_fixupimm_ps() {
46230        #[rustfmt::skip]
46231        let a = _mm512_set_ps(
46232            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
46233            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
46234            1., 1., 1., 1.,
46235            1., 1., 1., 1.,
46236        );
46237        let b = _mm512_set1_ps(f32::MAX);
46238        let c = _mm512_set1_epi32(i32::MAX);
46239        let r = _mm512_maskz_fixupimm_ps::<5>(0b11111111_00000000, a, b, c);
46240        let e = _mm512_set_ps(
46241            0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
46242        );
46243        assert_eq_m512(r, e);
46244    }
46245
46246    #[simd_test(enable = "avx512f,avx512vl")]
46247    unsafe fn test_mm256_fixupimm_ps() {
46248        let a = _mm256_set1_ps(f32::NAN);
46249        let b = _mm256_set1_ps(f32::MAX);
46250        let c = _mm256_set1_epi32(i32::MAX);
46251        let r = _mm256_fixupimm_ps::<5>(a, b, c);
46252        let e = _mm256_set1_ps(0.0);
46253        assert_eq_m256(r, e);
46254    }
46255
46256    #[simd_test(enable = "avx512f,avx512vl")]
46257    unsafe fn test_mm256_mask_fixupimm_ps() {
46258        let a = _mm256_set1_ps(f32::NAN);
46259        let b = _mm256_set1_ps(f32::MAX);
46260        let c = _mm256_set1_epi32(i32::MAX);
46261        let r = _mm256_mask_fixupimm_ps::<5>(a, 0b11111111, b, c);
46262        let e = _mm256_set1_ps(0.0);
46263        assert_eq_m256(r, e);
46264    }
46265
46266    #[simd_test(enable = "avx512f,avx512vl")]
46267    unsafe fn test_mm256_maskz_fixupimm_ps() {
46268        let a = _mm256_set1_ps(f32::NAN);
46269        let b = _mm256_set1_ps(f32::MAX);
46270        let c = _mm256_set1_epi32(i32::MAX);
46271        let r = _mm256_maskz_fixupimm_ps::<5>(0b11111111, a, b, c);
46272        let e = _mm256_set1_ps(0.0);
46273        assert_eq_m256(r, e);
46274    }
46275
46276    #[simd_test(enable = "avx512f,avx512vl")]
46277    unsafe fn test_mm_fixupimm_ps() {
46278        let a = _mm_set1_ps(f32::NAN);
46279        let b = _mm_set1_ps(f32::MAX);
46280        let c = _mm_set1_epi32(i32::MAX);
46281        let r = _mm_fixupimm_ps::<5>(a, b, c);
46282        let e = _mm_set1_ps(0.0);
46283        assert_eq_m128(r, e);
46284    }
46285
46286    #[simd_test(enable = "avx512f,avx512vl")]
46287    unsafe fn test_mm_mask_fixupimm_ps() {
46288        let a = _mm_set1_ps(f32::NAN);
46289        let b = _mm_set1_ps(f32::MAX);
46290        let c = _mm_set1_epi32(i32::MAX);
46291        let r = _mm_mask_fixupimm_ps::<5>(a, 0b00001111, b, c);
46292        let e = _mm_set1_ps(0.0);
46293        assert_eq_m128(r, e);
46294    }
46295
46296    #[simd_test(enable = "avx512f,avx512vl")]
46297    unsafe fn test_mm_maskz_fixupimm_ps() {
46298        let a = _mm_set1_ps(f32::NAN);
46299        let b = _mm_set1_ps(f32::MAX);
46300        let c = _mm_set1_epi32(i32::MAX);
46301        let r = _mm_maskz_fixupimm_ps::<5>(0b00001111, a, b, c);
46302        let e = _mm_set1_ps(0.0);
46303        assert_eq_m128(r, e);
46304    }
46305
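    // `ternarylogic` tests: the immediate acts as an 8-entry truth table over
    // the corresponding bits of `a`, `b` and `c`; with bits 2, 1 and 0 set in
    // `a`, `b` and `c` respectively, none of the bit combinations that occur
    // select a set table entry, so the result is all zeros.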
46306    #[simd_test(enable = "avx512f")]
46307    unsafe fn test_mm512_ternarylogic_epi32() {
46308        let a = _mm512_set1_epi32(1 << 2);
46309        let b = _mm512_set1_epi32(1 << 1);
46310        let c = _mm512_set1_epi32(1 << 0);
46311        let r = _mm512_ternarylogic_epi32::<8>(a, b, c);
46312        let e = _mm512_set1_epi32(0);
46313        assert_eq_m512i(r, e);
46314    }
46315
46316    #[simd_test(enable = "avx512f")]
46317    unsafe fn test_mm512_mask_ternarylogic_epi32() {
46318        let src = _mm512_set1_epi32(1 << 2);
46319        let a = _mm512_set1_epi32(1 << 1);
46320        let b = _mm512_set1_epi32(1 << 0);
46321        let r = _mm512_mask_ternarylogic_epi32::<8>(src, 0, a, b);
46322        assert_eq_m512i(r, src);
46323        let r = _mm512_mask_ternarylogic_epi32::<8>(src, 0b11111111_11111111, a, b);
46324        let e = _mm512_set1_epi32(0);
46325        assert_eq_m512i(r, e);
46326    }
46327
46328    #[simd_test(enable = "avx512f")]
46329    unsafe fn test_mm512_maskz_ternarylogic_epi32() {
46330        let a = _mm512_set1_epi32(1 << 2);
46331        let b = _mm512_set1_epi32(1 << 1);
46332        let c = _mm512_set1_epi32(1 << 0);
46333        let r = _mm512_maskz_ternarylogic_epi32::<9>(0, a, b, c);
46334        assert_eq_m512i(r, _mm512_setzero_si512());
46335        let r = _mm512_maskz_ternarylogic_epi32::<8>(0b11111111_11111111, a, b, c);
46336        let e = _mm512_set1_epi32(0);
46337        assert_eq_m512i(r, e);
46338    }
46339
46340    #[simd_test(enable = "avx512f,avx512vl")]
46341    unsafe fn test_mm256_ternarylogic_epi32() {
46342        let a = _mm256_set1_epi32(1 << 2);
46343        let b = _mm256_set1_epi32(1 << 1);
46344        let c = _mm256_set1_epi32(1 << 0);
46345        let r = _mm256_ternarylogic_epi32::<8>(a, b, c);
46346        let e = _mm256_set1_epi32(0);
46347        assert_eq_m256i(r, e);
46348    }
46349
46350    #[simd_test(enable = "avx512f,avx512vl")]
46351    unsafe fn test_mm256_mask_ternarylogic_epi32() {
46352        let src = _mm256_set1_epi32(1 << 2);
46353        let a = _mm256_set1_epi32(1 << 1);
46354        let b = _mm256_set1_epi32(1 << 0);
46355        let r = _mm256_mask_ternarylogic_epi32::<8>(src, 0, a, b);
46356        assert_eq_m256i(r, src);
46357        let r = _mm256_mask_ternarylogic_epi32::<8>(src, 0b11111111, a, b);
46358        let e = _mm256_set1_epi32(0);
46359        assert_eq_m256i(r, e);
46360    }
46361
46362    #[simd_test(enable = "avx512f,avx512vl")]
46363    unsafe fn test_mm256_maskz_ternarylogic_epi32() {
46364        let a = _mm256_set1_epi32(1 << 2);
46365        let b = _mm256_set1_epi32(1 << 1);
46366        let c = _mm256_set1_epi32(1 << 0);
46367        let r = _mm256_maskz_ternarylogic_epi32::<9>(0, a, b, c);
46368        assert_eq_m256i(r, _mm256_setzero_si256());
46369        let r = _mm256_maskz_ternarylogic_epi32::<8>(0b11111111, a, b, c);
46370        let e = _mm256_set1_epi32(0);
46371        assert_eq_m256i(r, e);
46372    }
46373
46374    #[simd_test(enable = "avx512f,avx512vl")]
46375    unsafe fn test_mm_ternarylogic_epi32() {
46376        let a = _mm_set1_epi32(1 << 2);
46377        let b = _mm_set1_epi32(1 << 1);
46378        let c = _mm_set1_epi32(1 << 0);
46379        let r = _mm_ternarylogic_epi32::<8>(a, b, c);
46380        let e = _mm_set1_epi32(0);
46381        assert_eq_m128i(r, e);
46382    }
46383
46384    #[simd_test(enable = "avx512f,avx512vl")]
46385    unsafe fn test_mm_mask_ternarylogic_epi32() {
46386        let src = _mm_set1_epi32(1 << 2);
46387        let a = _mm_set1_epi32(1 << 1);
46388        let b = _mm_set1_epi32(1 << 0);
46389        let r = _mm_mask_ternarylogic_epi32::<8>(src, 0, a, b);
46390        assert_eq_m128i(r, src);
46391        let r = _mm_mask_ternarylogic_epi32::<8>(src, 0b00001111, a, b);
46392        let e = _mm_set1_epi32(0);
46393        assert_eq_m128i(r, e);
46394    }
46395
46396    #[simd_test(enable = "avx512f,avx512vl")]
46397    unsafe fn test_mm_maskz_ternarylogic_epi32() {
46398        let a = _mm_set1_epi32(1 << 2);
46399        let b = _mm_set1_epi32(1 << 1);
46400        let c = _mm_set1_epi32(1 << 0);
46401        let r = _mm_maskz_ternarylogic_epi32::<9>(0, a, b, c);
46402        assert_eq_m128i(r, _mm_setzero_si128());
46403        let r = _mm_maskz_ternarylogic_epi32::<8>(0b00001111, a, b, c);
46404        let e = _mm_set1_epi32(0);
46405        assert_eq_m128i(r, e);
46406    }
46407
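    // `getmant` tests: the first const parameter selects the interval the
    // mantissa is normalized to (e.g. [1, 2) for `_MM_MANT_NORM_1_2`) and the
    // second controls the sign of the result; 10.0 = 1.25 * 2^3, so the
    // extracted mantissa is 1.25.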
46408    #[simd_test(enable = "avx512f")]
46409    unsafe fn test_mm512_getmant_ps() {
46410        let a = _mm512_set1_ps(10.);
46411        let r = _mm512_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
46412        let e = _mm512_set1_ps(1.25);
46413        assert_eq_m512(r, e);
46414    }
46415
46416    #[simd_test(enable = "avx512f")]
46417    unsafe fn test_mm512_mask_getmant_ps() {
46418        let a = _mm512_set1_ps(10.);
46419        let r = _mm512_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
46420        assert_eq_m512(r, a);
46421        let r = _mm512_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(
46422            a,
46423            0b11111111_00000000,
46424            a,
46425        );
46426        let e = _mm512_setr_ps(
46427            10., 10., 10., 10., 10., 10., 10., 10., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
46428        );
46429        assert_eq_m512(r, e);
46430    }
46431
46432    #[simd_test(enable = "avx512f")]
46433    unsafe fn test_mm512_maskz_getmant_ps() {
46434        let a = _mm512_set1_ps(10.);
46435        let r = _mm512_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
46436        assert_eq_m512(r, _mm512_setzero_ps());
46437        let r =
46438            _mm512_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111_00000000, a);
46439        let e = _mm512_setr_ps(
46440            0., 0., 0., 0., 0., 0., 0., 0., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
46441        );
46442        assert_eq_m512(r, e);
46443    }
46444
46445    #[simd_test(enable = "avx512f,avx512vl")]
46446    unsafe fn test_mm256_getmant_ps() {
46447        let a = _mm256_set1_ps(10.);
46448        let r = _mm256_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
46449        let e = _mm256_set1_ps(1.25);
46450        assert_eq_m256(r, e);
46451    }
46452
46453    #[simd_test(enable = "avx512f,avx512vl")]
46454    unsafe fn test_mm256_mask_getmant_ps() {
46455        let a = _mm256_set1_ps(10.);
46456        let r = _mm256_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
46457        assert_eq_m256(r, a);
46458        let r = _mm256_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11111111, a);
46459        let e = _mm256_set1_ps(1.25);
46460        assert_eq_m256(r, e);
46461    }
46462
46463    #[simd_test(enable = "avx512f,avx512vl")]
46464    unsafe fn test_mm256_maskz_getmant_ps() {
46465        let a = _mm256_set1_ps(10.);
46466        let r = _mm256_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
46467        assert_eq_m256(r, _mm256_setzero_ps());
46468        let r = _mm256_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111, a);
46469        let e = _mm256_set1_ps(1.25);
46470        assert_eq_m256(r, e);
46471    }
46472
46473    #[simd_test(enable = "avx512f,avx512vl")]
46474    unsafe fn test_mm_getmant_ps() {
46475        let a = _mm_set1_ps(10.);
46476        let r = _mm_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
46477        let e = _mm_set1_ps(1.25);
46478        assert_eq_m128(r, e);
46479    }
46480
46481    #[simd_test(enable = "avx512f,avx512vl")]
46482    unsafe fn test_mm_mask_getmant_ps() {
46483        let a = _mm_set1_ps(10.);
46484        let r = _mm_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
46485        assert_eq_m128(r, a);
46486        let r = _mm_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b00001111, a);
46487        let e = _mm_set1_ps(1.25);
46488        assert_eq_m128(r, e);
46489    }
46490
46491    #[simd_test(enable = "avx512f,avx512vl")]
46492    unsafe fn test_mm_maskz_getmant_ps() {
46493        let a = _mm_set1_ps(10.);
46494        let r = _mm_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
46495        assert_eq_m128(r, _mm_setzero_ps());
46496        let r = _mm_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b00001111, a);
46497        let e = _mm_set1_ps(1.25);
46498        assert_eq_m128(r, e);
46499    }
46500
46501    #[simd_test(enable = "avx512f")]
46502    unsafe fn test_mm512_add_round_ps() {
46503        let a = _mm512_setr_ps(
46504            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
46505        );
46506        let b = _mm512_set1_ps(-1.);
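        // The last lane exercises the embedded rounding mode: 0.00000007 + (-1.0) is
        // not exactly representable, rounding to -0.99999994 under round-to-nearest
        // but to the smaller-magnitude -0.9999999 under round-toward-zero.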
46507        let r = _mm512_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
46508        #[rustfmt::skip]
46509        let e = _mm512_setr_ps(
46510            -1., 0.5, 1., 2.5,
46511            3., 4.5, 5., 6.5,
46512            7., 8.5, 9., 10.5,
46513            11., 12.5, 13., -0.99999994,
46514        );
46515        assert_eq_m512(r, e);
46516        let r = _mm512_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
46517        let e = _mm512_setr_ps(
46518            -1., 0.5, 1., 2.5, 3., 4.5, 5., 6.5, 7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999,
46519        );
46520        assert_eq_m512(r, e);
46521    }
46522
46523    #[simd_test(enable = "avx512f")]
46524    unsafe fn test_mm512_mask_add_round_ps() {
46525        let a = _mm512_setr_ps(
46526            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
46527        );
46528        let b = _mm512_set1_ps(-1.);
46529        let r = _mm512_mask_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, 0, a, b);
46530        assert_eq_m512(r, a);
46531        let r = _mm512_mask_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46532            a,
46533            0b11111111_00000000,
46534            a,
46535            b,
46536        );
46537        #[rustfmt::skip]
46538        let e = _mm512_setr_ps(
46539            0., 1.5, 2., 3.5,
46540            4., 5.5, 6., 7.5,
46541            7., 8.5, 9., 10.5,
46542            11., 12.5, 13., -0.99999994,
46543        );
46544        assert_eq_m512(r, e);
46545    }
46546
46547    #[simd_test(enable = "avx512f")]
46548    unsafe fn test_mm512_maskz_add_round_ps() {
46549        let a = _mm512_setr_ps(
46550            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
46551        );
46552        let b = _mm512_set1_ps(-1.);
46553        let r = _mm512_maskz_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
46554        assert_eq_m512(r, _mm512_setzero_ps());
46555        let r = _mm512_maskz_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46556            0b11111111_00000000,
46557            a,
46558            b,
46559        );
46560        #[rustfmt::skip]
46561        let e = _mm512_setr_ps(
46562            0., 0., 0., 0.,
46563            0., 0., 0., 0.,
46564            7., 8.5, 9., 10.5,
46565            11., 12.5, 13., -0.99999994,
46566        );
46567        assert_eq_m512(r, e);
46568    }
46569
46570    #[simd_test(enable = "avx512f")]
46571    unsafe fn test_mm512_sub_round_ps() {
46572        let a = _mm512_setr_ps(
46573            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
46574        );
46575        let b = _mm512_set1_ps(1.);
46576        let r = _mm512_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
46577        #[rustfmt::skip]
46578        let e = _mm512_setr_ps(
46579            -1., 0.5, 1., 2.5,
46580            3., 4.5, 5., 6.5,
46581            7., 8.5, 9., 10.5,
46582            11., 12.5, 13., -0.99999994,
46583        );
46584        assert_eq_m512(r, e);
46585        let r = _mm512_sub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
46586        let e = _mm512_setr_ps(
46587            -1., 0.5, 1., 2.5, 3., 4.5, 5., 6.5, 7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999,
46588        );
46589        assert_eq_m512(r, e);
46590    }
46591
46592    #[simd_test(enable = "avx512f")]
46593    unsafe fn test_mm512_mask_sub_round_ps() {
46594        let a = _mm512_setr_ps(
46595            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
46596        );
46597        let b = _mm512_set1_ps(1.);
46598        let r = _mm512_mask_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46599            a, 0, a, b,
46600        );
46601        assert_eq_m512(r, a);
46602        let r = _mm512_mask_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46603            a,
46604            0b11111111_00000000,
46605            a,
46606            b,
46607        );
46608        #[rustfmt::skip]
46609        let e = _mm512_setr_ps(
46610            0., 1.5, 2., 3.5,
46611            4., 5.5, 6., 7.5,
46612            7., 8.5, 9., 10.5,
46613            11., 12.5, 13., -0.99999994,
46614        );
46615        assert_eq_m512(r, e);
46616    }
46617
46618    #[simd_test(enable = "avx512f")]
46619    unsafe fn test_mm512_maskz_sub_round_ps() {
46620        let a = _mm512_setr_ps(
46621            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
46622        );
46623        let b = _mm512_set1_ps(1.);
46624        let r =
46625            _mm512_maskz_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
46626        assert_eq_m512(r, _mm512_setzero_ps());
46627        let r = _mm512_maskz_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46628            0b11111111_00000000,
46629            a,
46630            b,
46631        );
46632        #[rustfmt::skip]
46633        let e = _mm512_setr_ps(
46634            0., 0., 0., 0.,
46635            0., 0., 0., 0.,
46636            7., 8.5, 9., 10.5,
46637            11., 12.5, 13., -0.99999994,
46638        );
46639        assert_eq_m512(r, e);
46640    }
46641
46642    #[simd_test(enable = "avx512f")]
46643    unsafe fn test_mm512_mul_round_ps() {
46644        #[rustfmt::skip]
46645        let a = _mm512_setr_ps(
46646            0., 1.5, 2., 3.5,
46647            4., 5.5, 6., 7.5,
46648            8., 9.5, 10., 11.5,
46649            12., 13.5, 14., 0.00000000000000000000007,
46650        );
46651        let b = _mm512_set1_ps(0.1);
46652        let r = _mm512_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
46653        #[rustfmt::skip]
46654        let e = _mm512_setr_ps(
46655            0., 0.15, 0.2, 0.35,
46656            0.4, 0.55, 0.6, 0.75,
46657            0.8, 0.95, 1.0, 1.15,
46658            1.2, 1.35, 1.4, 0.000000000000000000000007000001,
46659        );
46660        assert_eq_m512(r, e);
46661        let r = _mm512_mul_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
46662        #[rustfmt::skip]
46663        let e = _mm512_setr_ps(
46664            0., 0.14999999, 0.2, 0.35,
46665            0.4, 0.54999995, 0.59999996, 0.75,
46666            0.8, 0.95, 1.0, 1.15,
46667            1.1999999, 1.3499999, 1.4, 0.000000000000000000000007,
46668        );
46669        assert_eq_m512(r, e);
46670    }
46671
46672    #[simd_test(enable = "avx512f")]
46673    unsafe fn test_mm512_mask_mul_round_ps() {
46674        #[rustfmt::skip]
46675        let a = _mm512_setr_ps(
46676            0., 1.5, 2., 3.5,
46677            4., 5.5, 6., 7.5,
46678            8., 9.5, 10., 11.5,
46679            12., 13.5, 14., 0.00000000000000000000007,
46680        );
46681        let b = _mm512_set1_ps(0.1);
46682        let r = _mm512_mask_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46683            a, 0, a, b,
46684        );
46685        assert_eq_m512(r, a);
46686        let r = _mm512_mask_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46687            a,
46688            0b11111111_00000000,
46689            a,
46690            b,
46691        );
46692        #[rustfmt::skip]
46693        let e = _mm512_setr_ps(
46694            0., 1.5, 2., 3.5,
46695            4., 5.5, 6., 7.5,
46696            0.8, 0.95, 1.0, 1.15,
46697            1.2, 1.35, 1.4, 0.000000000000000000000007000001,
46698        );
46699        assert_eq_m512(r, e);
46700    }
46701
46702    #[simd_test(enable = "avx512f")]
46703    unsafe fn test_mm512_maskz_mul_round_ps() {
46704        #[rustfmt::skip]
46705        let a = _mm512_setr_ps(
46706            0., 1.5, 2., 3.5,
46707            4., 5.5, 6., 7.5,
46708            8., 9.5, 10., 11.5,
46709            12., 13.5, 14., 0.00000000000000000000007,
46710        );
46711        let b = _mm512_set1_ps(0.1);
46712        let r =
46713            _mm512_maskz_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
46714        assert_eq_m512(r, _mm512_setzero_ps());
46715        let r = _mm512_maskz_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46716            0b11111111_00000000,
46717            a,
46718            b,
46719        );
46720        #[rustfmt::skip]
46721        let e = _mm512_setr_ps(
46722            0., 0., 0., 0.,
46723            0., 0., 0., 0.,
46724            0.8, 0.95, 1.0, 1.15,
46725            1.2, 1.35, 1.4, 0.000000000000000000000007000001,
46726        );
46727        assert_eq_m512(r, e);
46728    }
46729
46730    #[simd_test(enable = "avx512f")]
46731    unsafe fn test_mm512_div_round_ps() {
46732        let a = _mm512_set1_ps(1.);
46733        let b = _mm512_set1_ps(3.);
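        // 1/3 is inexact in binary: round-to-nearest gives 0.33333334, while
        // round-toward-zero truncates to 0.3333333.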
46734        let r = _mm512_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
46735        let e = _mm512_set1_ps(0.33333334);
46736        assert_eq_m512(r, e);
46737        let r = _mm512_div_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
46738        let e = _mm512_set1_ps(0.3333333);
46739        assert_eq_m512(r, e);
46740    }
46741
46742    #[simd_test(enable = "avx512f")]
46743    unsafe fn test_mm512_mask_div_round_ps() {
46744        let a = _mm512_set1_ps(1.);
46745        let b = _mm512_set1_ps(3.);
46746        let r = _mm512_mask_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46747            a, 0, a, b,
46748        );
46749        assert_eq_m512(r, a);
46750        let r = _mm512_mask_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46751            a,
46752            0b11111111_00000000,
46753            a,
46754            b,
46755        );
46756        let e = _mm512_setr_ps(
46757            1., 1., 1., 1., 1., 1., 1., 1., 0.33333334, 0.33333334, 0.33333334, 0.33333334,
46758            0.33333334, 0.33333334, 0.33333334, 0.33333334,
46759        );
46760        assert_eq_m512(r, e);
46761    }
46762
46763    #[simd_test(enable = "avx512f")]
46764    unsafe fn test_mm512_maskz_div_round_ps() {
46765        let a = _mm512_set1_ps(1.);
46766        let b = _mm512_set1_ps(3.);
46767        let r =
46768            _mm512_maskz_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
46769        assert_eq_m512(r, _mm512_setzero_ps());
46770        let r = _mm512_maskz_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46771            0b11111111_00000000,
46772            a,
46773            b,
46774        );
46775        let e = _mm512_setr_ps(
46776            0., 0., 0., 0., 0., 0., 0., 0., 0.33333334, 0.33333334, 0.33333334, 0.33333334,
46777            0.33333334, 0.33333334, 0.33333334, 0.33333334,
46778        );
46779        assert_eq_m512(r, e);
46780    }
46781
46782    #[simd_test(enable = "avx512f")]
46783    unsafe fn test_mm512_sqrt_round_ps() {
46784        let a = _mm512_set1_ps(3.);
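        // sqrt(3) is inexact: round-to-nearest gives 1.7320508, while rounding
        // toward +infinity selects the next representable value, 1.7320509.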
46785        let r = _mm512_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
46786        let e = _mm512_set1_ps(1.7320508);
46787        assert_eq_m512(r, e);
46788        let r = _mm512_sqrt_round_ps::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
46789        let e = _mm512_set1_ps(1.7320509);
46790        assert_eq_m512(r, e);
46791    }
46792
46793    #[simd_test(enable = "avx512f")]
46794    unsafe fn test_mm512_mask_sqrt_round_ps() {
46795        let a = _mm512_set1_ps(3.);
46796        let r =
46797            _mm512_mask_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 0, a);
46798        assert_eq_m512(r, a);
46799        let r = _mm512_mask_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46800            a,
46801            0b11111111_00000000,
46802            a,
46803        );
46804        let e = _mm512_setr_ps(
46805            3., 3., 3., 3., 3., 3., 3., 3., 1.7320508, 1.7320508, 1.7320508, 1.7320508, 1.7320508,
46806            1.7320508, 1.7320508, 1.7320508,
46807        );
46808        assert_eq_m512(r, e);
46809    }
46810
46811    #[simd_test(enable = "avx512f")]
46812    unsafe fn test_mm512_maskz_sqrt_round_ps() {
46813        let a = _mm512_set1_ps(3.);
46814        let r =
46815            _mm512_maskz_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a);
46816        assert_eq_m512(r, _mm512_setzero_ps());
46817        let r = _mm512_maskz_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46818            0b11111111_00000000,
46819            a,
46820        );
46821        let e = _mm512_setr_ps(
46822            0., 0., 0., 0., 0., 0., 0., 0., 1.7320508, 1.7320508, 1.7320508, 1.7320508, 1.7320508,
46823            1.7320508, 1.7320508, 1.7320508,
46824        );
46825        assert_eq_m512(r, e);
46826    }
46827
46828    #[simd_test(enable = "avx512f")]
46829    unsafe fn test_mm512_fmadd_round_ps() {
46830        let a = _mm512_set1_ps(0.00000007);
46831        let b = _mm512_set1_ps(1.);
46832        let c = _mm512_set1_ps(-1.);
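        // FMA rounds only once, after the multiply-add: 0.00000007 * 1.0 - 1.0
        // becomes -0.99999994 under round-to-nearest and -0.9999999 toward zero.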
46833        let r = _mm512_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
46834        let e = _mm512_set1_ps(-0.99999994);
46835        assert_eq_m512(r, e);
46836        let r = _mm512_fmadd_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
46837        let e = _mm512_set1_ps(-0.9999999);
46838        assert_eq_m512(r, e);
46839    }
46840
46841    #[simd_test(enable = "avx512f")]
46842    unsafe fn test_mm512_mask_fmadd_round_ps() {
46843        let a = _mm512_set1_ps(0.00000007);
46844        let b = _mm512_set1_ps(1.);
46845        let c = _mm512_set1_ps(-1.);
46846        let r = _mm512_mask_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46847            a, 0, b, c,
46848        );
46849        assert_eq_m512(r, a);
46850        let r = _mm512_mask_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46851            a,
46852            0b00000000_11111111,
46853            b,
46854            c,
46855        );
46856        #[rustfmt::skip]
46857        let e = _mm512_setr_ps(
46858            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46859            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46860            0.00000007, 0.00000007, 0.00000007, 0.00000007,
46861            0.00000007, 0.00000007, 0.00000007, 0.00000007,
46862        );
46863        assert_eq_m512(r, e);
46864    }
46865
46866    #[simd_test(enable = "avx512f")]
46867    unsafe fn test_mm512_maskz_fmadd_round_ps() {
46868        let a = _mm512_set1_ps(0.00000007);
46869        let b = _mm512_set1_ps(1.);
46870        let c = _mm512_set1_ps(-1.);
46871        let r = _mm512_maskz_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46872            0, a, b, c,
46873        );
46874        assert_eq_m512(r, _mm512_setzero_ps());
46875        #[rustfmt::skip]
46876        let r = _mm512_maskz_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46877            0b00000000_11111111,
46878            a,
46879            b,
46880            c,
46881        );
46882        #[rustfmt::skip]
46883        let e = _mm512_setr_ps(
46884            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46885            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46886            0., 0., 0., 0.,
46887            0., 0., 0., 0.,
46888        );
46889        assert_eq_m512(r, e);
46890    }
46891
46892    #[simd_test(enable = "avx512f")]
46893    unsafe fn test_mm512_mask3_fmadd_round_ps() {
46894        let a = _mm512_set1_ps(0.00000007);
46895        let b = _mm512_set1_ps(1.);
46896        let c = _mm512_set1_ps(-1.);
46897        let r = _mm512_mask3_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46898            a, b, c, 0,
46899        );
46900        assert_eq_m512(r, c);
46901        let r = _mm512_mask3_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46902            a,
46903            b,
46904            c,
46905            0b00000000_11111111,
46906        );
46907        #[rustfmt::skip]
46908        let e = _mm512_setr_ps(
46909            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46910            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46911            -1., -1., -1., -1.,
46912            -1., -1., -1., -1.,
46913        );
46914        assert_eq_m512(r, e);
46915    }
46916
46917    #[simd_test(enable = "avx512f")]
46918    unsafe fn test_mm512_fmsub_round_ps() {
46919        let a = _mm512_set1_ps(0.00000007);
46920        let b = _mm512_set1_ps(1.);
46921        let c = _mm512_set1_ps(1.);
46922        let r = _mm512_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
46923        let e = _mm512_set1_ps(-0.99999994);
46924        assert_eq_m512(r, e);
46925        let r = _mm512_fmsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
46926        let e = _mm512_set1_ps(-0.9999999);
46927        assert_eq_m512(r, e);
46928    }
46929
46930    #[simd_test(enable = "avx512f")]
46931    unsafe fn test_mm512_mask_fmsub_round_ps() {
46932        let a = _mm512_set1_ps(0.00000007);
46933        let b = _mm512_set1_ps(1.);
46934        let c = _mm512_set1_ps(1.);
46935        let r = _mm512_mask_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46936            a, 0, b, c,
46937        );
46938        assert_eq_m512(r, a);
46939        let r = _mm512_mask_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46940            a,
46941            0b00000000_11111111,
46942            b,
46943            c,
46944        );
46945        #[rustfmt::skip]
46946        let e = _mm512_setr_ps(
46947            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46948            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46949            0.00000007, 0.00000007, 0.00000007, 0.00000007,
46950            0.00000007, 0.00000007, 0.00000007, 0.00000007,
46951        );
46952        assert_eq_m512(r, e);
46953    }
46954
46955    #[simd_test(enable = "avx512f")]
46956    unsafe fn test_mm512_maskz_fmsub_round_ps() {
46957        let a = _mm512_set1_ps(0.00000007);
46958        let b = _mm512_set1_ps(1.);
46959        let c = _mm512_set1_ps(1.);
46960        let r = _mm512_maskz_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46961            0, a, b, c,
46962        );
46963        assert_eq_m512(r, _mm512_setzero_ps());
46964        let r = _mm512_maskz_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46965            0b00000000_11111111,
46966            a,
46967            b,
46968            c,
46969        );
46970        #[rustfmt::skip]
46971        let e = _mm512_setr_ps(
46972            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46973            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46974            0., 0., 0., 0.,
46975            0., 0., 0., 0.,
46976        );
46977        assert_eq_m512(r, e);
46978    }
46979
46980    #[simd_test(enable = "avx512f")]
46981    unsafe fn test_mm512_mask3_fmsub_round_ps() {
46982        let a = _mm512_set1_ps(0.00000007);
46983        let b = _mm512_set1_ps(1.);
46984        let c = _mm512_set1_ps(1.);
46985        let r = _mm512_mask3_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46986            a, b, c, 0,
46987        );
46988        assert_eq_m512(r, c);
46989        let r = _mm512_mask3_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46990            a,
46991            b,
46992            c,
46993            0b00000000_11111111,
46994        );
46995        #[rustfmt::skip]
46996        let e = _mm512_setr_ps(
46997            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46998            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46999            1., 1., 1., 1.,
47000            1., 1., 1., 1.,
47001        );
47002        assert_eq_m512(r, e);
47003    }
47004
47005    #[simd_test(enable = "avx512f")]
47006    unsafe fn test_mm512_fmaddsub_round_ps() {
47007        let a = _mm512_set1_ps(0.00000007);
47008        let b = _mm512_set1_ps(1.);
47009        let c = _mm512_set1_ps(-1.);
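        // fmaddsub alternates per lane: even lanes compute a*b - c (= 1.0000001 here)
        // and odd lanes compute a*b + c (= -0.99999994 under round-to-nearest).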
47010        let r =
47011            _mm512_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
47012        #[rustfmt::skip]
47013        let e = _mm512_setr_ps(
47014            1.0000001, -0.99999994, 1.0000001, -0.99999994,
47015            1.0000001, -0.99999994, 1.0000001, -0.99999994,
47016            1.0000001, -0.99999994, 1.0000001, -0.99999994,
47017            1.0000001, -0.99999994, 1.0000001, -0.99999994,
47018        );
47019        assert_eq_m512(r, e);
47020        let r = _mm512_fmaddsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
47021        let e = _mm512_setr_ps(
47022            1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
47023            -0.9999999, 1., -0.9999999, 1., -0.9999999,
47024        );
47025        assert_eq_m512(r, e);
47026    }
47027
47028    #[simd_test(enable = "avx512f")]
47029    unsafe fn test_mm512_mask_fmaddsub_round_ps() {
47030        let a = _mm512_set1_ps(0.00000007);
47031        let b = _mm512_set1_ps(1.);
47032        let c = _mm512_set1_ps(-1.);
47033        let r = _mm512_mask_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47034            a, 0, b, c,
47035        );
47036        assert_eq_m512(r, a);
47037        let r = _mm512_mask_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47038            a,
47039            0b00000000_11111111,
47040            b,
47041            c,
47042        );
47043        #[rustfmt::skip]
47044        let e = _mm512_setr_ps(
47045            1.0000001, -0.99999994, 1.0000001, -0.99999994,
47046            1.0000001, -0.99999994, 1.0000001, -0.99999994,
47047            0.00000007, 0.00000007, 0.00000007, 0.00000007,
47048            0.00000007, 0.00000007, 0.00000007, 0.00000007,
47049        );
47050        assert_eq_m512(r, e);
47051    }
47052
47053    #[simd_test(enable = "avx512f")]
47054    unsafe fn test_mm512_maskz_fmaddsub_round_ps() {
47055        let a = _mm512_set1_ps(0.00000007);
47056        let b = _mm512_set1_ps(1.);
47057        let c = _mm512_set1_ps(-1.);
47058        let r = _mm512_maskz_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47059            0, a, b, c,
47060        );
47061        assert_eq_m512(r, _mm512_setzero_ps());
47062        let r = _mm512_maskz_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47063            0b00000000_11111111,
47064            a,
47065            b,
47066            c,
47067        );
47068        #[rustfmt::skip]
47069        let e = _mm512_setr_ps(
47070            1.0000001, -0.99999994, 1.0000001, -0.99999994,
47071            1.0000001, -0.99999994, 1.0000001, -0.99999994,
47072            0., 0., 0., 0.,
47073            0., 0., 0., 0.,
47074        );
47075        assert_eq_m512(r, e);
47076    }
47077
47078    #[simd_test(enable = "avx512f")]
47079    unsafe fn test_mm512_mask3_fmaddsub_round_ps() {
47080        let a = _mm512_set1_ps(0.00000007);
47081        let b = _mm512_set1_ps(1.);
47082        let c = _mm512_set1_ps(-1.);
47083        let r = _mm512_mask3_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47084            a, b, c, 0,
47085        );
47086        assert_eq_m512(r, c);
47087        let r = _mm512_mask3_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47088            a,
47089            b,
47090            c,
47091            0b00000000_11111111,
47092        );
47093        #[rustfmt::skip]
47094        let e = _mm512_setr_ps(
47095            1.0000001, -0.99999994, 1.0000001, -0.99999994,
47096            1.0000001, -0.99999994, 1.0000001, -0.99999994,
47097            -1., -1., -1., -1.,
47098            -1., -1., -1., -1.,
47099        );
47100        assert_eq_m512(r, e);
47101    }
47102
47103    #[simd_test(enable = "avx512f")]
47104    unsafe fn test_mm512_fmsubadd_round_ps() {
47105        let a = _mm512_set1_ps(0.00000007);
47106        let b = _mm512_set1_ps(1.);
47107        let c = _mm512_set1_ps(-1.);
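        // fmsubadd is the mirror of fmaddsub: even lanes compute a*b + c and odd
        // lanes compute a*b - c, which swaps the alternating pattern below.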
47108        let r =
47109            _mm512_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
47110        #[rustfmt::skip]
47111        let e = _mm512_setr_ps(
47112            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47113            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47114            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47115            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47116        );
47117        assert_eq_m512(r, e);
47118        let r = _mm512_fmsubadd_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
47119        let e = _mm512_setr_ps(
47120            -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
47121            -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
47122        );
47123        assert_eq_m512(r, e);
47124    }
47125
47126    #[simd_test(enable = "avx512f")]
47127    unsafe fn test_mm512_mask_fmsubadd_round_ps() {
47128        let a = _mm512_set1_ps(0.00000007);
47129        let b = _mm512_set1_ps(1.);
47130        let c = _mm512_set1_ps(-1.);
47131        let r = _mm512_mask_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47132            a, 0, b, c,
47133        );
47134        assert_eq_m512(r, a);
47135        let r = _mm512_mask_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47136            a,
47137            0b00000000_11111111,
47138            b,
47139            c,
47140        );
47141        #[rustfmt::skip]
47142        let e = _mm512_setr_ps(
47143            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47144            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47145            0.00000007, 0.00000007, 0.00000007, 0.00000007,
47146            0.00000007, 0.00000007, 0.00000007, 0.00000007,
47147        );
47148        assert_eq_m512(r, e);
47149    }
47150
47151    #[simd_test(enable = "avx512f")]
47152    unsafe fn test_mm512_maskz_fmsubadd_round_ps() {
47153        let a = _mm512_set1_ps(0.00000007);
47154        let b = _mm512_set1_ps(1.);
47155        let c = _mm512_set1_ps(-1.);
47156        let r = _mm512_maskz_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47157            0, a, b, c,
47158        );
47159        assert_eq_m512(r, _mm512_setzero_ps());
47160        let r = _mm512_maskz_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47161            0b00000000_11111111,
47162            a,
47163            b,
47164            c,
47165        );
47166        #[rustfmt::skip]
47167        let e = _mm512_setr_ps(
47168            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47169            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47170            0., 0., 0., 0.,
47171            0., 0., 0., 0.,
47172        );
47173        assert_eq_m512(r, e);
47174    }
47175
47176    #[simd_test(enable = "avx512f")]
47177    unsafe fn test_mm512_mask3_fmsubadd_round_ps() {
47178        let a = _mm512_set1_ps(0.00000007);
47179        let b = _mm512_set1_ps(1.);
47180        let c = _mm512_set1_ps(-1.);
47181        let r = _mm512_mask3_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47182            a, b, c, 0,
47183        );
47184        assert_eq_m512(r, c);
47185        let r = _mm512_mask3_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47186            a,
47187            b,
47188            c,
47189            0b00000000_11111111,
47190        );
47191        #[rustfmt::skip]
47192        let e = _mm512_setr_ps(
47193            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47194            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47195            -1., -1., -1., -1.,
47196            -1., -1., -1., -1.,
47197        );
47198        assert_eq_m512(r, e);
47199    }
47200
47201    #[simd_test(enable = "avx512f")]
47202    unsafe fn test_mm512_fnmadd_round_ps() {
47203        let a = _mm512_set1_ps(0.00000007);
47204        let b = _mm512_set1_ps(1.);
47205        let c = _mm512_set1_ps(1.);
47206        let r =
47207            _mm512_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
47208        let e = _mm512_set1_ps(0.99999994);
47209        assert_eq_m512(r, e);
47210        let r = _mm512_fnmadd_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
47211        let e = _mm512_set1_ps(0.9999999);
47212        assert_eq_m512(r, e);
47213    }
47214
47215    #[simd_test(enable = "avx512f")]
47216    unsafe fn test_mm512_mask_fnmadd_round_ps() {
47217        let a = _mm512_set1_ps(0.00000007);
47218        let b = _mm512_set1_ps(1.);
47219        let c = _mm512_set1_ps(1.);
47220        let r = _mm512_mask_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47221            a, 0, b, c,
47222        );
47223        assert_eq_m512(r, a);
47224        let r = _mm512_mask_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47225            a,
47226            0b00000000_11111111,
47227            b,
47228            c,
47229        );
47230        let e = _mm512_setr_ps(
47231            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47232            0.99999994, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007,
47233            0.00000007, 0.00000007,
47234        );
47235        assert_eq_m512(r, e);
47236    }
47237
47238    #[simd_test(enable = "avx512f")]
47239    unsafe fn test_mm512_maskz_fnmadd_round_ps() {
47240        let a = _mm512_set1_ps(0.00000007);
47241        let b = _mm512_set1_ps(1.);
47242        let c = _mm512_set1_ps(1.);
47243        let r = _mm512_maskz_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47244            0, a, b, c,
47245        );
47246        assert_eq_m512(r, _mm512_setzero_ps());
47247        let r = _mm512_maskz_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47248            0b00000000_11111111,
47249            a,
47250            b,
47251            c,
47252        );
47253        let e = _mm512_setr_ps(
47254            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47255            0.99999994, 0., 0., 0., 0., 0., 0., 0., 0.,
47256        );
47257        assert_eq_m512(r, e);
47258    }
47259
47260    #[simd_test(enable = "avx512f")]
47261    unsafe fn test_mm512_mask3_fnmadd_round_ps() {
47262        let a = _mm512_set1_ps(0.00000007);
47263        let b = _mm512_set1_ps(1.);
47264        let c = _mm512_set1_ps(1.);
47265        let r = _mm512_mask3_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47266            a, b, c, 0,
47267        );
47268        assert_eq_m512(r, c);
47269        let r = _mm512_mask3_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47270            a,
47271            b,
47272            c,
47273            0b00000000_11111111,
47274        );
47275        let e = _mm512_setr_ps(
47276            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47277            0.99999994, 1., 1., 1., 1., 1., 1., 1., 1.,
47278        );
47279        assert_eq_m512(r, e);
47280    }
47281
47282    #[simd_test(enable = "avx512f")]
47283    unsafe fn test_mm512_fnmsub_round_ps() {
47284        let a = _mm512_set1_ps(0.00000007);
47285        let b = _mm512_set1_ps(1.);
47286        let c = _mm512_set1_ps(-1.);
47287        let r =
47288            _mm512_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
47289        let e = _mm512_set1_ps(0.99999994);
47290        assert_eq_m512(r, e);
47291        let r = _mm512_fnmsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
47292        let e = _mm512_set1_ps(0.9999999);
47293        assert_eq_m512(r, e);
47294    }
47295
47296    #[simd_test(enable = "avx512f")]
47297    unsafe fn test_mm512_mask_fnmsub_round_ps() {
47298        let a = _mm512_set1_ps(0.00000007);
47299        let b = _mm512_set1_ps(1.);
47300        let c = _mm512_set1_ps(-1.);
47301        let r = _mm512_mask_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47302            a, 0, b, c,
47303        );
47304        assert_eq_m512(r, a);
47305        let r = _mm512_mask_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47306            a,
47307            0b00000000_11111111,
47308            b,
47309            c,
47310        );
47311        let e = _mm512_setr_ps(
47312            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47313            0.99999994, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007,
47314            0.00000007, 0.00000007,
47315        );
47316        assert_eq_m512(r, e);
47317    }
47318
47319    #[simd_test(enable = "avx512f")]
47320    unsafe fn test_mm512_maskz_fnmsub_round_ps() {
47321        let a = _mm512_set1_ps(0.00000007);
47322        let b = _mm512_set1_ps(1.);
47323        let c = _mm512_set1_ps(-1.);
47324        let r = _mm512_maskz_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47325            0, a, b, c,
47326        );
47327        assert_eq_m512(r, _mm512_setzero_ps());
47328        let r = _mm512_maskz_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47329            0b00000000_11111111,
47330            a,
47331            b,
47332            c,
47333        );
47334        let e = _mm512_setr_ps(
47335            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47336            0.99999994, 0., 0., 0., 0., 0., 0., 0., 0.,
47337        );
47338        assert_eq_m512(r, e);
47339    }
47340
47341    #[simd_test(enable = "avx512f")]
47342    unsafe fn test_mm512_mask3_fnmsub_round_ps() {
47343        let a = _mm512_set1_ps(0.00000007);
47344        let b = _mm512_set1_ps(1.);
47345        let c = _mm512_set1_ps(-1.);
47346        let r = _mm512_mask3_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47347            a, b, c, 0,
47348        );
47349        assert_eq_m512(r, c);
47350        let r = _mm512_mask3_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47351            a,
47352            b,
47353            c,
47354            0b00000000_11111111,
47355        );
47356        let e = _mm512_setr_ps(
47357            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47358            0.99999994, -1., -1., -1., -1., -1., -1., -1., -1.,
47359        );
47360        assert_eq_m512(r, e);
47361    }
47362
47363    #[simd_test(enable = "avx512f")]
47364    unsafe fn test_mm512_max_round_ps() {
47365        let a = _mm512_setr_ps(
47366            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47367        );
47368        let b = _mm512_setr_ps(
47369            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47370        );
47371        let r = _mm512_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, b);
47372        let e = _mm512_setr_ps(
47373            15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
47374        );
47375        assert_eq_m512(r, e);
47376    }
47377
47378    #[simd_test(enable = "avx512f")]
47379    unsafe fn test_mm512_mask_max_round_ps() {
47380        let a = _mm512_setr_ps(
47381            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47382        );
47383        let b = _mm512_setr_ps(
47384            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47385        );
47386        let r = _mm512_mask_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
47387        assert_eq_m512(r, a);
47388        let r = _mm512_mask_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0b00000000_11111111, a, b);
47389        let e = _mm512_setr_ps(
47390            15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
47391        );
47392        assert_eq_m512(r, e);
47393    }
47394
47395    #[simd_test(enable = "avx512f")]
47396    unsafe fn test_mm512_maskz_max_round_ps() {
47397        let a = _mm512_setr_ps(
47398            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47399        );
47400        let b = _mm512_setr_ps(
47401            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47402        );
47403        let r = _mm512_maskz_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
47404        assert_eq_m512(r, _mm512_setzero_ps());
47405        let r = _mm512_maskz_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(0b00000000_11111111, a, b);
47406        let e = _mm512_setr_ps(
47407            15., 14., 13., 12., 11., 10., 9., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
47408        );
47409        assert_eq_m512(r, e);
47410    }
47411
47412    #[simd_test(enable = "avx512f")]
47413    unsafe fn test_mm512_min_round_ps() {
47414        let a = _mm512_setr_ps(
47415            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47416        );
47417        let b = _mm512_setr_ps(
47418            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47419        );
47420        let r = _mm512_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, b);
47421        let e = _mm512_setr_ps(
47422            0., 1., 2., 3., 4., 5., 6., 7., 7., 6., 5., 4., 3., 2., 1., 0.,
47423        );
47424        assert_eq_m512(r, e);
47425    }
47426
47427    #[simd_test(enable = "avx512f")]
47428    unsafe fn test_mm512_mask_min_round_ps() {
47429        let a = _mm512_setr_ps(
47430            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47431        );
47432        let b = _mm512_setr_ps(
47433            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47434        );
47435        let r = _mm512_mask_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
47436        assert_eq_m512(r, a);
47437        let r = _mm512_mask_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0b00000000_11111111, a, b);
47438        let e = _mm512_setr_ps(
47439            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47440        );
47441        assert_eq_m512(r, e);
47442    }
47443
47444    #[simd_test(enable = "avx512f")]
47445    unsafe fn test_mm512_maskz_min_round_ps() {
47446        let a = _mm512_setr_ps(
47447            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47448        );
47449        let b = _mm512_setr_ps(
47450            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47451        );
47452        let r = _mm512_maskz_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
47453        assert_eq_m512(r, _mm512_setzero_ps());
47454        let r = _mm512_maskz_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(0b00000000_11111111, a, b);
47455        let e = _mm512_setr_ps(
47456            0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
47457        );
47458        assert_eq_m512(r, e);
47459    }
47460
47461    #[simd_test(enable = "avx512f")]
47462    unsafe fn test_mm512_getexp_round_ps() {
47463        let a = _mm512_set1_ps(3.);
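        // getexp returns floor(log2(|x|)) as a float; for 3.0 that exponent is 1.0.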
47464        let r = _mm512_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(a);
47465        let e = _mm512_set1_ps(1.);
47466        assert_eq_m512(r, e);
47467    }
47468
47469    #[simd_test(enable = "avx512f")]
47470    unsafe fn test_mm512_mask_getexp_round_ps() {
47471        let a = _mm512_set1_ps(3.);
47472        let r = _mm512_mask_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0, a);
47473        assert_eq_m512(r, a);
47474        let r = _mm512_mask_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111_00000000, a);
47475        let e = _mm512_setr_ps(
47476            3., 3., 3., 3., 3., 3., 3., 3., 1., 1., 1., 1., 1., 1., 1., 1.,
47477        );
47478        assert_eq_m512(r, e);
47479    }
47480
47481    #[simd_test(enable = "avx512f")]
47482    unsafe fn test_mm512_maskz_getexp_round_ps() {
47483        let a = _mm512_set1_ps(3.);
47484        let r = _mm512_maskz_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(0, a);
47485        assert_eq_m512(r, _mm512_setzero_ps());
47486        let r = _mm512_maskz_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(0b11111111_00000000, a);
47487        let e = _mm512_setr_ps(
47488            0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
47489        );
47490        assert_eq_m512(r, e);
47491    }
47492
47493    #[simd_test(enable = "avx512f")]
47494    unsafe fn test_mm512_roundscale_round_ps() {
47495        let a = _mm512_set1_ps(1.1);
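        // With IMM8 = 0 no fraction bits are kept, so 1.1 is rounded to the nearest
        // integer, 1.0.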
47496        let r = _mm512_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(a);
47497        let e = _mm512_set1_ps(1.0);
47498        assert_eq_m512(r, e);
47499    }
47500
47501    #[simd_test(enable = "avx512f")]
47502    unsafe fn test_mm512_mask_roundscale_round_ps() {
47503        let a = _mm512_set1_ps(1.1);
47504        let r = _mm512_mask_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a);
47505        let e = _mm512_set1_ps(1.1);
47506        assert_eq_m512(r, e);
47507        let r = _mm512_mask_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(
47508            a,
47509            0b11111111_11111111,
47510            a,
47511        );
47512        let e = _mm512_set1_ps(1.0);
47513        assert_eq_m512(r, e);
47514    }
47515
47516    #[simd_test(enable = "avx512f")]
47517    unsafe fn test_mm512_maskz_roundscale_round_ps() {
47518        let a = _mm512_set1_ps(1.1);
47519        let r = _mm512_maskz_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(0, a);
47520        assert_eq_m512(r, _mm512_setzero_ps());
47521        let r =
47522            _mm512_maskz_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111_11111111, a);
47523        let e = _mm512_set1_ps(1.0);
47524        assert_eq_m512(r, e);
47525    }
47526
47527    #[simd_test(enable = "avx512f")]
47528    unsafe fn test_mm512_scalef_round_ps() {
47529        let a = _mm512_set1_ps(1.);
47530        let b = _mm512_set1_ps(3.);
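        // scalef computes a * 2^floor(b), so 1.0 * 2^3 = 8.0.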
47531        let r = _mm512_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
47532        let e = _mm512_set1_ps(8.);
47533        assert_eq_m512(r, e);
47534    }
47535
47536    #[simd_test(enable = "avx512f")]
47537    unsafe fn test_mm512_mask_scalef_round_ps() {
47538        let a = _mm512_set1_ps(1.);
47539        let b = _mm512_set1_ps(3.);
47540        let r = _mm512_mask_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47541            a, 0, a, b,
47542        );
47543        assert_eq_m512(r, a);
47544        let r = _mm512_mask_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47545            a,
47546            0b11111111_00000000,
47547            a,
47548            b,
47549        );
47550        let e = _mm512_set_ps(
47551            8., 8., 8., 8., 8., 8., 8., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
47552        );
47553        assert_eq_m512(r, e);
47554    }
47555
47556    #[simd_test(enable = "avx512f")]
47557    unsafe fn test_mm512_maskz_scalef_round_ps() {
47558        let a = _mm512_set1_ps(1.);
47559        let b = _mm512_set1_ps(3.);
47560        let r = _mm512_maskz_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47561            0, a, b,
47562        );
47563        assert_eq_m512(r, _mm512_setzero_ps());
47564        let r = _mm512_maskz_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47565            0b11111111_00000000,
47566            a,
47567            b,
47568        );
47569        let e = _mm512_set_ps(
47570            8., 8., 8., 8., 8., 8., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
47571        );
47572        assert_eq_m512(r, e);
47573    }
47574
47575    #[simd_test(enable = "avx512f")]
47576    unsafe fn test_mm512_fixupimm_round_ps() {
47577        let a = _mm512_set1_ps(f32::NAN);
47578        let b = _mm512_set1_ps(f32::MAX);
47579        let c = _mm512_set1_epi32(i32::MAX);
47580        let r = _mm512_fixupimm_round_ps::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c);
47581        let e = _mm512_set1_ps(0.0);
47582        assert_eq_m512(r, e);
47583    }
47584
47585    #[simd_test(enable = "avx512f")]
47586    unsafe fn test_mm512_mask_fixupimm_round_ps() {
47587        #[rustfmt::skip]
47588        let a = _mm512_set_ps(
47589            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
47590            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
47591            1., 1., 1., 1.,
47592            1., 1., 1., 1.,
47593        );
47594        let b = _mm512_set1_ps(f32::MAX);
47595        let c = _mm512_set1_epi32(i32::MAX);
47596        let r = _mm512_mask_fixupimm_round_ps::<5, _MM_FROUND_CUR_DIRECTION>(
47597            a,
47598            0b11111111_00000000,
47599            b,
47600            c,
47601        );
47602        let e = _mm512_set_ps(
47603            0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
47604        );
47605        assert_eq_m512(r, e);
47606    }
47607
47608    #[simd_test(enable = "avx512f")]
47609    unsafe fn test_mm512_maskz_fixupimm_round_ps() {
47610        #[rustfmt::skip]
47611        let a = _mm512_set_ps(
47612            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
47613            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
47614            1., 1., 1., 1.,
47615            1., 1., 1., 1.,
47616        );
47617        let b = _mm512_set1_ps(f32::MAX);
47618        let c = _mm512_set1_epi32(i32::MAX);
47619        let r = _mm512_maskz_fixupimm_round_ps::<5, _MM_FROUND_CUR_DIRECTION>(
47620            0b11111111_00000000,
47621            a,
47622            b,
47623            c,
47624        );
47625        let e = _mm512_set_ps(
47626            0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
47627        );
47628        assert_eq_m512(r, e);
47629    }
47630
47631    #[simd_test(enable = "avx512f")]
47632    unsafe fn test_mm512_getmant_round_ps() {
47633        let a = _mm512_set1_ps(10.);
47634        let r = _mm512_getmant_round_ps::<
47635            _MM_MANT_NORM_1_2,
47636            _MM_MANT_SIGN_SRC,
47637            _MM_FROUND_CUR_DIRECTION,
47638        >(a);
47639        let e = _mm512_set1_ps(1.25);
47640        assert_eq_m512(r, e);
47641    }
47642
47643    #[simd_test(enable = "avx512f")]
47644    unsafe fn test_mm512_mask_getmant_round_ps() {
47645        let a = _mm512_set1_ps(10.);
47646        let r = _mm512_mask_getmant_round_ps::<
47647            _MM_MANT_NORM_1_2,
47648            _MM_MANT_SIGN_SRC,
47649            _MM_FROUND_CUR_DIRECTION,
47650        >(a, 0, a);
47651        assert_eq_m512(r, a);
47652        let r = _mm512_mask_getmant_round_ps::<
47653            _MM_MANT_NORM_1_2,
47654            _MM_MANT_SIGN_SRC,
47655            _MM_FROUND_CUR_DIRECTION,
47656        >(a, 0b11111111_00000000, a);
47657        let e = _mm512_setr_ps(
47658            10., 10., 10., 10., 10., 10., 10., 10., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
47659        );
47660        assert_eq_m512(r, e);
47661    }
47662
47663    #[simd_test(enable = "avx512f")]
47664    unsafe fn test_mm512_maskz_getmant_round_ps() {
47665        let a = _mm512_set1_ps(10.);
47666        let r = _mm512_maskz_getmant_round_ps::<
47667            _MM_MANT_NORM_1_2,
47668            _MM_MANT_SIGN_SRC,
47669            _MM_FROUND_CUR_DIRECTION,
47670        >(0, a);
47671        assert_eq_m512(r, _mm512_setzero_ps());
47672        let r = _mm512_maskz_getmant_round_ps::<
47673            _MM_MANT_NORM_1_2,
47674            _MM_MANT_SIGN_SRC,
47675            _MM_FROUND_CUR_DIRECTION,
47676        >(0b11111111_00000000, a);
47677        let e = _mm512_setr_ps(
47678            0., 0., 0., 0., 0., 0., 0., 0., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
47679        );
47680        assert_eq_m512(r, e);
47681    }
47682
47683    #[simd_test(enable = "avx512f")]
47684    unsafe fn test_mm512_cvtps_epi32() {
47685        let a = _mm512_setr_ps(
47686            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47687        );
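        // The default MXCSR rounding mode is round-to-nearest-even, so halfway cases
        // round to the even integer: -3.5 -> -4, 9.5 -> 10, 11.5 -> 12.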
47688        let r = _mm512_cvtps_epi32(a);
47689        let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
47690        assert_eq_m512i(r, e);
47691    }
47692
47693    #[simd_test(enable = "avx512f")]
47694    unsafe fn test_mm512_mask_cvtps_epi32() {
47695        let a = _mm512_setr_ps(
47696            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47697        );
47698        let src = _mm512_set1_epi32(0);
47699        let r = _mm512_mask_cvtps_epi32(src, 0, a);
47700        assert_eq_m512i(r, src);
47701        let r = _mm512_mask_cvtps_epi32(src, 0b00000000_11111111, a);
47702        let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
47703        assert_eq_m512i(r, e);
47704    }
47705
47706    #[simd_test(enable = "avx512f")]
47707    unsafe fn test_mm512_maskz_cvtps_epi32() {
47708        let a = _mm512_setr_ps(
47709            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47710        );
47711        let r = _mm512_maskz_cvtps_epi32(0, a);
47712        assert_eq_m512i(r, _mm512_setzero_si512());
47713        let r = _mm512_maskz_cvtps_epi32(0b00000000_11111111, a);
47714        let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
47715        assert_eq_m512i(r, e);
47716    }
47717
47718    #[simd_test(enable = "avx512f,avx512vl")]
47719    unsafe fn test_mm256_mask_cvtps_epi32() {
47720        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47721        let src = _mm256_set1_epi32(0);
47722        let r = _mm256_mask_cvtps_epi32(src, 0, a);
47723        assert_eq_m256i(r, src);
47724        let r = _mm256_mask_cvtps_epi32(src, 0b11111111, a);
47725        let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
47726        assert_eq_m256i(r, e);
47727    }
47728
47729    #[simd_test(enable = "avx512f,avx512vl")]
47730    unsafe fn test_mm256_maskz_cvtps_epi32() {
47731        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47732        let r = _mm256_maskz_cvtps_epi32(0, a);
47733        assert_eq_m256i(r, _mm256_setzero_si256());
47734        let r = _mm256_maskz_cvtps_epi32(0b11111111, a);
47735        let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
47736        assert_eq_m256i(r, e);
47737    }
47738
47739    #[simd_test(enable = "avx512f,avx512vl")]
47740    unsafe fn test_mm_mask_cvtps_epi32() {
47741        let a = _mm_set_ps(12., 13.5, 14., 15.5);
47742        let src = _mm_set1_epi32(0);
47743        let r = _mm_mask_cvtps_epi32(src, 0, a);
47744        assert_eq_m128i(r, src);
47745        let r = _mm_mask_cvtps_epi32(src, 0b00001111, a);
47746        let e = _mm_set_epi32(12, 14, 14, 16);
47747        assert_eq_m128i(r, e);
47748    }
47749
47750    #[simd_test(enable = "avx512f,avx512vl")]
47751    unsafe fn test_mm_maskz_cvtps_epi32() {
47752        let a = _mm_set_ps(12., 13.5, 14., 15.5);
47753        let r = _mm_maskz_cvtps_epi32(0, a);
47754        assert_eq_m128i(r, _mm_setzero_si128());
47755        let r = _mm_maskz_cvtps_epi32(0b00001111, a);
47756        let e = _mm_set_epi32(12, 14, 14, 16);
47757        assert_eq_m128i(r, e);
47758    }
47759
47760    #[simd_test(enable = "avx512f")]
47761    unsafe fn test_mm512_cvtps_epu32() {
47762        let a = _mm512_setr_ps(
47763            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47764        );
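        // Negative inputs are out of range for an unsigned conversion and produce the
        // integer indefinite value 0xFFFFFFFF, which reads back as -1 below.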
47765        let r = _mm512_cvtps_epu32(a);
47766        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 10, 10, 12, 12, 14, 14, 16);
47767        assert_eq_m512i(r, e);
47768    }
47769
47770    #[simd_test(enable = "avx512f")]
47771    unsafe fn test_mm512_mask_cvtps_epu32() {
47772        let a = _mm512_setr_ps(
47773            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47774        );
47775        let src = _mm512_set1_epi32(0);
47776        let r = _mm512_mask_cvtps_epu32(src, 0, a);
47777        assert_eq_m512i(r, src);
47778        let r = _mm512_mask_cvtps_epu32(src, 0b00000000_11111111, a);
47779        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
47780        assert_eq_m512i(r, e);
47781    }
47782
47783    #[simd_test(enable = "avx512f")]
47784    unsafe fn test_mm512_maskz_cvtps_epu32() {
47785        let a = _mm512_setr_ps(
47786            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47787        );
47788        let r = _mm512_maskz_cvtps_epu32(0, a);
47789        assert_eq_m512i(r, _mm512_setzero_si512());
47790        let r = _mm512_maskz_cvtps_epu32(0b00000000_11111111, a);
47791        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
47792        assert_eq_m512i(r, e);
47793    }
47794
47795    #[simd_test(enable = "avx512f,avx512vl")]
47796    unsafe fn test_mm256_cvtps_epu32() {
47797        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47798        let r = _mm256_cvtps_epu32(a);
47799        let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
47800        assert_eq_m256i(r, e);
47801    }
47802
47803    #[simd_test(enable = "avx512f,avx512vl")]
47804    unsafe fn test_mm256_mask_cvtps_epu32() {
47805        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47806        let src = _mm256_set1_epi32(0);
47807        let r = _mm256_mask_cvtps_epu32(src, 0, a);
47808        assert_eq_m256i(r, src);
47809        let r = _mm256_mask_cvtps_epu32(src, 0b11111111, a);
47810        let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
47811        assert_eq_m256i(r, e);
47812    }
47813
47814    #[simd_test(enable = "avx512f,avx512vl")]
47815    unsafe fn test_mm256_maskz_cvtps_epu32() {
47816        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47817        let r = _mm256_maskz_cvtps_epu32(0, a);
47818        assert_eq_m256i(r, _mm256_setzero_si256());
47819        let r = _mm256_maskz_cvtps_epu32(0b11111111, a);
47820        let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
47821        assert_eq_m256i(r, e);
47822    }
47823
47824    #[simd_test(enable = "avx512f,avx512vl")]
47825    unsafe fn test_mm_cvtps_epu32() {
47826        let a = _mm_set_ps(12., 13.5, 14., 15.5);
47827        let r = _mm_cvtps_epu32(a);
47828        let e = _mm_set_epi32(12, 14, 14, 16);
47829        assert_eq_m128i(r, e);
47830    }
47831
47832    #[simd_test(enable = "avx512f,avx512vl")]
47833    unsafe fn test_mm_mask_cvtps_epu32() {
47834        let a = _mm_set_ps(12., 13.5, 14., 15.5);
47835        let src = _mm_set1_epi32(0);
47836        let r = _mm_mask_cvtps_epu32(src, 0, a);
47837        assert_eq_m128i(r, src);
47838        let r = _mm_mask_cvtps_epu32(src, 0b00001111, a);
47839        let e = _mm_set_epi32(12, 14, 14, 16);
47840        assert_eq_m128i(r, e);
47841    }
47842
47843    #[simd_test(enable = "avx512f,avx512vl")]
47844    unsafe fn test_mm_maskz_cvtps_epu32() {
47845        let a = _mm_set_ps(12., 13.5, 14., 15.5);
47846        let r = _mm_maskz_cvtps_epu32(0, a);
47847        assert_eq_m128i(r, _mm_setzero_si128());
47848        let r = _mm_maskz_cvtps_epu32(0b00001111, a);
47849        let e = _mm_set_epi32(12, 14, 14, 16);
47850        assert_eq_m128i(r, e);
47851    }
47852
47853    #[simd_test(enable = "avx512f")]
47854    unsafe fn test_mm512_cvtepi8_epi32() {
47855        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47856        let r = _mm512_cvtepi8_epi32(a);
47857        let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47858        assert_eq_m512i(r, e);
47859    }
47860
47861    #[simd_test(enable = "avx512f")]
47862    unsafe fn test_mm512_mask_cvtepi8_epi32() {
47863        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47864        let src = _mm512_set1_epi32(-1);
47865        let r = _mm512_mask_cvtepi8_epi32(src, 0, a);
47866        assert_eq_m512i(r, src);
47867        let r = _mm512_mask_cvtepi8_epi32(src, 0b00000000_11111111, a);
47868        let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
47869        assert_eq_m512i(r, e);
47870    }
47871
47872    #[simd_test(enable = "avx512f")]
47873    unsafe fn test_mm512_maskz_cvtepi8_epi32() {
47874        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47875        let r = _mm512_maskz_cvtepi8_epi32(0, a);
47876        assert_eq_m512i(r, _mm512_setzero_si512());
47877        let r = _mm512_maskz_cvtepi8_epi32(0b00000000_11111111, a);
47878        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
47879        assert_eq_m512i(r, e);
47880    }
47881
47882    #[simd_test(enable = "avx512f,avx512vl")]
47883    unsafe fn test_mm256_mask_cvtepi8_epi32() {
47884        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47885        let src = _mm256_set1_epi32(-1);
47886        let r = _mm256_mask_cvtepi8_epi32(src, 0, a);
47887        assert_eq_m256i(r, src);
47888        let r = _mm256_mask_cvtepi8_epi32(src, 0b11111111, a);
47889        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
47890        assert_eq_m256i(r, e);
47891    }
47892
47893    #[simd_test(enable = "avx512f,avx512vl")]
47894    unsafe fn test_mm256_maskz_cvtepi8_epi32() {
47895        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47896        let r = _mm256_maskz_cvtepi8_epi32(0, a);
47897        assert_eq_m256i(r, _mm256_setzero_si256());
47898        let r = _mm256_maskz_cvtepi8_epi32(0b11111111, a);
47899        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
47900        assert_eq_m256i(r, e);
47901    }
47902
47903    #[simd_test(enable = "avx512f,avx512vl")]
47904    unsafe fn test_mm_mask_cvtepi8_epi32() {
47905        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47906        let src = _mm_set1_epi32(-1);
47907        let r = _mm_mask_cvtepi8_epi32(src, 0, a);
47908        assert_eq_m128i(r, src);
47909        let r = _mm_mask_cvtepi8_epi32(src, 0b00001111, a);
47910        let e = _mm_set_epi32(12, 13, 14, 15);
47911        assert_eq_m128i(r, e);
47912    }
47913
47914    #[simd_test(enable = "avx512f,avx512vl")]
47915    unsafe fn test_mm_maskz_cvtepi8_epi32() {
47916        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47917        let r = _mm_maskz_cvtepi8_epi32(0, a);
47918        assert_eq_m128i(r, _mm_setzero_si128());
47919        let r = _mm_maskz_cvtepi8_epi32(0b00001111, a);
47920        let e = _mm_set_epi32(12, 13, 14, 15);
47921        assert_eq_m128i(r, e);
47922    }
47923
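    // The cvtepu8 (zero-extend) tests reuse the same small, non-negative inputs as the
    // cvtepi8 (sign-extend) tests above, so both produce identical results; the two would
    // only differ for byte values with the high bit set.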
47924    #[simd_test(enable = "avx512f")]
47925    unsafe fn test_mm512_cvtepu8_epi32() {
47926        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47927        let r = _mm512_cvtepu8_epi32(a);
47928        let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47929        assert_eq_m512i(r, e);
47930    }
47931
47932    #[simd_test(enable = "avx512f")]
47933    unsafe fn test_mm512_mask_cvtepu8_epi32() {
47934        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47935        let src = _mm512_set1_epi32(-1);
47936        let r = _mm512_mask_cvtepu8_epi32(src, 0, a);
47937        assert_eq_m512i(r, src);
47938        let r = _mm512_mask_cvtepu8_epi32(src, 0b00000000_11111111, a);
47939        let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
47940        assert_eq_m512i(r, e);
47941    }
47942
47943    #[simd_test(enable = "avx512f")]
47944    unsafe fn test_mm512_maskz_cvtepu8_epi32() {
47945        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47946        let r = _mm512_maskz_cvtepu8_epi32(0, a);
47947        assert_eq_m512i(r, _mm512_setzero_si512());
47948        let r = _mm512_maskz_cvtepu8_epi32(0b00000000_11111111, a);
47949        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
47950        assert_eq_m512i(r, e);
47951    }
47952
47953    #[simd_test(enable = "avx512f,avx512vl")]
47954    unsafe fn test_mm256_mask_cvtepu8_epi32() {
47955        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47956        let src = _mm256_set1_epi32(-1);
47957        let r = _mm256_mask_cvtepu8_epi32(src, 0, a);
47958        assert_eq_m256i(r, src);
47959        let r = _mm256_mask_cvtepu8_epi32(src, 0b11111111, a);
47960        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
47961        assert_eq_m256i(r, e);
47962    }
47963
47964    #[simd_test(enable = "avx512f,avx512vl")]
47965    unsafe fn test_mm256_maskz_cvtepu8_epi32() {
47966        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47967        let r = _mm256_maskz_cvtepu8_epi32(0, a);
47968        assert_eq_m256i(r, _mm256_setzero_si256());
47969        let r = _mm256_maskz_cvtepu8_epi32(0b11111111, a);
47970        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
47971        assert_eq_m256i(r, e);
47972    }
47973
47974    #[simd_test(enable = "avx512f,avx512vl")]
47975    unsafe fn test_mm_mask_cvtepu8_epi32() {
47976        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47977        let src = _mm_set1_epi32(-1);
47978        let r = _mm_mask_cvtepu8_epi32(src, 0, a);
47979        assert_eq_m128i(r, src);
47980        let r = _mm_mask_cvtepu8_epi32(src, 0b00001111, a);
47981        let e = _mm_set_epi32(12, 13, 14, 15);
47982        assert_eq_m128i(r, e);
47983    }
47984
47985    #[simd_test(enable = "avx512f,avx512vl")]
47986    unsafe fn test_mm_maskz_cvtepu8_epi32() {
47987        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47988        let r = _mm_maskz_cvtepu8_epi32(0, a);
47989        assert_eq_m128i(r, _mm_setzero_si128());
47990        let r = _mm_maskz_cvtepu8_epi32(0b00001111, a);
47991        let e = _mm_set_epi32(12, 13, 14, 15);
47992        assert_eq_m128i(r, e);
47993    }
47994
47995    #[simd_test(enable = "avx512f")]
47996    unsafe fn test_mm512_cvtepi16_epi32() {
47997        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47998        let r = _mm512_cvtepi16_epi32(a);
47999        let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48000        assert_eq_m512i(r, e);
48001    }
48002
48003    #[simd_test(enable = "avx512f")]
48004    unsafe fn test_mm512_mask_cvtepi16_epi32() {
48005        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48006        let src = _mm512_set1_epi32(-1);
48007        let r = _mm512_mask_cvtepi16_epi32(src, 0, a);
48008        assert_eq_m512i(r, src);
48009        let r = _mm512_mask_cvtepi16_epi32(src, 0b00000000_11111111, a);
48010        let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
48011        assert_eq_m512i(r, e);
48012    }
48013
48014    #[simd_test(enable = "avx512f")]
48015    unsafe fn test_mm512_maskz_cvtepi16_epi32() {
48016        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48017        let r = _mm512_maskz_cvtepi16_epi32(0, a);
48018        assert_eq_m512i(r, _mm512_setzero_si512());
48019        let r = _mm512_maskz_cvtepi16_epi32(0b00000000_11111111, a);
48020        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
48021        assert_eq_m512i(r, e);
48022    }
48023
48024    #[simd_test(enable = "avx512f,avx512vl")]
48025    unsafe fn test_mm256_mask_cvtepi16_epi32() {
48026        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48027        let src = _mm256_set1_epi32(-1);
48028        let r = _mm256_mask_cvtepi16_epi32(src, 0, a);
48029        assert_eq_m256i(r, src);
48030        let r = _mm256_mask_cvtepi16_epi32(src, 0b11111111, a);
48031        let e = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48032        assert_eq_m256i(r, e);
48033    }
48034
48035    #[simd_test(enable = "avx512f,avx512vl")]
48036    unsafe fn test_mm256_maskz_cvtepi16_epi32() {
48037        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48038        let r = _mm256_maskz_cvtepi16_epi32(0, a);
48039        assert_eq_m256i(r, _mm256_setzero_si256());
48040        let r = _mm256_maskz_cvtepi16_epi32(0b11111111, a);
48041        let e = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48042        assert_eq_m256i(r, e);
48043    }
48044
48045    #[simd_test(enable = "avx512f,avx512vl")]
48046    unsafe fn test_mm_mask_cvtepi16_epi32() {
48047        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48048        let src = _mm_set1_epi32(-1);
48049        let r = _mm_mask_cvtepi16_epi32(src, 0, a);
48050        assert_eq_m128i(r, src);
48051        let r = _mm_mask_cvtepi16_epi32(src, 0b00001111, a);
48052        let e = _mm_set_epi32(4, 5, 6, 7);
48053        assert_eq_m128i(r, e);
48054    }
48055
48056    #[simd_test(enable = "avx512f,avx512vl")]
48057    unsafe fn test_mm_maskz_cvtepi16_epi32() {
48058        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48059        let r = _mm_maskz_cvtepi16_epi32(0, a);
48060        assert_eq_m128i(r, _mm_setzero_si128());
48061        let r = _mm_maskz_cvtepi16_epi32(0b00001111, a);
48062        let e = _mm_set_epi32(4, 5, 6, 7);
48063        assert_eq_m128i(r, e);
48064    }
48065
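    // As with cvtepu8, the 16-bit inputs below are non-negative, so zero-extension and
    // sign-extension agree on the expected values.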
48066    #[simd_test(enable = "avx512f")]
48067    unsafe fn test_mm512_cvtepu16_epi32() {
48068        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48069        let r = _mm512_cvtepu16_epi32(a);
48070        let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48071        assert_eq_m512i(r, e);
48072    }
48073
48074    #[simd_test(enable = "avx512f")]
48075    unsafe fn test_mm512_mask_cvtepu16_epi32() {
48076        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48077        let src = _mm512_set1_epi32(-1);
48078        let r = _mm512_mask_cvtepu16_epi32(src, 0, a);
48079        assert_eq_m512i(r, src);
48080        let r = _mm512_mask_cvtepu16_epi32(src, 0b00000000_11111111, a);
48081        let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
48082        assert_eq_m512i(r, e);
48083    }
48084
48085    #[simd_test(enable = "avx512f")]
48086    unsafe fn test_mm512_maskz_cvtepu16_epi32() {
48087        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48088        let r = _mm512_maskz_cvtepu16_epi32(0, a);
48089        assert_eq_m512i(r, _mm512_setzero_si512());
48090        let r = _mm512_maskz_cvtepu16_epi32(0b00000000_11111111, a);
48091        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
48092        assert_eq_m512i(r, e);
48093    }
48094
48095    #[simd_test(enable = "avx512f,avx512vl")]
48096    unsafe fn test_mm256_mask_cvtepu16_epi32() {
48097        let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
48098        let src = _mm256_set1_epi32(-1);
48099        let r = _mm256_mask_cvtepu16_epi32(src, 0, a);
48100        assert_eq_m256i(r, src);
48101        let r = _mm256_mask_cvtepu16_epi32(src, 0b11111111, a);
48102        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
48103        assert_eq_m256i(r, e);
48104    }
48105
48106    #[simd_test(enable = "avx512f,avx512vl")]
48107    unsafe fn test_mm256_maskz_cvtepu16_epi32() {
48108        let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
48109        let r = _mm256_maskz_cvtepu16_epi32(0, a);
48110        assert_eq_m256i(r, _mm256_setzero_si256());
48111        let r = _mm256_maskz_cvtepu16_epi32(0b11111111, a);
48112        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
48113        assert_eq_m256i(r, e);
48114    }
48115
48116    #[simd_test(enable = "avx512f,avx512vl")]
48117    unsafe fn test_mm_mask_cvtepu16_epi32() {
48118        let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
48119        let src = _mm_set1_epi32(-1);
48120        let r = _mm_mask_cvtepu16_epi32(src, 0, a);
48121        assert_eq_m128i(r, src);
48122        let r = _mm_mask_cvtepu16_epi32(src, 0b00001111, a);
48123        let e = _mm_set_epi32(12, 13, 14, 15);
48124        assert_eq_m128i(r, e);
48125    }
48126
48127    #[simd_test(enable = "avx512f,avx512vl")]
48128    unsafe fn test_mm_maskz_cvtepu16_epi32() {
48129        let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
48130        let r = _mm_maskz_cvtepu16_epi32(0, a);
48131        assert_eq_m128i(r, _mm_setzero_si128());
48132        let r = _mm_maskz_cvtepu16_epi32(0b00001111, a);
48133        let e = _mm_set_epi32(12, 13, 14, 15);
48134        assert_eq_m128i(r, e);
48135    }
48136
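    // cvtepi32_ps / cvtepu32_ps convert lane-wise to f32; every input used here is exactly
    // representable as f32, so the expected vectors simply mirror the integer inputs.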
48137    #[simd_test(enable = "avx512f")]
48138    unsafe fn test_mm512_cvtepi32_ps() {
48139        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48140        let r = _mm512_cvtepi32_ps(a);
48141        let e = _mm512_set_ps(
48142            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
48143        );
48144        assert_eq_m512(r, e);
48145    }
48146
48147    #[simd_test(enable = "avx512f")]
48148    unsafe fn test_mm512_mask_cvtepi32_ps() {
48149        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48150        let src = _mm512_set1_ps(-1.);
48151        let r = _mm512_mask_cvtepi32_ps(src, 0, a);
48152        assert_eq_m512(r, src);
48153        let r = _mm512_mask_cvtepi32_ps(src, 0b00000000_11111111, a);
48154        let e = _mm512_set_ps(
48155            -1., -1., -1., -1., -1., -1., -1., -1., 8., 9., 10., 11., 12., 13., 14., 15.,
48156        );
48157        assert_eq_m512(r, e);
48158    }
48159
48160    #[simd_test(enable = "avx512f")]
48161    unsafe fn test_mm512_maskz_cvtepi32_ps() {
48162        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48163        let r = _mm512_maskz_cvtepi32_ps(0, a);
48164        assert_eq_m512(r, _mm512_setzero_ps());
48165        let r = _mm512_maskz_cvtepi32_ps(0b00000000_11111111, a);
48166        let e = _mm512_set_ps(
48167            0., 0., 0., 0., 0., 0., 0., 0., 8., 9., 10., 11., 12., 13., 14., 15.,
48168        );
48169        assert_eq_m512(r, e);
48170    }
48171
48172    #[simd_test(enable = "avx512f,avx512vl")]
48173    unsafe fn test_mm256_mask_cvtepi32_ps() {
48174        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
48175        let src = _mm256_set1_ps(-1.);
48176        let r = _mm256_mask_cvtepi32_ps(src, 0, a);
48177        assert_eq_m256(r, src);
48178        let r = _mm256_mask_cvtepi32_ps(src, 0b11111111, a);
48179        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
48180        assert_eq_m256(r, e);
48181    }
48182
48183    #[simd_test(enable = "avx512f,avx512vl")]
48184    unsafe fn test_mm256_maskz_cvtepi32_ps() {
48185        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
48186        let r = _mm256_maskz_cvtepi32_ps(0, a);
48187        assert_eq_m256(r, _mm256_setzero_ps());
48188        let r = _mm256_maskz_cvtepi32_ps(0b11111111, a);
48189        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
48190        assert_eq_m256(r, e);
48191    }
48192
48193    #[simd_test(enable = "avx512f,avx512vl")]
48194    unsafe fn test_mm_mask_cvtepi32_ps() {
48195        let a = _mm_set_epi32(1, 2, 3, 4);
48196        let src = _mm_set1_ps(-1.);
48197        let r = _mm_mask_cvtepi32_ps(src, 0, a);
48198        assert_eq_m128(r, src);
48199        let r = _mm_mask_cvtepi32_ps(src, 0b00001111, a);
48200        let e = _mm_set_ps(1., 2., 3., 4.);
48201        assert_eq_m128(r, e);
48202    }
48203
48204    #[simd_test(enable = "avx512f,avx512vl")]
48205    unsafe fn test_mm_maskz_cvtepi32_ps() {
48206        let a = _mm_set_epi32(1, 2, 3, 4);
48207        let r = _mm_maskz_cvtepi32_ps(0, a);
48208        assert_eq_m128(r, _mm_setzero_ps());
48209        let r = _mm_maskz_cvtepi32_ps(0b00001111, a);
48210        let e = _mm_set_ps(1., 2., 3., 4.);
48211        assert_eq_m128(r, e);
48212    }
48213
48214    #[simd_test(enable = "avx512f")]
48215    unsafe fn test_mm512_cvtepu32_ps() {
48216        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48217        let r = _mm512_cvtepu32_ps(a);
48218        let e = _mm512_set_ps(
48219            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
48220        );
48221        assert_eq_m512(r, e);
48222    }
48223
48224    #[simd_test(enable = "avx512f")]
48225    unsafe fn test_mm512_mask_cvtepu32_ps() {
48226        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48227        let src = _mm512_set1_ps(-1.);
48228        let r = _mm512_mask_cvtepu32_ps(src, 0, a);
48229        assert_eq_m512(r, src);
48230        let r = _mm512_mask_cvtepu32_ps(src, 0b00000000_11111111, a);
48231        let e = _mm512_set_ps(
48232            -1., -1., -1., -1., -1., -1., -1., -1., 8., 9., 10., 11., 12., 13., 14., 15.,
48233        );
48234        assert_eq_m512(r, e);
48235    }
48236
48237    #[simd_test(enable = "avx512f")]
48238    unsafe fn test_mm512_maskz_cvtepu32_ps() {
48239        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48240        let r = _mm512_maskz_cvtepu32_ps(0, a);
48241        assert_eq_m512(r, _mm512_setzero_ps());
48242        let r = _mm512_maskz_cvtepu32_ps(0b00000000_11111111, a);
48243        let e = _mm512_set_ps(
48244            0., 0., 0., 0., 0., 0., 0., 0., 8., 9., 10., 11., 12., 13., 14., 15.,
48245        );
48246        assert_eq_m512(r, e);
48247    }
48248
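    // cvtepi32_epi16 / cvtepi32_epi8 truncate each lane (VPMOVDW / VPMOVDB). All values here
    // fit in the narrower type, and variants whose result does not fill the 128-bit
    // destination zero the unused upper elements, hence the zero padding in the expected
    // vectors for the 256- and 128-bit source widths.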
48249    #[simd_test(enable = "avx512f")]
48250    unsafe fn test_mm512_cvtepi32_epi16() {
48251        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48252        let r = _mm512_cvtepi32_epi16(a);
48253        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48254        assert_eq_m256i(r, e);
48255    }
48256
48257    #[simd_test(enable = "avx512f")]
48258    unsafe fn test_mm512_mask_cvtepi32_epi16() {
48259        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48260        let src = _mm256_set1_epi16(-1);
48261        let r = _mm512_mask_cvtepi32_epi16(src, 0, a);
48262        assert_eq_m256i(r, src);
48263        let r = _mm512_mask_cvtepi32_epi16(src, 0b00000000_11111111, a);
48264        let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
48265        assert_eq_m256i(r, e);
48266    }
48267
48268    #[simd_test(enable = "avx512f")]
48269    unsafe fn test_mm512_maskz_cvtepi32_epi16() {
48270        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48271        let r = _mm512_maskz_cvtepi32_epi16(0, a);
48272        assert_eq_m256i(r, _mm256_setzero_si256());
48273        let r = _mm512_maskz_cvtepi32_epi16(0b00000000_11111111, a);
48274        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
48275        assert_eq_m256i(r, e);
48276    }
48277
48278    #[simd_test(enable = "avx512f,avx512vl")]
48279    unsafe fn test_mm256_cvtepi32_epi16() {
48280        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48281        let r = _mm256_cvtepi32_epi16(a);
48282        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48283        assert_eq_m128i(r, e);
48284    }
48285
48286    #[simd_test(enable = "avx512f,avx512vl")]
48287    unsafe fn test_mm256_mask_cvtepi32_epi16() {
48288        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48289        let src = _mm_set1_epi16(-1);
48290        let r = _mm256_mask_cvtepi32_epi16(src, 0, a);
48291        assert_eq_m128i(r, src);
48292        let r = _mm256_mask_cvtepi32_epi16(src, 0b11111111, a);
48293        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48294        assert_eq_m128i(r, e);
48295    }
48296
48297    #[simd_test(enable = "avx512f,avx512vl")]
48298    unsafe fn test_mm256_maskz_cvtepi32_epi16() {
48299        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48300        let r = _mm256_maskz_cvtepi32_epi16(0, a);
48301        assert_eq_m128i(r, _mm_setzero_si128());
48302        let r = _mm256_maskz_cvtepi32_epi16(0b11111111, a);
48303        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48304        assert_eq_m128i(r, e);
48305    }
48306
48307    #[simd_test(enable = "avx512f,avx512vl")]
48308    unsafe fn test_mm_cvtepi32_epi16() {
48309        let a = _mm_set_epi32(4, 5, 6, 7);
48310        let r = _mm_cvtepi32_epi16(a);
48311        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48312        assert_eq_m128i(r, e);
48313    }
48314
48315    #[simd_test(enable = "avx512f,avx512vl")]
48316    unsafe fn test_mm_mask_cvtepi32_epi16() {
48317        let a = _mm_set_epi32(4, 5, 6, 7);
48318        let src = _mm_set1_epi16(0);
48319        let r = _mm_mask_cvtepi32_epi16(src, 0, a);
48320        assert_eq_m128i(r, src);
48321        let r = _mm_mask_cvtepi32_epi16(src, 0b00001111, a);
48322        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48323        assert_eq_m128i(r, e);
48324    }
48325
48326    #[simd_test(enable = "avx512f,avx512vl")]
48327    unsafe fn test_mm_maskz_cvtepi32_epi16() {
48328        let a = _mm_set_epi32(4, 5, 6, 7);
48329        let r = _mm_maskz_cvtepi32_epi16(0, a);
48330        assert_eq_m128i(r, _mm_setzero_si128());
48331        let r = _mm_maskz_cvtepi32_epi16(0b00001111, a);
48332        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48333        assert_eq_m128i(r, e);
48334    }
48335
48336    #[simd_test(enable = "avx512f")]
48337    unsafe fn test_mm512_cvtepi32_epi8() {
48338        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48339        let r = _mm512_cvtepi32_epi8(a);
48340        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48341        assert_eq_m128i(r, e);
48342    }
48343
48344    #[simd_test(enable = "avx512f")]
48345    unsafe fn test_mm512_mask_cvtepi32_epi8() {
48346        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48347        let src = _mm_set1_epi8(-1);
48348        let r = _mm512_mask_cvtepi32_epi8(src, 0, a);
48349        assert_eq_m128i(r, src);
48350        let r = _mm512_mask_cvtepi32_epi8(src, 0b00000000_11111111, a);
48351        let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
48352        assert_eq_m128i(r, e);
48353    }
48354
48355    #[simd_test(enable = "avx512f")]
48356    unsafe fn test_mm512_maskz_cvtepi32_epi8() {
48357        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48358        let r = _mm512_maskz_cvtepi32_epi8(0, a);
48359        assert_eq_m128i(r, _mm_setzero_si128());
48360        let r = _mm512_maskz_cvtepi32_epi8(0b00000000_11111111, a);
48361        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
48362        assert_eq_m128i(r, e);
48363    }
48364
48365    #[simd_test(enable = "avx512f,avx512vl")]
48366    unsafe fn test_mm256_cvtepi32_epi8() {
48367        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48368        let r = _mm256_cvtepi32_epi8(a);
48369        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7);
48370        assert_eq_m128i(r, e);
48371    }
48372
48373    #[simd_test(enable = "avx512f,avx512vl")]
48374    unsafe fn test_mm256_mask_cvtepi32_epi8() {
48375        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48376        let src = _mm_set1_epi8(0);
48377        let r = _mm256_mask_cvtepi32_epi8(src, 0, a);
48378        assert_eq_m128i(r, src);
48379        let r = _mm256_mask_cvtepi32_epi8(src, 0b11111111, a);
48380        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7);
48381        assert_eq_m128i(r, e);
48382    }
48383
48384    #[simd_test(enable = "avx512f,avx512vl")]
48385    unsafe fn test_mm256_maskz_cvtepi32_epi8() {
48386        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48387        let r = _mm256_maskz_cvtepi32_epi8(0, a);
48388        assert_eq_m128i(r, _mm_setzero_si128());
48389        let r = _mm256_maskz_cvtepi32_epi8(0b11111111, a);
48390        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7);
48391        assert_eq_m128i(r, e);
48392    }
48393
48394    #[simd_test(enable = "avx512f,avx512vl")]
48395    unsafe fn test_mm_cvtepi32_epi8() {
48396        let a = _mm_set_epi32(4, 5, 6, 7);
48397        let r = _mm_cvtepi32_epi8(a);
48398        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7);
48399        assert_eq_m128i(r, e);
48400    }
48401
48402    #[simd_test(enable = "avx512f,avx512vl")]
48403    unsafe fn test_mm_mask_cvtepi32_epi8() {
48404        let a = _mm_set_epi32(4, 5, 6, 7);
48405        let src = _mm_set1_epi8(0);
48406        let r = _mm_mask_cvtepi32_epi8(src, 0, a);
48407        assert_eq_m128i(r, src);
48408        let r = _mm_mask_cvtepi32_epi8(src, 0b00001111, a);
48409        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7);
48410        assert_eq_m128i(r, e);
48411    }
48412
48413    #[simd_test(enable = "avx512f,avx512vl")]
48414    unsafe fn test_mm_maskz_cvtepi32_epi8() {
48415        let a = _mm_set_epi32(4, 5, 6, 7);
48416        let r = _mm_maskz_cvtepi32_epi8(0, a);
48417        assert_eq_m128i(r, _mm_setzero_si128());
48418        let r = _mm_maskz_cvtepi32_epi8(0b00001111, a);
48419        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7);
48420        assert_eq_m128i(r, e);
48421    }
48422
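    // cvtsepi32_* narrow with signed saturation (VPMOVSDW / VPMOVSDB): i32::MIN and i32::MAX
    // clamp to the destination type's MIN and MAX rather than wrapping.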
48423    #[simd_test(enable = "avx512f")]
48424    unsafe fn test_mm512_cvtsepi32_epi16() {
48425        #[rustfmt::skip]
48426        let a = _mm512_set_epi32(
48427            0, 1, 2, 3,
48428            4, 5, 6, 7,
48429            8, 9, 10, 11,
48430            12, 13, i32::MIN, i32::MAX,
48431        );
48432        let r = _mm512_cvtsepi32_epi16(a);
48433        #[rustfmt::skip]
48434        let e = _mm256_set_epi16(
48435            0, 1, 2, 3,
48436            4, 5, 6, 7,
48437            8, 9, 10, 11,
48438            12, 13, i16::MIN, i16::MAX,
48439        );
48440        assert_eq_m256i(r, e);
48441    }
48442
48443    #[simd_test(enable = "avx512f")]
48444    unsafe fn test_mm512_mask_cvtsepi32_epi16() {
48445        #[rustfmt::skip]
48446        let a = _mm512_set_epi32(
48447            0, 1, 2, 3,
48448            4, 5, 6, 7,
48449            8, 9, 10, 11,
48450            12, 13, i32::MIN, i32::MAX,
48451        );
48452        let src = _mm256_set1_epi16(-1);
48453        let r = _mm512_mask_cvtsepi32_epi16(src, 0, a);
48454        assert_eq_m256i(r, src);
48455        let r = _mm512_mask_cvtsepi32_epi16(src, 0b00000000_11111111, a);
48456        #[rustfmt::skip]
48457        let e = _mm256_set_epi16(
48458            -1, -1, -1, -1,
48459            -1, -1, -1, -1,
48460            8, 9, 10, 11,
48461            12, 13, i16::MIN, i16::MAX,
48462        );
48463        assert_eq_m256i(r, e);
48464    }
48465
48466    #[simd_test(enable = "avx512f")]
48467    unsafe fn test_mm512_maskz_cvtsepi32_epi16() {
48468        #[rustfmt::skip]
48469        let a = _mm512_set_epi32(
48470            0, 1, 2, 3,
48471            4, 5, 6, 7,
48472            8, 9, 10, 11,
48473            12, 13, i32::MIN, i32::MAX,
48474        );
48475        let r = _mm512_maskz_cvtsepi32_epi16(0, a);
48476        assert_eq_m256i(r, _mm256_setzero_si256());
48477        let r = _mm512_maskz_cvtsepi32_epi16(0b00000000_11111111, a);
48478        #[rustfmt::skip]
48479        let e = _mm256_set_epi16(
48480            0, 0, 0, 0,
48481            0, 0, 0, 0,
48482            8, 9, 10, 11,
48483            12, 13, i16::MIN, i16::MAX,
48484        );
48485        assert_eq_m256i(r, e);
48486    }
48487
48488    #[simd_test(enable = "avx512f,avx512vl")]
48489    unsafe fn test_mm256_cvtsepi32_epi16() {
48490        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48491        let r = _mm256_cvtsepi32_epi16(a);
48492        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48493        assert_eq_m128i(r, e);
48494    }
48495
48496    #[simd_test(enable = "avx512f,avx512vl")]
48497    unsafe fn test_mm256_mask_cvtsepi32_epi16() {
48498        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48499        let src = _mm_set1_epi16(-1);
48500        let r = _mm256_mask_cvtsepi32_epi16(src, 0, a);
48501        assert_eq_m128i(r, src);
48502        let r = _mm256_mask_cvtsepi32_epi16(src, 0b11111111, a);
48503        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48504        assert_eq_m128i(r, e);
48505    }
48506
48507    #[simd_test(enable = "avx512f,avx512vl")]
48508    unsafe fn test_mm256_maskz_cvtsepi32_epi16() {
48509        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48510        let r = _mm256_maskz_cvtsepi32_epi16(0, a);
48511        assert_eq_m128i(r, _mm_setzero_si128());
48512        let r = _mm256_maskz_cvtsepi32_epi16(0b11111111, a);
48513        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48514        assert_eq_m128i(r, e);
48515    }
48516
48517    #[simd_test(enable = "avx512f,avx512vl")]
48518    unsafe fn test_mm_cvtsepi32_epi16() {
48519        let a = _mm_set_epi32(4, 5, 6, 7);
48520        let r = _mm_cvtsepi32_epi16(a);
48521        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48522        assert_eq_m128i(r, e);
48523    }
48524
48525    #[simd_test(enable = "avx512f,avx512vl")]
48526    unsafe fn test_mm_mask_cvtsepi32_epi16() {
48527        let a = _mm_set_epi32(4, 5, 6, 7);
48528        let src = _mm_set1_epi16(0);
48529        let r = _mm_mask_cvtsepi32_epi16(src, 0, a);
48530        assert_eq_m128i(r, src);
48531        let r = _mm_mask_cvtsepi32_epi16(src, 0b00001111, a);
48532        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48533        assert_eq_m128i(r, e);
48534    }
48535
48536    #[simd_test(enable = "avx512f,avx512vl")]
48537    unsafe fn test_mm_maskz_cvtsepi32_epi16() {
48538        let a = _mm_set_epi32(4, 5, 6, 7);
48539        let r = _mm_maskz_cvtsepi32_epi16(0, a);
48540        assert_eq_m128i(r, _mm_setzero_si128());
48541        let r = _mm_maskz_cvtsepi32_epi16(0b00001111, a);
48542        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48543        assert_eq_m128i(r, e);
48544    }
48545
48546    #[simd_test(enable = "avx512f")]
48547    unsafe fn test_mm512_cvtsepi32_epi8() {
48548        #[rustfmt::skip]
48549        let a = _mm512_set_epi32(
48550            0, 1, 2, 3,
48551            4, 5, 6, 7,
48552            8, 9, 10, 11,
48553            12, 13, i32::MIN, i32::MAX,
48554        );
48555        let r = _mm512_cvtsepi32_epi8(a);
48556        #[rustfmt::skip]
48557        let e = _mm_set_epi8(
48558            0, 1, 2, 3,
48559            4, 5, 6, 7,
48560            8, 9, 10, 11,
48561            12, 13, i8::MIN, i8::MAX,
48562        );
48563        assert_eq_m128i(r, e);
48564    }
48565
48566    #[simd_test(enable = "avx512f")]
48567    unsafe fn test_mm512_mask_cvtsepi32_epi8() {
48568        #[rustfmt::skip]
48569        let a = _mm512_set_epi32(
48570            0, 1, 2, 3,
48571            4, 5, 6, 7,
48572            8, 9, 10, 11,
48573            12, 13, i32::MIN, i32::MAX,
48574        );
48575        let src = _mm_set1_epi8(-1);
48576        let r = _mm512_mask_cvtsepi32_epi8(src, 0, a);
48577        assert_eq_m128i(r, src);
48578        let r = _mm512_mask_cvtsepi32_epi8(src, 0b00000000_11111111, a);
48579        #[rustfmt::skip]
48580        let e = _mm_set_epi8(
48581            -1, -1, -1, -1,
48582            -1, -1, -1, -1,
48583            8, 9, 10, 11,
48584            12, 13, i8::MIN, i8::MAX,
48585        );
48586        assert_eq_m128i(r, e);
48587    }
48588
48589    #[simd_test(enable = "avx512f")]
48590    unsafe fn test_mm512_maskz_cvtsepi32_epi8() {
48591        #[rustfmt::skip]
48592        let a = _mm512_set_epi32(
48593            0, 1, 2, 3,
48594            4, 5, 6, 7,
48595            8, 9, 10, 11,
48596            12, 13, i32::MIN, i32::MAX,
48597        );
48598        let r = _mm512_maskz_cvtsepi32_epi8(0, a);
48599        assert_eq_m128i(r, _mm_setzero_si128());
48600        let r = _mm512_maskz_cvtsepi32_epi8(0b00000000_11111111, a);
48601        #[rustfmt::skip]
48602        let e = _mm_set_epi8(
48603            0, 0, 0, 0,
48604            0, 0, 0, 0,
48605            8, 9, 10, 11,
48606            12, 13, i8::MIN, i8::MAX,
48607        );
48608        assert_eq_m128i(r, e);
48609    }
48610
48611    #[simd_test(enable = "avx512f,avx512vl")]
48612    unsafe fn test_mm256_cvtsepi32_epi8() {
48613        let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
48614        let r = _mm256_cvtsepi32_epi8(a);
48615        #[rustfmt::skip]
48616        let e = _mm_set_epi8(
48617            0, 0, 0, 0,
48618            0, 0, 0, 0,
48619            9, 10, 11, 12,
48620            13, 14, 15, 16,
48621        );
48622        assert_eq_m128i(r, e);
48623    }
48624
48625    #[simd_test(enable = "avx512f,avx512vl")]
48626    unsafe fn test_mm256_mask_cvtsepi32_epi8() {
48627        let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
48628        let src = _mm_set1_epi8(0);
48629        let r = _mm256_mask_cvtsepi32_epi8(src, 0, a);
48630        assert_eq_m128i(r, src);
48631        let r = _mm256_mask_cvtsepi32_epi8(src, 0b11111111, a);
48632        #[rustfmt::skip]
48633        let e = _mm_set_epi8(
48634            0, 0, 0, 0,
48635            0, 0, 0, 0,
48636            9, 10, 11, 12,
48637            13, 14, 15, 16,
48638        );
48639        assert_eq_m128i(r, e);
48640    }
48641
48642    #[simd_test(enable = "avx512f,avx512vl")]
48643    unsafe fn test_mm256_maskz_cvtsepi32_epi8() {
48644        let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
48645        let r = _mm256_maskz_cvtsepi32_epi8(0, a);
48646        assert_eq_m128i(r, _mm_setzero_si128());
48647        let r = _mm256_maskz_cvtsepi32_epi8(0b11111111, a);
48648        #[rustfmt::skip]
48649        let e = _mm_set_epi8(
48650            0, 0, 0, 0,
48651            0, 0, 0, 0,
48652            9, 10, 11, 12,
48653            13, 14, 15, 16,
48654        );
48655        assert_eq_m128i(r, e);
48656    }
48657
48658    #[simd_test(enable = "avx512f,avx512vl")]
48659    unsafe fn test_mm_cvtsepi32_epi8() {
48660        let a = _mm_set_epi32(13, 14, 15, 16);
48661        let r = _mm_cvtsepi32_epi8(a);
48662        #[rustfmt::skip]
48663        let e = _mm_set_epi8(
48664            0, 0, 0, 0,
48665            0, 0, 0, 0,
48666            0, 0, 0, 0,
48667            13, 14, 15, 16,
48668        );
48669        assert_eq_m128i(r, e);
48670    }
48671
48672    #[simd_test(enable = "avx512f,avx512vl")]
48673    unsafe fn test_mm_mask_cvtsepi32_epi8() {
48674        let a = _mm_set_epi32(13, 14, 15, 16);
48675        let src = _mm_set1_epi8(0);
48676        let r = _mm_mask_cvtsepi32_epi8(src, 0, a);
48677        assert_eq_m128i(r, src);
48678        let r = _mm_mask_cvtsepi32_epi8(src, 0b00001111, a);
48679        #[rustfmt::skip]
48680        let e = _mm_set_epi8(
48681            0, 0, 0, 0,
48682            0, 0, 0, 0,
48683            0, 0, 0, 0,
48684            13, 14, 15, 16,
48685        );
48686        assert_eq_m128i(r, e);
48687    }
48688
48689    #[simd_test(enable = "avx512f,avx512vl")]
48690    unsafe fn test_mm_maskz_cvtsepi32_epi8() {
48691        let a = _mm_set_epi32(13, 14, 15, 16);
48692        let r = _mm_maskz_cvtsepi32_epi8(0, a);
48693        assert_eq_m128i(r, _mm_setzero_si128());
48694        let r = _mm_maskz_cvtsepi32_epi8(0b00001111, a);
48695        #[rustfmt::skip]
48696        let e = _mm_set_epi8(
48697            0, 0, 0, 0,
48698            0, 0, 0, 0,
48699            0, 0, 0, 0,
48700            13, 14, 15, 16,
48701        );
48702        assert_eq_m128i(r, e);
48703    }
48704
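    // cvtusepi32_* narrow with unsigned saturation (VPMOVUSDW / VPMOVUSDB): source lanes are
    // treated as unsigned, so i32::MIN (0x8000_0000) exceeds u16::MAX / u8::MAX and clamps to
    // all-ones, which prints as -1 through the signed _mm*_set_epi* helpers used below.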
48705    #[simd_test(enable = "avx512f")]
48706    unsafe fn test_mm512_cvtusepi32_epi16() {
48707        #[rustfmt::skip]
48708        let a = _mm512_set_epi32(
48709            0, 1, 2, 3,
48710            4, 5, 6, 7,
48711            8, 9, 10, 11,
48712            12, 13, i32::MIN, i32::MIN,
48713        );
48714        let r = _mm512_cvtusepi32_epi16(a);
48715        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1);
48716        assert_eq_m256i(r, e);
48717    }
48718
48719    #[simd_test(enable = "avx512f")]
48720    unsafe fn test_mm512_mask_cvtusepi32_epi16() {
48721        #[rustfmt::skip]
48722        let a = _mm512_set_epi32(
48723            0, 1, 2, 3,
48724            4, 5, 6, 7,
48725            8, 9, 10, 11,
48726            12, 13, i32::MIN, i32::MIN,
48727        );
48728        let src = _mm256_set1_epi16(-1);
48729        let r = _mm512_mask_cvtusepi32_epi16(src, 0, a);
48730        assert_eq_m256i(r, src);
48731        let r = _mm512_mask_cvtusepi32_epi16(src, 0b00000000_11111111, a);
48732        let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, -1, -1);
48733        assert_eq_m256i(r, e);
48734    }
48735
48736    #[simd_test(enable = "avx512f")]
48737    unsafe fn test_mm512_maskz_cvtusepi32_epi16() {
48738        #[rustfmt::skip]
48739        let a = _mm512_set_epi32(
48740            0, 1, 2, 3,
48741            4, 5, 6, 7,
48742            8, 9, 10, 11,
48743            12, 13, i32::MIN, i32::MIN,
48744        );
48745        let r = _mm512_maskz_cvtusepi32_epi16(0, a);
48746        assert_eq_m256i(r, _mm256_setzero_si256());
48747        let r = _mm512_maskz_cvtusepi32_epi16(0b00000000_11111111, a);
48748        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, -1, -1);
48749        assert_eq_m256i(r, e);
48750    }
48751
48752    #[simd_test(enable = "avx512f,avx512vl")]
48753    unsafe fn test_mm256_cvtusepi32_epi16() {
48754        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
48755        let r = _mm256_cvtusepi32_epi16(a);
48756        let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
48757        assert_eq_m128i(r, e);
48758    }
48759
48760    #[simd_test(enable = "avx512f,avx512vl")]
48761    unsafe fn test_mm256_mask_cvtusepi32_epi16() {
48762        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
48763        let src = _mm_set1_epi16(0);
48764        let r = _mm256_mask_cvtusepi32_epi16(src, 0, a);
48765        assert_eq_m128i(r, src);
48766        let r = _mm256_mask_cvtusepi32_epi16(src, 0b11111111, a);
48767        let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
48768        assert_eq_m128i(r, e);
48769    }
48770
48771    #[simd_test(enable = "avx512f,avx512vl")]
48772    unsafe fn test_mm256_maskz_cvtusepi32_epi16() {
48773        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
48774        let r = _mm256_maskz_cvtusepi32_epi16(0, a);
48775        assert_eq_m128i(r, _mm_setzero_si128());
48776        let r = _mm256_maskz_cvtusepi32_epi16(0b11111111, a);
48777        let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
48778        assert_eq_m128i(r, e);
48779    }
48780
48781    #[simd_test(enable = "avx512f,avx512vl")]
48782    unsafe fn test_mm_cvtusepi32_epi16() {
48783        let a = _mm_set_epi32(5, 6, 7, 8);
48784        let r = _mm_cvtusepi32_epi16(a);
48785        let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
48786        assert_eq_m128i(r, e);
48787    }
48788
48789    #[simd_test(enable = "avx512f,avx512vl")]
48790    unsafe fn test_mm_mask_cvtusepi32_epi16() {
48791        let a = _mm_set_epi32(5, 6, 7, 8);
48792        let src = _mm_set1_epi16(0);
48793        let r = _mm_mask_cvtusepi32_epi16(src, 0, a);
48794        assert_eq_m128i(r, src);
48795        let r = _mm_mask_cvtusepi32_epi16(src, 0b00001111, a);
48796        let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
48797        assert_eq_m128i(r, e);
48798    }
48799
48800    #[simd_test(enable = "avx512f,avx512vl")]
48801    unsafe fn test_mm_maskz_cvtusepi32_epi16() {
48802        let a = _mm_set_epi32(5, 6, 7, 8);
48803        let r = _mm_maskz_cvtusepi32_epi16(0, a);
48804        assert_eq_m128i(r, _mm_setzero_si128());
48805        let r = _mm_maskz_cvtusepi32_epi16(0b00001111, a);
48806        let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
48807        assert_eq_m128i(r, e);
48808    }
48809
48810    #[simd_test(enable = "avx512f")]
48811    unsafe fn test_mm512_cvtusepi32_epi8() {
48812        #[rustfmt::skip]
48813        let a = _mm512_set_epi32(
48814            0, 1, 2, 3,
48815            4, 5, 6, 7,
48816            8, 9, 10, 11,
48817            12, 13, i32::MIN, i32::MIN,
48818        );
48819        let r = _mm512_cvtusepi32_epi8(a);
48820        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1);
48821        assert_eq_m128i(r, e);
48822    }
48823
48824    #[simd_test(enable = "avx512f")]
48825    unsafe fn test_mm512_mask_cvtusepi32_epi8() {
48826        #[rustfmt::skip]
48827        let a = _mm512_set_epi32(
48828            0, 1, 2, 3,
48829            4, 5, 6, 7,
48830            8, 9, 10, 11,
48831            12, 13, i32::MIN, i32::MIN,
48832        );
48833        let src = _mm_set1_epi8(-1);
48834        let r = _mm512_mask_cvtusepi32_epi8(src, 0, a);
48835        assert_eq_m128i(r, src);
48836        let r = _mm512_mask_cvtusepi32_epi8(src, 0b00000000_11111111, a);
48837        let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, -1, -1);
48838        assert_eq_m128i(r, e);
48839    }
48840
48841    #[simd_test(enable = "avx512f")]
48842    unsafe fn test_mm512_maskz_cvtusepi32_epi8() {
48843        #[rustfmt::skip]
48844        let a = _mm512_set_epi32(
48845            0, 1, 2, 3,
48846            4, 5, 6, 7,
48847            8, 9, 10, 11,
48848            12, 13, i32::MIN, i32::MIN,
48849        );
48850        let r = _mm512_maskz_cvtusepi32_epi8(0, a);
48851        assert_eq_m128i(r, _mm_setzero_si128());
48852        let r = _mm512_maskz_cvtusepi32_epi8(0b00000000_11111111, a);
48853        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, -1, -1);
48854        assert_eq_m128i(r, e);
48855    }
48856
48857    #[simd_test(enable = "avx512f,avx512vl")]
48858    unsafe fn test_mm256_cvtusepi32_epi8() {
48859        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
48860        let r = _mm256_cvtusepi32_epi8(a);
48861        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
48862        assert_eq_m128i(r, e);
48863    }
48864
48865    #[simd_test(enable = "avx512f,avx512vl")]
48866    unsafe fn test_mm256_mask_cvtusepi32_epi8() {
48867        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
48868        let src = _mm_set1_epi8(0);
48869        let r = _mm256_mask_cvtusepi32_epi8(src, 0, a);
48870        assert_eq_m128i(r, src);
48871        let r = _mm256_mask_cvtusepi32_epi8(src, 0b11111111, a);
48872        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
48873        assert_eq_m128i(r, e);
48874    }
48875
48876    #[simd_test(enable = "avx512f,avx512vl")]
48877    unsafe fn test_mm256_maskz_cvtusepi32_epi8() {
48878        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
48879        let r = _mm256_maskz_cvtusepi32_epi8(0, a);
48880        assert_eq_m128i(r, _mm_setzero_si128());
48881        let r = _mm256_maskz_cvtusepi32_epi8(0b11111111, a);
48882        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
48883        assert_eq_m128i(r, e);
48884    }
48885
48886    #[simd_test(enable = "avx512f,avx512vl")]
48887    unsafe fn test_mm_cvtusepi32_epi8() {
48888        let a = _mm_set_epi32(5, 6, 7, i32::MAX);
48889        let r = _mm_cvtusepi32_epi8(a);
48890        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
48891        assert_eq_m128i(r, e);
48892    }
48893
48894    #[simd_test(enable = "avx512f,avx512vl")]
48895    unsafe fn test_mm_mask_cvtusepi32_epi8() {
48896        let a = _mm_set_epi32(5, 6, 7, i32::MAX);
48897        let src = _mm_set1_epi8(0);
48898        let r = _mm_mask_cvtusepi32_epi8(src, 0, a);
48899        assert_eq_m128i(r, src);
48900        let r = _mm_mask_cvtusepi32_epi8(src, 0b00001111, a);
48901        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
48902        assert_eq_m128i(r, e);
48903    }
48904
48905    #[simd_test(enable = "avx512f,avx512vl")]
48906    unsafe fn test_mm_maskz_cvtusepi32_epi8() {
48907        let a = _mm_set_epi32(5, 6, 7, i32::MAX);
48908        let r = _mm_maskz_cvtusepi32_epi8(0, a);
48909        assert_eq_m128i(r, _mm_setzero_si128());
48910        let r = _mm_maskz_cvtusepi32_epi8(0b00001111, a);
48911        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
48912        assert_eq_m128i(r, e);
48913    }
48914
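    // The cvt_roundps_* tests exercise the embedded rounding control:
    // _MM_FROUND_TO_NEAREST_INT rounds halfway cases to the nearest even integer
    // (-1.5 -> -2, 9.5 -> 10), while _MM_FROUND_TO_NEG_INF rounds toward negative
    // infinity (9.5 -> 9).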
48915    #[simd_test(enable = "avx512f")]
48916    unsafe fn test_mm512_cvt_roundps_epi32() {
48917        let a = _mm512_setr_ps(
48918            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
48919        );
48920        let r = _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
48921        let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
48922        assert_eq_m512i(r, e);
48923        let r = _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
48924        let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 9, 10, 11, 12, 13, 14, 15);
48925        assert_eq_m512i(r, e);
48926    }
48927
48928    #[simd_test(enable = "avx512f")]
48929    unsafe fn test_mm512_mask_cvt_roundps_epi32() {
48930        let a = _mm512_setr_ps(
48931            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
48932        );
48933        let src = _mm512_set1_epi32(0);
48934        let r = _mm512_mask_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48935            src, 0, a,
48936        );
48937        assert_eq_m512i(r, src);
48938        let r = _mm512_mask_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48939            src,
48940            0b00000000_11111111,
48941            a,
48942        );
48943        let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
48944        assert_eq_m512i(r, e);
48945    }
48946
48947    #[simd_test(enable = "avx512f")]
48948    unsafe fn test_mm512_maskz_cvt_roundps_epi32() {
48949        let a = _mm512_setr_ps(
48950            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
48951        );
48952        let r = _mm512_maskz_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48953            0, a,
48954        );
48955        assert_eq_m512i(r, _mm512_setzero_si512());
48956        let r = _mm512_maskz_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48957            0b00000000_11111111,
48958            a,
48959        );
48960        let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
48961        assert_eq_m512i(r, e);
48962    }
48963
48964    #[simd_test(enable = "avx512f")]
48965    unsafe fn test_mm512_cvt_roundps_epu32() {
48966        let a = _mm512_setr_ps(
48967            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
48968        );
48969        let r = _mm512_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
48970        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 10, 10, 12, 12, 14, 14, 16);
48971        assert_eq_m512i(r, e);
48972        let r = _mm512_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
48973        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
48974        assert_eq_m512i(r, e);
48975    }
48976
48977    #[simd_test(enable = "avx512f")]
48978    unsafe fn test_mm512_mask_cvt_roundps_epu32() {
48979        let a = _mm512_setr_ps(
48980            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
48981        );
48982        let src = _mm512_set1_epi32(0);
48983        let r = _mm512_mask_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48984            src, 0, a,
48985        );
48986        assert_eq_m512i(r, src);
48987        let r = _mm512_mask_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48988            src,
48989            0b00000000_11111111,
48990            a,
48991        );
48992        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
48993        assert_eq_m512i(r, e);
48994    }
48995
48996    #[simd_test(enable = "avx512f")]
48997    unsafe fn test_mm512_maskz_cvt_roundps_epu32() {
48998        let a = _mm512_setr_ps(
48999            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49000        );
49001        let r = _mm512_maskz_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49002            0, a,
49003        );
49004        assert_eq_m512i(r, _mm512_setzero_si512());
49005        let r = _mm512_maskz_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49006            0b00000000_11111111,
49007            a,
49008        );
49009        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
49010        assert_eq_m512i(r, e);
49011    }
49012
49013    #[simd_test(enable = "avx512f")]
49014    unsafe fn test_mm512_cvt_roundepi32_ps() {
49015        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
49016        let r = _mm512_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
49017        let e = _mm512_setr_ps(
49018            0., -2., 2., -4., 4., -6., 6., -8., 8., 10., 10., 12., 12., 14., 14., 16.,
49019        );
49020        assert_eq_m512(r, e);
49021    }
49022
49023    #[simd_test(enable = "avx512f")]
49024    unsafe fn test_mm512_mask_cvt_roundepi32_ps() {
49025        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
49026        let src = _mm512_set1_ps(0.);
49027        let r = _mm512_mask_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49028            src, 0, a,
49029        );
49030        assert_eq_m512(r, src);
49031        let r = _mm512_mask_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49032            src,
49033            0b00000000_11111111,
49034            a,
49035        );
49036        let e = _mm512_setr_ps(
49037            0., -2., 2., -4., 4., -6., 6., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
49038        );
49039        assert_eq_m512(r, e);
49040    }
49041
49042    #[simd_test(enable = "avx512f")]
49043    unsafe fn test_mm512_maskz_cvt_roundepi32_ps() {
49044        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
49045        let r = _mm512_maskz_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49046            0, a,
49047        );
49048        assert_eq_m512(r, _mm512_setzero_ps());
49049        let r = _mm512_maskz_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49050            0b00000000_11111111,
49051            a,
49052        );
49053        let e = _mm512_setr_ps(
49054            0., -2., 2., -4., 4., -6., 6., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
49055        );
49056        assert_eq_m512(r, e);
49057    }
49058
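    // In the epu32 variants the lanes set to -2, -4, ... reinterpret as u32 values just
    // below 2^32 (e.g. -2 is 4_294_967_294), and those round to 4_294_967_296.0 in f32.
    // The literal 4294967300. denotes that same f32 value, so the comparisons are exact.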
49059    #[simd_test(enable = "avx512f")]
49060    unsafe fn test_mm512_cvt_roundepu32_ps() {
49061        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
49062        let r = _mm512_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
49063        #[rustfmt::skip]
49064        let e = _mm512_setr_ps(
49065            0., 4294967300., 2., 4294967300.,
49066            4., 4294967300., 6., 4294967300.,
49067            8., 10., 10., 12.,
49068            12., 14., 14., 16.,
49069        );
49070        assert_eq_m512(r, e);
49071    }
49072
49073    #[simd_test(enable = "avx512f")]
49074    unsafe fn test_mm512_mask_cvt_roundepu32_ps() {
49075        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
49076        let src = _mm512_set1_ps(0.);
49077        let r = _mm512_mask_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49078            src, 0, a,
49079        );
49080        assert_eq_m512(r, src);
49081        let r = _mm512_mask_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49082            src,
49083            0b00000000_11111111,
49084            a,
49085        );
49086        #[rustfmt::skip]
49087        let e = _mm512_setr_ps(
49088            0., 4294967300., 2., 4294967300.,
49089            4., 4294967300., 6., 4294967300.,
49090            0., 0., 0., 0.,
49091            0., 0., 0., 0.,
49092        );
49093        assert_eq_m512(r, e);
49094    }
49095
49096    #[simd_test(enable = "avx512f")]
49097    unsafe fn test_mm512_maskz_cvt_roundepu32_ps() {
49098        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
49099        let r = _mm512_maskz_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49100            0, a,
49101        );
49102        assert_eq_m512(r, _mm512_setzero_ps());
49103        let r = _mm512_maskz_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49104            0b00000000_11111111,
49105            a,
49106        );
49107        #[rustfmt::skip]
49108        let e = _mm512_setr_ps(
49109            0., 4294967300., 2., 4294967300.,
49110            4., 4294967300., 6., 4294967300.,
49111            0., 0., 0., 0.,
49112            0., 0., 0., 0.,
49113        );
49114        assert_eq_m512(r, e);
49115    }
49116
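    // 4323521613979991040 is 0x3C00_3C00_3C00_3C00: four IEEE half-precision 1.0 values
    // (0x3C00) packed into one 64-bit lane, i.e. what converting four 1.0f32 lanes to f16
    // produces.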
49117    #[simd_test(enable = "avx512f")]
49118    unsafe fn test_mm512_cvt_roundps_ph() {
49119        let a = _mm512_set1_ps(1.);
49120        let r = _mm512_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(a);
49121        let e = _mm256_setr_epi64x(
49122            4323521613979991040,
49123            4323521613979991040,
49124            4323521613979991040,
49125            4323521613979991040,
49126        );
49127        assert_eq_m256i(r, e);
49128    }
49129
49130    #[simd_test(enable = "avx512f")]
49131    unsafe fn test_mm512_mask_cvt_roundps_ph() {
49132        let a = _mm512_set1_ps(1.);
49133        let src = _mm256_set1_epi16(0);
49134        let r = _mm512_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49135        assert_eq_m256i(r, src);
49136        let r = _mm512_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
49137        let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
49138        assert_eq_m256i(r, e);
49139    }
49140
49141    #[simd_test(enable = "avx512f")]
49142    unsafe fn test_mm512_maskz_cvt_roundps_ph() {
49143        let a = _mm512_set1_ps(1.);
49144        let r = _mm512_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a);
49145        assert_eq_m256i(r, _mm256_setzero_si256());
49146        let r = _mm512_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
49147        let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
49148        assert_eq_m256i(r, e);
49149    }
49150
49151    #[simd_test(enable = "avx512f,avx512vl")]
49152    unsafe fn test_mm256_mask_cvt_roundps_ph() {
49153        let a = _mm256_set1_ps(1.);
49154        let src = _mm_set1_epi16(0);
49155        let r = _mm256_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49156        assert_eq_m128i(r, src);
49157        let r = _mm256_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0b11111111, a);
49158        let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49159        assert_eq_m128i(r, e);
49160    }
49161
49162    #[simd_test(enable = "avx512f,avx512vl")]
49163    unsafe fn test_mm256_maskz_cvt_roundps_ph() {
49164        let a = _mm256_set1_ps(1.);
49165        let r = _mm256_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a);
49166        assert_eq_m128i(r, _mm_setzero_si128());
49167        let r = _mm256_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b11111111, a);
49168        let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49169        assert_eq_m128i(r, e);
49170    }
49171
49172    #[simd_test(enable = "avx512f,avx512vl")]
49173    unsafe fn test_mm_mask_cvt_roundps_ph() {
49174        let a = _mm_set1_ps(1.);
49175        let src = _mm_set1_epi16(0);
49176        let r = _mm_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49177        assert_eq_m128i(r, src);
49178        let r = _mm_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0b00001111, a);
49179        let e = _mm_setr_epi64x(4323521613979991040, 0);
49180        assert_eq_m128i(r, e);
49181    }
49182
49183    #[simd_test(enable = "avx512f,avx512vl")]
49184    unsafe fn test_mm_maskz_cvt_roundps_ph() {
49185        let a = _mm_set1_ps(1.);
49186        let r = _mm_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a);
49187        assert_eq_m128i(r, _mm_setzero_si128());
49188        let r = _mm_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b00001111, a);
49189        let e = _mm_setr_epi64x(4323521613979991040, 0);
49190        assert_eq_m128i(r, e);
49191    }
49192
49193    #[simd_test(enable = "avx512f")]
49194    unsafe fn test_mm512_cvtps_ph() {
49195        let a = _mm512_set1_ps(1.);
49196        let r = _mm512_cvtps_ph::<_MM_FROUND_NO_EXC>(a);
49197        let e = _mm256_setr_epi64x(
49198            4323521613979991040,
49199            4323521613979991040,
49200            4323521613979991040,
49201            4323521613979991040,
49202        );
49203        assert_eq_m256i(r, e);
49204    }
49205
49206    #[simd_test(enable = "avx512f")]
49207    unsafe fn test_mm512_mask_cvtps_ph() {
49208        let a = _mm512_set1_ps(1.);
49209        let src = _mm256_set1_epi16(0);
49210        let r = _mm512_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49211        assert_eq_m256i(r, src);
49212        let r = _mm512_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
49213        let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
49214        assert_eq_m256i(r, e);
49215    }
49216
49217    #[simd_test(enable = "avx512f")]
49218    unsafe fn test_mm512_maskz_cvtps_ph() {
49219        let a = _mm512_set1_ps(1.);
49220        let r = _mm512_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a);
49221        assert_eq_m256i(r, _mm256_setzero_si256());
49222        let r = _mm512_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
49223        let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
49224        assert_eq_m256i(r, e);
49225    }
49226
49227    #[simd_test(enable = "avx512f,avx512vl")]
49228    unsafe fn test_mm256_mask_cvtps_ph() {
49229        let a = _mm256_set1_ps(1.);
49230        let src = _mm_set1_epi16(0);
49231        let r = _mm256_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49232        assert_eq_m128i(r, src);
49233        let r = _mm256_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0b11111111, a);
49234        let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49235        assert_eq_m128i(r, e);
49236    }
49237
49238    #[simd_test(enable = "avx512f,avx512vl")]
49239    unsafe fn test_mm256_maskz_cvtps_ph() {
49240        let a = _mm256_set1_ps(1.);
49241        let r = _mm256_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a);
49242        assert_eq_m128i(r, _mm_setzero_si128());
49243        let r = _mm256_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0b11111111, a);
49244        let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49245        assert_eq_m128i(r, e);
49246    }
49247
49248    #[simd_test(enable = "avx512f,avx512vl")]
49249    unsafe fn test_mm_mask_cvtps_ph() {
49250        let a = _mm_set1_ps(1.);
49251        let src = _mm_set1_epi16(0);
49252        let r = _mm_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49253        assert_eq_m128i(r, src);
49254        let r = _mm_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0b00001111, a);
49255        let e = _mm_setr_epi64x(4323521613979991040, 0);
49256        assert_eq_m128i(r, e);
49257    }
49258
49259    #[simd_test(enable = "avx512f,avx512vl")]
49260    unsafe fn test_mm_maskz_cvtps_ph() {
49261        let a = _mm_set1_ps(1.);
49262        let r = _mm_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a);
49263        assert_eq_m128i(r, _mm_setzero_si128());
49264        let r = _mm_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0b00001111, a);
49265        let e = _mm_setr_epi64x(4323521613979991040, 0);
49266        assert_eq_m128i(r, e);
49267    }
49268
49269    #[simd_test(enable = "avx512f")]
49270    unsafe fn test_mm512_cvt_roundph_ps() {
49271        let a = _mm256_setr_epi64x(
49272            4323521613979991040,
49273            4323521613979991040,
49274            4323521613979991040,
49275            4323521613979991040,
49276        );
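             // The input packs the f16 value 1.0 (0x3C00) into every 16-bit lane, so converting
             // back to f32 yields 1.0 in all 16 lanes.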
49277        let r = _mm512_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(a);
49278        let e = _mm512_set1_ps(1.);
49279        assert_eq_m512(r, e);
49280    }
49281
49282    #[simd_test(enable = "avx512f")]
49283    unsafe fn test_mm512_mask_cvt_roundph_ps() {
49284        let a = _mm256_setr_epi64x(
49285            4323521613979991040,
49286            4323521613979991040,
49287            4323521613979991040,
49288            4323521613979991040,
49289        );
49290        let src = _mm512_set1_ps(0.);
49291        let r = _mm512_mask_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(src, 0, a);
49292        assert_eq_m512(r, src);
49293        let r = _mm512_mask_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
49294        let e = _mm512_setr_ps(
49295            1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
49296        );
49297        assert_eq_m512(r, e);
49298    }
49299
49300    #[simd_test(enable = "avx512f")]
49301    unsafe fn test_mm512_maskz_cvt_roundph_ps() {
49302        let a = _mm256_setr_epi64x(
49303            4323521613979991040,
49304            4323521613979991040,
49305            4323521613979991040,
49306            4323521613979991040,
49307        );
49308        let r = _mm512_maskz_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(0, a);
49309        assert_eq_m512(r, _mm512_setzero_ps());
49310        let r = _mm512_maskz_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
49311        let e = _mm512_setr_ps(
49312            1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
49313        );
49314        assert_eq_m512(r, e);
49315    }
49316
49317    #[simd_test(enable = "avx512f")]
49318    unsafe fn test_mm512_cvtph_ps() {
49319        let a = _mm256_setr_epi64x(
49320            4323521613979991040,
49321            4323521613979991040,
49322            4323521613979991040,
49323            4323521613979991040,
49324        );
49325        let r = _mm512_cvtph_ps(a);
49326        let e = _mm512_set1_ps(1.);
49327        assert_eq_m512(r, e);
49328    }
49329
49330    #[simd_test(enable = "avx512f")]
49331    unsafe fn test_mm512_mask_cvtph_ps() {
49332        let a = _mm256_setr_epi64x(
49333            4323521613979991040,
49334            4323521613979991040,
49335            4323521613979991040,
49336            4323521613979991040,
49337        );
49338        let src = _mm512_set1_ps(0.);
49339        let r = _mm512_mask_cvtph_ps(src, 0, a);
49340        assert_eq_m512(r, src);
49341        let r = _mm512_mask_cvtph_ps(src, 0b00000000_11111111, a);
49342        let e = _mm512_setr_ps(
49343            1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
49344        );
49345        assert_eq_m512(r, e);
49346    }
49347
49348    #[simd_test(enable = "avx512f")]
49349    unsafe fn test_mm512_maskz_cvtph_ps() {
49350        let a = _mm256_setr_epi64x(
49351            4323521613979991040,
49352            4323521613979991040,
49353            4323521613979991040,
49354            4323521613979991040,
49355        );
49356        let r = _mm512_maskz_cvtph_ps(0, a);
49357        assert_eq_m512(r, _mm512_setzero_ps());
49358        let r = _mm512_maskz_cvtph_ps(0b00000000_11111111, a);
49359        let e = _mm512_setr_ps(
49360            1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
49361        );
49362        assert_eq_m512(r, e);
49363    }
49364
49365    #[simd_test(enable = "avx512f,avx512vl")]
49366    unsafe fn test_mm256_mask_cvtph_ps() {
49367        let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49368        let src = _mm256_set1_ps(0.);
49369        let r = _mm256_mask_cvtph_ps(src, 0, a);
49370        assert_eq_m256(r, src);
49371        let r = _mm256_mask_cvtph_ps(src, 0b11111111, a);
49372        let e = _mm256_setr_ps(1., 1., 1., 1., 1., 1., 1., 1.);
49373        assert_eq_m256(r, e);
49374    }
49375
49376    #[simd_test(enable = "avx512f,avx512vl")]
49377    unsafe fn test_mm256_maskz_cvtph_ps() {
49378        let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49379        let r = _mm256_maskz_cvtph_ps(0, a);
49380        assert_eq_m256(r, _mm256_setzero_ps());
49381        let r = _mm256_maskz_cvtph_ps(0b11111111, a);
49382        let e = _mm256_setr_ps(1., 1., 1., 1., 1., 1., 1., 1.);
49383        assert_eq_m256(r, e);
49384    }
49385
49386    #[simd_test(enable = "avx512f,avx512vl")]
49387    unsafe fn test_mm_mask_cvtph_ps() {
49388        let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49389        let src = _mm_set1_ps(0.);
49390        let r = _mm_mask_cvtph_ps(src, 0, a);
49391        assert_eq_m128(r, src);
49392        let r = _mm_mask_cvtph_ps(src, 0b00001111, a);
49393        let e = _mm_setr_ps(1., 1., 1., 1.);
49394        assert_eq_m128(r, e);
49395    }
49396
49397    #[simd_test(enable = "avx512f,avx512vl")]
49398    unsafe fn test_mm_maskz_cvtph_ps() {
49399        let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49400        let r = _mm_maskz_cvtph_ps(0, a);
49401        assert_eq_m128(r, _mm_setzero_ps());
49402        let r = _mm_maskz_cvtph_ps(0b00001111, a);
49403        let e = _mm_setr_ps(1., 1., 1., 1.);
49404        assert_eq_m128(r, e);
49405    }
49406
49407    #[simd_test(enable = "avx512f")]
49408    unsafe fn test_mm512_cvtt_roundps_epi32() {
49409        let a = _mm512_setr_ps(
49410            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49411        );
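             // The "cvtt" variants truncate toward zero, so -1.5 becomes -1 and 9.5 becomes 9.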
49412        let r = _mm512_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(a);
49413        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 8, 9, 10, 11, 12, 13, 14, 15);
49414        assert_eq_m512i(r, e);
49415    }
49416
49417    #[simd_test(enable = "avx512f")]
49418    unsafe fn test_mm512_mask_cvtt_roundps_epi32() {
49419        let a = _mm512_setr_ps(
49420            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49421        );
49422        let src = _mm512_set1_epi32(0);
49423        let r = _mm512_mask_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(src, 0, a);
49424        assert_eq_m512i(r, src);
49425        let r = _mm512_mask_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
49426        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
49427        assert_eq_m512i(r, e);
49428    }
49429
49430    #[simd_test(enable = "avx512f")]
49431    unsafe fn test_mm512_maskz_cvtt_roundps_epi32() {
49432        let a = _mm512_setr_ps(
49433            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49434        );
49435        let r = _mm512_maskz_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(0, a);
49436        assert_eq_m512i(r, _mm512_setzero_si512());
49437        let r = _mm512_maskz_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
49438        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
49439        assert_eq_m512i(r, e);
49440    }
49441
49442    #[simd_test(enable = "avx512f")]
49443    unsafe fn test_mm512_cvtt_roundps_epu32() {
49444        let a = _mm512_setr_ps(
49445            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49446        );
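             // Negative inputs cannot be represented as u32; the conversion returns the all-ones
             // pattern 0xFFFFFFFF, which reads back as -1 through _mm512_setr_epi32.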
49447        let r = _mm512_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(a);
49448        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
49449        assert_eq_m512i(r, e);
49450    }
49451
49452    #[simd_test(enable = "avx512f")]
49453    unsafe fn test_mm512_mask_cvtt_roundps_epu32() {
49454        let a = _mm512_setr_ps(
49455            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49456        );
49457        let src = _mm512_set1_epi32(0);
49458        let r = _mm512_mask_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(src, 0, a);
49459        assert_eq_m512i(r, src);
49460        let r = _mm512_mask_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
49461        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
49462        assert_eq_m512i(r, e);
49463    }
49464
49465    #[simd_test(enable = "avx512f")]
49466    unsafe fn test_mm512_maskz_cvtt_roundps_epu32() {
49467        let a = _mm512_setr_ps(
49468            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49469        );
49470        let r = _mm512_maskz_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(0, a);
49471        assert_eq_m512i(r, _mm512_setzero_si512());
49472        let r = _mm512_maskz_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
49473        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
49474        assert_eq_m512i(r, e);
49475    }
49476
49477    #[simd_test(enable = "avx512f")]
49478    unsafe fn test_mm512_cvttps_epi32() {
49479        let a = _mm512_setr_ps(
49480            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49481        );
49482        let r = _mm512_cvttps_epi32(a);
49483        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 8, 9, 10, 11, 12, 13, 14, 15);
49484        assert_eq_m512i(r, e);
49485    }
49486
49487    #[simd_test(enable = "avx512f")]
49488    unsafe fn test_mm512_mask_cvttps_epi32() {
49489        let a = _mm512_setr_ps(
49490            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49491        );
49492        let src = _mm512_set1_epi32(0);
49493        let r = _mm512_mask_cvttps_epi32(src, 0, a);
49494        assert_eq_m512i(r, src);
49495        let r = _mm512_mask_cvttps_epi32(src, 0b00000000_11111111, a);
49496        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
49497        assert_eq_m512i(r, e);
49498    }
49499
49500    #[simd_test(enable = "avx512f")]
49501    unsafe fn test_mm512_maskz_cvttps_epi32() {
49502        let a = _mm512_setr_ps(
49503            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49504        );
49505        let r = _mm512_maskz_cvttps_epi32(0, a);
49506        assert_eq_m512i(r, _mm512_setzero_si512());
49507        let r = _mm512_maskz_cvttps_epi32(0b00000000_11111111, a);
49508        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
49509        assert_eq_m512i(r, e);
49510    }
49511
49512    #[simd_test(enable = "avx512f,avx512vl")]
49513    unsafe fn test_mm256_mask_cvttps_epi32() {
49514        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
49515        let src = _mm256_set1_epi32(0);
49516        let r = _mm256_mask_cvttps_epi32(src, 0, a);
49517        assert_eq_m256i(r, src);
49518        let r = _mm256_mask_cvttps_epi32(src, 0b11111111, a);
49519        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
49520        assert_eq_m256i(r, e);
49521    }
49522
49523    #[simd_test(enable = "avx512f,avx512vl")]
49524    unsafe fn test_mm256_maskz_cvttps_epi32() {
49525        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
49526        let r = _mm256_maskz_cvttps_epi32(0, a);
49527        assert_eq_m256i(r, _mm256_setzero_si256());
49528        let r = _mm256_maskz_cvttps_epi32(0b11111111, a);
49529        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
49530        assert_eq_m256i(r, e);
49531    }
49532
49533    #[simd_test(enable = "avx512f,avx512vl")]
49534    unsafe fn test_mm_mask_cvttps_epi32() {
49535        let a = _mm_set_ps(12., 13.5, 14., 15.5);
49536        let src = _mm_set1_epi32(0);
49537        let r = _mm_mask_cvttps_epi32(src, 0, a);
49538        assert_eq_m128i(r, src);
49539        let r = _mm_mask_cvttps_epi32(src, 0b00001111, a);
49540        let e = _mm_set_epi32(12, 13, 14, 15);
49541        assert_eq_m128i(r, e);
49542    }
49543
49544    #[simd_test(enable = "avx512f,avx512vl")]
49545    unsafe fn test_mm_maskz_cvttps_epi32() {
49546        let a = _mm_set_ps(12., 13.5, 14., 15.5);
49547        let r = _mm_maskz_cvttps_epi32(0, a);
49548        assert_eq_m128i(r, _mm_setzero_si128());
49549        let r = _mm_maskz_cvttps_epi32(0b00001111, a);
49550        let e = _mm_set_epi32(12, 13, 14, 15);
49551        assert_eq_m128i(r, e);
49552    }
49553
49554    #[simd_test(enable = "avx512f")]
49555    unsafe fn test_mm512_cvttps_epu32() {
49556        let a = _mm512_setr_ps(
49557            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49558        );
49559        let r = _mm512_cvttps_epu32(a);
49560        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
49561        assert_eq_m512i(r, e);
49562    }
49563
49564    #[simd_test(enable = "avx512f")]
49565    unsafe fn test_mm512_mask_cvttps_epu32() {
49566        let a = _mm512_setr_ps(
49567            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49568        );
49569        let src = _mm512_set1_epi32(0);
49570        let r = _mm512_mask_cvttps_epu32(src, 0, a);
49571        assert_eq_m512i(r, src);
49572        let r = _mm512_mask_cvttps_epu32(src, 0b00000000_11111111, a);
49573        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
49574        assert_eq_m512i(r, e);
49575    }
49576
49577    #[simd_test(enable = "avx512f")]
49578    unsafe fn test_mm512_maskz_cvttps_epu32() {
49579        let a = _mm512_setr_ps(
49580            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49581        );
49582        let r = _mm512_maskz_cvttps_epu32(0, a);
49583        assert_eq_m512i(r, _mm512_setzero_si512());
49584        let r = _mm512_maskz_cvttps_epu32(0b00000000_11111111, a);
49585        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
49586        assert_eq_m512i(r, e);
49587    }
49588
49589    #[simd_test(enable = "avx512f,avx512vl")]
49590    unsafe fn test_mm256_cvttps_epu32() {
49591        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
49592        let r = _mm256_cvttps_epu32(a);
49593        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
49594        assert_eq_m256i(r, e);
49595    }
49596
49597    #[simd_test(enable = "avx512f,avx512vl")]
49598    unsafe fn test_mm256_mask_cvttps_epu32() {
49599        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
49600        let src = _mm256_set1_epi32(0);
49601        let r = _mm256_mask_cvttps_epu32(src, 0, a);
49602        assert_eq_m256i(r, src);
49603        let r = _mm256_mask_cvttps_epu32(src, 0b11111111, a);
49604        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
49605        assert_eq_m256i(r, e);
49606    }
49607
49608    #[simd_test(enable = "avx512f,avx512vl")]
49609    unsafe fn test_mm256_maskz_cvttps_epu32() {
49610        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
49611        let r = _mm256_maskz_cvttps_epu32(0, a);
49612        assert_eq_m256i(r, _mm256_setzero_si256());
49613        let r = _mm256_maskz_cvttps_epu32(0b11111111, a);
49614        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
49615        assert_eq_m256i(r, e);
49616    }
49617
49618    #[simd_test(enable = "avx512f,avx512vl")]
49619    unsafe fn test_mm_cvttps_epu32() {
49620        let a = _mm_set_ps(12., 13.5, 14., 15.5);
49621        let r = _mm_cvttps_epu32(a);
49622        let e = _mm_set_epi32(12, 13, 14, 15);
49623        assert_eq_m128i(r, e);
49624    }
49625
49626    #[simd_test(enable = "avx512f,avx512vl")]
49627    unsafe fn test_mm_mask_cvttps_epu32() {
49628        let a = _mm_set_ps(12., 13.5, 14., 15.5);
49629        let src = _mm_set1_epi32(0);
49630        let r = _mm_mask_cvttps_epu32(src, 0, a);
49631        assert_eq_m128i(r, src);
49632        let r = _mm_mask_cvttps_epu32(src, 0b00001111, a);
49633        let e = _mm_set_epi32(12, 13, 14, 15);
49634        assert_eq_m128i(r, e);
49635    }
49636
49637    #[simd_test(enable = "avx512f,avx512vl")]
49638    unsafe fn test_mm_maskz_cvttps_epu32() {
49639        let a = _mm_set_ps(12., 13.5, 14., 15.5);
49640        let r = _mm_maskz_cvttps_epu32(0, a);
49641        assert_eq_m128i(r, _mm_setzero_si128());
49642        let r = _mm_maskz_cvttps_epu32(0b00001111, a);
49643        let e = _mm_set_epi32(12, 13, 14, 15);
49644        assert_eq_m128i(r, e);
49645    }
49646
49647    #[simd_test(enable = "avx512f")]
49648    unsafe fn test_mm512_i32gather_ps() {
49649        let arr: [f32; 256] = core::array::from_fn(|i| i as f32);
49650        // A scale of 4 makes each i32 index a 4-byte (one element) offset, so lane j addresses arr[index[j]]
49651        #[rustfmt::skip]
49652        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49653                                      120, 128, 136, 144, 152, 160, 168, 176);
49654        let r = _mm512_i32gather_ps::<4>(index, arr.as_ptr());
49655        #[rustfmt::skip]
49656        assert_eq_m512(r, _mm512_setr_ps(0., 16., 32., 48., 64., 80., 96., 112.,
49657                                         120., 128., 136., 144., 152., 160., 168., 176.));
49658    }
49659
49660    #[simd_test(enable = "avx512f")]
49661    unsafe fn test_mm512_mask_i32gather_ps() {
49662        let arr: [f32; 256] = core::array::from_fn(|i| i as f32);
49663        let src = _mm512_set1_ps(2.);
49664        let mask = 0b10101010_10101010;
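             // Only lanes with their mask bit set (the odd lanes here) are gathered; the even
             // lanes keep the corresponding value from `src`, i.e. 2.0.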
49665        #[rustfmt::skip]
49666        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49667                                      120, 128, 136, 144, 152, 160, 168, 176);
49668        // A scale of 4 makes each i32 index a 4-byte (one element) offset, so lane j addresses arr[index[j]]
49669        let r = _mm512_mask_i32gather_ps::<4>(src, mask, index, arr.as_ptr());
49670        #[rustfmt::skip]
49671        assert_eq_m512(r, _mm512_setr_ps(2., 16., 2., 48., 2., 80., 2., 112.,
49672                                         2., 128., 2., 144., 2., 160., 2., 176.));
49673    }
49674
49675    #[simd_test(enable = "avx512f")]
49676    unsafe fn test_mm512_i32gather_epi32() {
49677        let arr: [i32; 256] = core::array::from_fn(|i| i as i32);
49678        // A scale of 4 makes each i32 index a 4-byte (one element) offset, so lane j addresses arr[index[j]]
49679        #[rustfmt::skip]
49680        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49681                                      120, 128, 136, 144, 152, 160, 168, 176);
49682        let r = _mm512_i32gather_epi32::<4>(index, arr.as_ptr());
49683        #[rustfmt::skip]
49684        assert_eq_m512i(r, _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49685                                             120, 128, 136, 144, 152, 160, 168, 176));
49686    }
49687
49688    #[simd_test(enable = "avx512f")]
49689    unsafe fn test_mm512_mask_i32gather_epi32() {
49690        let arr: [i32; 256] = core::array::from_fn(|i| i as i32);
49691        let src = _mm512_set1_epi32(2);
49692        let mask = 0b10101010_10101010;
49693        let index = _mm512_setr_epi32(
49694            0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240,
49695        );
49696        // A scale of 4 makes each i32 index a 4-byte (one element) offset, so lane j addresses arr[index[j]]
49697        let r = _mm512_mask_i32gather_epi32::<4>(src, mask, index, arr.as_ptr());
49698        assert_eq_m512i(
49699            r,
49700            _mm512_setr_epi32(2, 16, 2, 48, 2, 80, 2, 112, 2, 144, 2, 176, 2, 208, 2, 240),
49701        );
49702    }
49703
49704    #[simd_test(enable = "avx512f")]
49705    unsafe fn test_mm512_i32scatter_ps() {
49706        let mut arr = [0f32; 256];
49707        #[rustfmt::skip]
49708        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49709                                      128, 144, 160, 176, 192, 208, 224, 240);
49710        let src = _mm512_setr_ps(
49711            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
49712        );
49713        // A scale of 4 makes each i32 index a 4-byte (one element) offset, so lane j addresses arr[index[j]]
49714        _mm512_i32scatter_ps::<4>(arr.as_mut_ptr(), index, src);
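             // Lane j stores src[j] = (j + 1) as f32 to arr[index[j]] = arr[16 * j], which is
             // what the loop below reconstructs.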
49715        let mut expected = [0f32; 256];
49716        for i in 0..16 {
49717            expected[i * 16] = (i + 1) as f32;
49718        }
49719        assert_eq!(&arr[..], &expected[..]);
49720    }
49721
49722    #[simd_test(enable = "avx512f")]
49723    unsafe fn test_mm512_mask_i32scatter_ps() {
49724        let mut arr = [0f32; 256];
49725        let mask = 0b10101010_10101010;
49726        #[rustfmt::skip]
49727        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49728                                      128, 144, 160, 176, 192, 208, 224, 240);
49729        let src = _mm512_setr_ps(
49730            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
49731        );
49732        // A scale of 4 makes each i32 index a 4-byte (one element) offset, so lane j addresses arr[index[j]]
49733        _mm512_mask_i32scatter_ps::<4>(arr.as_mut_ptr(), mask, index, src);
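             // Only odd lanes are stored (mask 0b10101010_10101010), so arr[16 * (2 * i + 1)]
             // = arr[32 * i + 16] receives src[2 * i + 1] = 2 * (i + 1) as f32.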
49734        let mut expected = [0f32; 256];
49735        for i in 0..8 {
49736            expected[i * 32 + 16] = 2. * (i + 1) as f32;
49737        }
49738        assert_eq!(&arr[..], &expected[..]);
49739    }
49740
49741    #[simd_test(enable = "avx512f")]
49742    unsafe fn test_mm512_i32scatter_epi32() {
49743        let mut arr = [0i32; 256];
49744        #[rustfmt::skip]
49746        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49747                                      128, 144, 160, 176, 192, 208, 224, 240);
49748        let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
49749        // A scale of 4 makes each i32 index a 4-byte (one element) offset, so lane j addresses arr[index[j]]
49750        _mm512_i32scatter_epi32::<4>(arr.as_mut_ptr(), index, src);
49751        let mut expected = [0i32; 256];
49752        for i in 0..16 {
49753            expected[i * 16] = (i + 1) as i32;
49754        }
49755        assert_eq!(&arr[..], &expected[..]);
49756    }
49757
49758    #[simd_test(enable = "avx512f")]
49759    unsafe fn test_mm512_mask_i32scatter_epi32() {
49760        let mut arr = [0i32; 256];
49761        let mask = 0b10101010_10101010;
49762        #[rustfmt::skip]
49763        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49764                                      128, 144, 160, 176, 192, 208, 224, 240);
49765        let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
49766        // A scale of 4 makes each i32 index a 4-byte (one element) offset, so lane j addresses arr[index[j]]
49767        _mm512_mask_i32scatter_epi32::<4>(arr.as_mut_ptr(), mask, index, src);
49768        let mut expected = [0i32; 256];
49769        for i in 0..8 {
49770            expected[i * 32 + 16] = 2 * (i + 1) as i32;
49771        }
49772        assert_eq!(&arr[..], &expected[..]);
49773    }
49774
49775    #[simd_test(enable = "avx512f")]
49776    unsafe fn test_mm512_cmplt_ps_mask() {
49777        #[rustfmt::skip]
49778        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49779                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49780        let b = _mm512_set1_ps(-1.);
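             // _mm512_set_ps lists lanes from highest to lowest, so mask bit 0 corresponds to
             // the last argument (-100.). NaN compares unordered, so its LT bit stays clear.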
49781        let m = _mm512_cmplt_ps_mask(a, b);
49782        assert_eq!(m, 0b00000101_00000101);
49783    }
49784
49785    #[simd_test(enable = "avx512f")]
49786    unsafe fn test_mm512_mask_cmplt_ps_mask() {
49787        #[rustfmt::skip]
49788        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49789                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49790        let b = _mm512_set1_ps(-1.);
49791        let mask = 0b01100110_01100110;
49792        let r = _mm512_mask_cmplt_ps_mask(mask, a, b);
49793        assert_eq!(r, 0b00000100_00000100);
49794    }
49795
49796    #[simd_test(enable = "avx512f")]
49797    unsafe fn test_mm512_cmpnlt_ps_mask() {
49798        #[rustfmt::skip]
49799        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49800                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49801        let b = _mm512_set1_ps(-1.);
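             // NLT ("not less than") is true for unordered operands, so it is exactly the
             // complement of the LT mask.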
49802        assert_eq!(_mm512_cmpnlt_ps_mask(a, b), !_mm512_cmplt_ps_mask(a, b));
49803    }
49804
49805    #[simd_test(enable = "avx512f")]
49806    unsafe fn test_mm512_mask_cmpnlt_ps_mask() {
49807        #[rustfmt::skip]
49808        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49809                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49810        let b = _mm512_set1_ps(-1.);
49811        let mask = 0b01111010_01111010;
49812        assert_eq!(_mm512_mask_cmpnlt_ps_mask(mask, a, b), 0b01111010_01111010);
49813    }
49814
49815    #[simd_test(enable = "avx512f")]
49816    unsafe fn test_mm512_cmpnle_ps_mask() {
49817        #[rustfmt::skip]
49818        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49819                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49820        let b = _mm512_set1_ps(-1.);
49821        let m = _mm512_cmpnle_ps_mask(b, a);
49822        assert_eq!(m, 0b00001101_00001101);
49823    }
49824
49825    #[simd_test(enable = "avx512f")]
49826    unsafe fn test_mm512_mask_cmpnle_ps_mask() {
49827        #[rustfmt::skip]
49828        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49829                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49830        let b = _mm512_set1_ps(-1.);
49831        let mask = 0b01100110_01100110;
49832        let r = _mm512_mask_cmpnle_ps_mask(mask, b, a);
49833        assert_eq!(r, 0b00000100_00000100);
49834    }
49835
49836    #[simd_test(enable = "avx512f")]
49837    unsafe fn test_mm512_cmple_ps_mask() {
49838        #[rustfmt::skip]
49839        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49840                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49841        let b = _mm512_set1_ps(-1.);
49842        assert_eq!(_mm512_cmple_ps_mask(a, b), 0b00100101_00100101);
49843    }
49844
49845    #[simd_test(enable = "avx512f")]
49846    unsafe fn test_mm512_mask_cmple_ps_mask() {
49847        #[rustfmt::skip]
49848        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49849                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49850        let b = _mm512_set1_ps(-1.);
49851        let mask = 0b01111010_01111010;
49852        assert_eq!(_mm512_mask_cmple_ps_mask(mask, a, b), 0b00100000_00100000);
49853    }
49854
49855    #[simd_test(enable = "avx512f")]
49856    unsafe fn test_mm512_cmpeq_ps_mask() {
49857        #[rustfmt::skip]
49858        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
49859                              0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
49860        #[rustfmt::skip]
49861        let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
49862                              0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
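             // NaN never compares equal, not even to itself, so bit 1 of each half stays clear
             // even though both operands are NaN in that lane.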
49863        let m = _mm512_cmpeq_ps_mask(b, a);
49864        assert_eq!(m, 0b11001101_11001101);
49865    }
49866
49867    #[simd_test(enable = "avx512f")]
49868    unsafe fn test_mm512_mask_cmpeq_ps_mask() {
49869        #[rustfmt::skip]
49870        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
49871                              0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
49872        #[rustfmt::skip]
49873        let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
49874                              0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
49875        let mask = 0b01111010_01111010;
49876        let r = _mm512_mask_cmpeq_ps_mask(mask, b, a);
49877        assert_eq!(r, 0b01001000_01001000);
49878    }
49879
49880    #[simd_test(enable = "avx512f")]
49881    unsafe fn test_mm512_cmpneq_ps_mask() {
49882        #[rustfmt::skip]
49883        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
49884                              0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
49885        #[rustfmt::skip]
49886        let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
49887                              0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
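             // Unlike EQ, the NEQ predicate reports true for unordered (NaN) operands, which is
             // why bit 1 of each half is set here.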
49888        let m = _mm512_cmpneq_ps_mask(b, a);
49889        assert_eq!(m, 0b00110010_00110010);
49890    }
49891
49892    #[simd_test(enable = "avx512f")]
49893    unsafe fn test_mm512_mask_cmpneq_ps_mask() {
49894        #[rustfmt::skip]
49895        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
49896                              0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
49897        #[rustfmt::skip]
49898        let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
49899                              0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
49900        let mask = 0b01111010_01111010;
49901        let r = _mm512_mask_cmpneq_ps_mask(mask, b, a);
49902        assert_eq!(r, 0b00110010_00110010)
49903    }
49904
49905    #[simd_test(enable = "avx512f")]
49906    unsafe fn test_mm512_cmp_ps_mask() {
49907        #[rustfmt::skip]
49908        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
49909                              0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
49910        let b = _mm512_set1_ps(-1.);
49911        let m = _mm512_cmp_ps_mask::<_CMP_LT_OQ>(a, b);
49912        assert_eq!(m, 0b00000101_00000101);
49913    }
49914
49915    #[simd_test(enable = "avx512f")]
49916    unsafe fn test_mm512_mask_cmp_ps_mask() {
49917        #[rustfmt::skip]
49918        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
49919                              0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
49920        let b = _mm512_set1_ps(-1.);
49921        let mask = 0b01100110_01100110;
49922        let r = _mm512_mask_cmp_ps_mask::<_CMP_LT_OQ>(mask, a, b);
49923        assert_eq!(r, 0b00000100_00000100);
49924    }
49925
49926    #[simd_test(enable = "avx512f,avx512vl")]
49927    unsafe fn test_mm256_cmp_ps_mask() {
49928        let a = _mm256_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
49929        let b = _mm256_set1_ps(-1.);
49930        let m = _mm256_cmp_ps_mask::<_CMP_LT_OQ>(a, b);
49931        assert_eq!(m, 0b00000101);
49932    }
49933
49934    #[simd_test(enable = "avx512f,avx512vl")]
49935    unsafe fn test_mm256_mask_cmp_ps_mask() {
49936        let a = _mm256_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
49937        let b = _mm256_set1_ps(-1.);
49938        let mask = 0b01100110;
49939        let r = _mm256_mask_cmp_ps_mask::<_CMP_LT_OQ>(mask, a, b);
49940        assert_eq!(r, 0b00000100);
49941    }
49942
49943    #[simd_test(enable = "avx512f,avx512vl")]
49944    unsafe fn test_mm_cmp_ps_mask() {
49945        let a = _mm_set_ps(0., 1., -1., 13.);
49946        let b = _mm_set1_ps(1.);
49947        let m = _mm_cmp_ps_mask::<_CMP_LT_OQ>(a, b);
49948        assert_eq!(m, 0b00001010);
49949    }
49950
49951    #[simd_test(enable = "avx512f,avx512vl")]
49952    unsafe fn test_mm_mask_cmp_ps_mask() {
49953        let a = _mm_set_ps(0., 1., -1., 13.);
49954        let b = _mm_set1_ps(1.);
49955        let mask = 0b11111111;
49956        let r = _mm_mask_cmp_ps_mask::<_CMP_LT_OQ>(mask, a, b);
49957        assert_eq!(r, 0b00001010);
49958    }
49959
49960    #[simd_test(enable = "avx512f")]
49961    unsafe fn test_mm512_cmp_round_ps_mask() {
49962        #[rustfmt::skip]
49963        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
49964                              0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
49965        let b = _mm512_set1_ps(-1.);
49966        let m = _mm512_cmp_round_ps_mask::<_CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION>(a, b);
49967        assert_eq!(m, 0b00000101_00000101);
49968    }
49969
49970    #[simd_test(enable = "avx512f")]
49971    unsafe fn test_mm512_mask_cmp_round_ps_mask() {
49972        #[rustfmt::skip]
49973        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
49974                              0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
49975        let b = _mm512_set1_ps(-1.);
49976        let mask = 0b01100110_01100110;
49977        let r = _mm512_mask_cmp_round_ps_mask::<_CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION>(mask, a, b);
49978        assert_eq!(r, 0b00000100_00000100);
49979    }
49980
49981    #[simd_test(enable = "avx512f")]
49982    unsafe fn test_mm512_cmpord_ps_mask() {
49983        #[rustfmt::skip]
49984        let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
49985                              f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
49986        #[rustfmt::skip]
49987        let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
49988                              f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
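             // ORD sets a bit only when neither operand is NaN; that holds just for lanes 0
             // and 2 of each half here.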
49989        let m = _mm512_cmpord_ps_mask(a, b);
49990        assert_eq!(m, 0b00000101_00000101);
49991    }
49992
49993    #[simd_test(enable = "avx512f")]
49994    unsafe fn test_mm512_mask_cmpord_ps_mask() {
49995        #[rustfmt::skip]
49996        let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
49997                              f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
49998        #[rustfmt::skip]
49999        let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
50000                              f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
50001        let mask = 0b11000011_11000011;
50002        let m = _mm512_mask_cmpord_ps_mask(mask, a, b);
50003        assert_eq!(m, 0b00000001_00000001);
50004    }
50005
50006    #[simd_test(enable = "avx512f")]
50007    unsafe fn test_mm512_cmpunord_ps_mask() {
50008        #[rustfmt::skip]
50009        let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
50010                              f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
50011        #[rustfmt::skip]
50012        let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
50013                              f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
50014        let m = _mm512_cmpunord_ps_mask(a, b);
50015
50016        assert_eq!(m, 0b11111010_11111010);
50017    }
50018
50019    #[simd_test(enable = "avx512f")]
50020    unsafe fn test_mm512_mask_cmpunord_ps_mask() {
50021        #[rustfmt::skip]
50022        let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
50023                              f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
50024        #[rustfmt::skip]
50025        let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
50026                              f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
50027        let mask = 0b00001111_00001111;
50028        let m = _mm512_mask_cmpunord_ps_mask(mask, a, b);
50029        assert_eq!(m, 0b00001010_00001010);
50030    }
50031
50032    #[simd_test(enable = "avx512f")]
50033    unsafe fn test_mm_cmp_ss_mask() {
50034        let a = _mm_setr_ps(2., 1., 1., 1.);
50035        let b = _mm_setr_ps(1., 2., 2., 2.);
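             // Scalar (ss) compares produce at most bit 0 of the mask: 2. >= 1., so the result is 1.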
50036        let m = _mm_cmp_ss_mask::<_CMP_GE_OS>(a, b);
50037        assert_eq!(m, 1);
50038    }
50039
50040    #[simd_test(enable = "avx512f")]
50041    unsafe fn test_mm_mask_cmp_ss_mask() {
50042        let a = _mm_setr_ps(2., 1., 1., 1.);
50043        let b = _mm_setr_ps(1., 2., 2., 2.);
50044        let m = _mm_mask_cmp_ss_mask::<_CMP_GE_OS>(0b10, a, b);
50045        assert_eq!(m, 0);
50046        let m = _mm_mask_cmp_ss_mask::<_CMP_GE_OS>(0b1, a, b);
50047        assert_eq!(m, 1);
50048    }
50049
50050    #[simd_test(enable = "avx512f")]
50051    unsafe fn test_mm_cmp_round_ss_mask() {
50052        let a = _mm_setr_ps(2., 1., 1., 1.);
50053        let b = _mm_setr_ps(1., 2., 2., 2.);
50054        let m = _mm_cmp_round_ss_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(a, b);
50055        assert_eq!(m, 1);
50056    }
50057
50058    #[simd_test(enable = "avx512f")]
50059    unsafe fn test_mm_mask_cmp_round_ss_mask() {
50060        let a = _mm_setr_ps(2., 1., 1., 1.);
50061        let b = _mm_setr_ps(1., 2., 2., 2.);
50062        let m = _mm_mask_cmp_round_ss_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b10, a, b);
50063        assert_eq!(m, 0);
50064        let m = _mm_mask_cmp_round_ss_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b1, a, b);
50065        assert_eq!(m, 1);
50066    }
50067
50068    #[simd_test(enable = "avx512f")]
50069    unsafe fn test_mm_cmp_sd_mask() {
50070        let a = _mm_setr_pd(2., 1.);
50071        let b = _mm_setr_pd(1., 2.);
50072        let m = _mm_cmp_sd_mask::<_CMP_GE_OS>(a, b);
50073        assert_eq!(m, 1);
50074    }
50075
50076    #[simd_test(enable = "avx512f")]
50077    unsafe fn test_mm_mask_cmp_sd_mask() {
50078        let a = _mm_setr_pd(2., 1.);
50079        let b = _mm_setr_pd(1., 2.);
50080        let m = _mm_mask_cmp_sd_mask::<_CMP_GE_OS>(0b10, a, b);
50081        assert_eq!(m, 0);
50082        let m = _mm_mask_cmp_sd_mask::<_CMP_GE_OS>(0b1, a, b);
50083        assert_eq!(m, 1);
50084    }
50085
50086    #[simd_test(enable = "avx512f")]
50087    unsafe fn test_mm_cmp_round_sd_mask() {
50088        let a = _mm_setr_pd(2., 1.);
50089        let b = _mm_setr_pd(1., 2.);
50090        let m = _mm_cmp_round_sd_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(a, b);
50091        assert_eq!(m, 1);
50092    }
50093
50094    #[simd_test(enable = "avx512f")]
50095    unsafe fn test_mm_mask_cmp_round_sd_mask() {
50096        let a = _mm_setr_pd(2., 1.);
50097        let b = _mm_setr_pd(1., 2.);
50098        let m = _mm_mask_cmp_round_sd_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b10, a, b);
50099        assert_eq!(m, 0);
50100        let m = _mm_mask_cmp_round_sd_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b1, a, b);
50101        assert_eq!(m, 1);
50102    }
50103
50104    #[simd_test(enable = "avx512f")]
50105    unsafe fn test_mm512_cmplt_epu32_mask() {
50106        #[rustfmt::skip]
50107        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50108                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50109        let b = _mm512_set1_epi32(-1);
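             // Compared as unsigned, -1 and u32::MAX are both 0xFFFFFFFF, so every lane except
             // those two is below `b`.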
50110        let m = _mm512_cmplt_epu32_mask(a, b);
50111        assert_eq!(m, 0b11001111_11001111);
50112    }
50113
50114    #[simd_test(enable = "avx512f")]
50115    unsafe fn test_mm512_mask_cmplt_epu32_mask() {
50116        #[rustfmt::skip]
50117        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50118                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50119        let b = _mm512_set1_epi32(-1);
50120        let mask = 0b01111010_01111010;
50121        let r = _mm512_mask_cmplt_epu32_mask(mask, a, b);
50122        assert_eq!(r, 0b01001010_01001010);
50123    }
50124
50125    #[simd_test(enable = "avx512f,avx512vl")]
50126    unsafe fn test_mm256_cmplt_epu32_mask() {
50127        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 101, 100, 99);
50128        let b = _mm256_set1_epi32(1);
50129        let r = _mm256_cmplt_epu32_mask(a, b);
50130        assert_eq!(r, 0b10000000);
50131    }
50132
50133    #[simd_test(enable = "avx512f,avx512vl")]
50134    unsafe fn test_mm256_mask_cmplt_epu32_mask() {
50135        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 101, 100, 99);
50136        let b = _mm256_set1_epi32(1);
50137        let mask = 0b11111111;
50138        let r = _mm256_mask_cmplt_epu32_mask(mask, a, b);
50139        assert_eq!(r, 0b10000000);
50140    }
50141
50142    #[simd_test(enable = "avx512f,avx512vl")]
50143    unsafe fn test_mm_cmplt_epu32_mask() {
50144        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50145        let b = _mm_set1_epi32(1);
50146        let r = _mm_cmplt_epu32_mask(a, b);
50147        assert_eq!(r, 0b00001000);
50148    }
50149
50150    #[simd_test(enable = "avx512f,avx512vl")]
50151    unsafe fn test_mm_mask_cmplt_epu32_mask() {
50152        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50153        let b = _mm_set1_epi32(1);
50154        let mask = 0b11111111;
50155        let r = _mm_mask_cmplt_epu32_mask(mask, a, b);
50156        assert_eq!(r, 0b00001000);
50157    }
50158
50159    #[simd_test(enable = "avx512f")]
50160    unsafe fn test_mm512_cmpgt_epu32_mask() {
50161        #[rustfmt::skip]
50162        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50163                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50164        let b = _mm512_set1_epi32(-1);
50165        let m = _mm512_cmpgt_epu32_mask(b, a);
50166        assert_eq!(m, 0b11001111_11001111);
50167    }
50168
50169    #[simd_test(enable = "avx512f")]
50170    unsafe fn test_mm512_mask_cmpgt_epu32_mask() {
50171        #[rustfmt::skip]
50172        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50173                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50174        let b = _mm512_set1_epi32(-1);
50175        let mask = 0b01111010_01111010;
50176        let r = _mm512_mask_cmpgt_epu32_mask(mask, b, a);
50177        assert_eq!(r, 0b01001010_01001010);
50178    }
50179
50180    #[simd_test(enable = "avx512f,avx512vl")]
50181    unsafe fn test_mm256_cmpgt_epu32_mask() {
50182        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 99, 100, 101);
50183        let b = _mm256_set1_epi32(1);
50184        let r = _mm256_cmpgt_epu32_mask(a, b);
50185        assert_eq!(r, 0b00111111);
50186    }
50187
50188    #[simd_test(enable = "avx512f,avx512vl")]
50189    unsafe fn test_mm256_mask_cmpgt_epu32_mask() {
50190        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 99, 100, 101);
50191        let b = _mm256_set1_epi32(1);
50192        let mask = 0b11111111;
50193        let r = _mm256_mask_cmpgt_epu32_mask(mask, a, b);
50194        assert_eq!(r, 0b00111111);
50195    }
50196
50197    #[simd_test(enable = "avx512f,avx512vl")]
50198    unsafe fn test_mm_cmpgt_epu32_mask() {
50199        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50200        let b = _mm_set1_epi32(1);
50201        let r = _mm_cmpgt_epu32_mask(a, b);
50202        assert_eq!(r, 0b00000011);
50203    }
50204
50205    #[simd_test(enable = "avx512f,avx512vl")]
50206    unsafe fn test_mm_mask_cmpgt_epu32_mask() {
50207        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50208        let b = _mm_set1_epi32(1);
50209        let mask = 0b11111111;
50210        let r = _mm_mask_cmpgt_epu32_mask(mask, a, b);
50211        assert_eq!(r, 0b00000011);
50212    }
50213
50214    #[simd_test(enable = "avx512f")]
50215    unsafe fn test_mm512_cmple_epu32_mask() {
50216        #[rustfmt::skip]
50217        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50218                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50219        let b = _mm512_set1_epi32(-1);
50220        assert_eq!(
50221            _mm512_cmple_epu32_mask(a, b),
50222            !_mm512_cmpgt_epu32_mask(a, b)
50223        )
50224    }
50225
50226    #[simd_test(enable = "avx512f")]
50227    unsafe fn test_mm512_mask_cmple_epu32_mask() {
50228        #[rustfmt::skip]
50229        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50230                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50231        let b = _mm512_set1_epi32(-1);
50232        let mask = 0b01111010_01111010;
50233        assert_eq!(
50234            _mm512_mask_cmple_epu32_mask(mask, a, b),
50235            0b01111010_01111010
50236        );
50237    }
50238
50239    #[simd_test(enable = "avx512f,avx512vl")]
50240    unsafe fn test_mm256_cmple_epu32_mask() {
50241        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 200, 100, 101);
50242        let b = _mm256_set1_epi32(1);
50243        let r = _mm256_cmple_epu32_mask(a, b);
50244        assert_eq!(r, 0b11000000)
50245    }
50246
50247    #[simd_test(enable = "avx512f,avx512vl")]
50248    unsafe fn test_mm256_mask_cmple_epu32_mask() {
50249        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 200, 100, 101);
50250        let b = _mm256_set1_epi32(1);
50251        let mask = 0b11111111;
50252        let r = _mm256_mask_cmple_epu32_mask(mask, a, b);
50253        assert_eq!(r, 0b11000000)
50254    }
50255
50256    #[simd_test(enable = "avx512f,avx512vl")]
50257    unsafe fn test_mm_cmple_epu32_mask() {
50258        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50259        let b = _mm_set1_epi32(1);
50260        let r = _mm_cmple_epu32_mask(a, b);
50261        assert_eq!(r, 0b00001100)
50262    }
50263
50264    #[simd_test(enable = "avx512f,avx512vl")]
50265    unsafe fn test_mm_mask_cmple_epu32_mask() {
50266        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50267        let b = _mm_set1_epi32(1);
50268        let mask = 0b11111111;
50269        let r = _mm_mask_cmple_epu32_mask(mask, a, b);
50270        assert_eq!(r, 0b00001100)
50271    }
50272
50273    #[simd_test(enable = "avx512f")]
50274    unsafe fn test_mm512_cmpge_epu32_mask() {
50275        #[rustfmt::skip]
50276        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50277                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50278        let b = _mm512_set1_epi32(-1);
50279        assert_eq!(
50280            _mm512_cmpge_epu32_mask(a, b),
50281            !_mm512_cmplt_epu32_mask(a, b)
50282        )
50283    }
50284
50285    #[simd_test(enable = "avx512f")]
50286    unsafe fn test_mm512_mask_cmpge_epu32_mask() {
50287        #[rustfmt::skip]
50288        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50289                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50290        let b = _mm512_set1_epi32(-1);
50291        let mask = 0b01111010_01111010;
50292        assert_eq!(_mm512_mask_cmpge_epu32_mask(mask, a, b), 0b00110000_00110000);
50293    }
50294
50295    #[simd_test(enable = "avx512f,avx512vl")]
50296    unsafe fn test_mm256_cmpge_epu32_mask() {
50297        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 300, 100, 200);
50298        let b = _mm256_set1_epi32(1);
50299        let r = _mm256_cmpge_epu32_mask(a, b);
50300        assert_eq!(r, 0b01111111)
50301    }
50302
50303    #[simd_test(enable = "avx512f,avx512vl")]
50304    unsafe fn test_mm256_mask_cmpge_epu32_mask() {
50305        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 300, 100, 200);
50306        let b = _mm256_set1_epi32(1);
50307        let mask = 0b11111111;
50308        let r = _mm256_mask_cmpge_epu32_mask(mask, a, b);
50309        assert_eq!(r, 0b01111111)
50310    }
50311
50312    #[simd_test(enable = "avx512f,avx512vl")]
50313    unsafe fn test_mm_cmpge_epu32_mask() {
50314        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50315        let b = _mm_set1_epi32(1);
50316        let r = _mm_cmpge_epu32_mask(a, b);
50317        assert_eq!(r, 0b00000111)
50318    }
50319
50320    #[simd_test(enable = "avx512f,avx512vl")]
50321    unsafe fn test_mm_mask_cmpge_epu32_mask() {
50322        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50323        let b = _mm_set1_epi32(1);
50324        let mask = 0b11111111;
50325        let r = _mm_mask_cmpge_epu32_mask(mask, a, b);
50326        assert_eq!(r, 0b00000111)
50327    }
50328
50329    #[simd_test(enable = "avx512f")]
50330    unsafe fn test_mm512_cmpeq_epu32_mask() {
50331        #[rustfmt::skip]
50332        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50333                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50334        #[rustfmt::skip]
50335        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50336                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50337        let m = _mm512_cmpeq_epu32_mask(b, a);
50338        assert_eq!(m, 0b11001111_11001111);
50339    }
50340
50341    #[simd_test(enable = "avx512f")]
50342    unsafe fn test_mm512_mask_cmpeq_epu32_mask() {
50343        #[rustfmt::skip]
50344        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50345                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50346        #[rustfmt::skip]
50347        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50348                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50349        let mask = 0b01111010_01111010;
50350        let r = _mm512_mask_cmpeq_epu32_mask(mask, b, a);
50351        assert_eq!(r, 0b01001010_01001010);
50352    }
50353
50354    #[simd_test(enable = "avx512f,avx512vl")]
50355    unsafe fn test_mm256_cmpeq_epu32_mask() {
50356        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50357        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50358        let m = _mm256_cmpeq_epu32_mask(b, a);
50359        assert_eq!(m, 0b11001111);
50360    }
50361
50362    #[simd_test(enable = "avx512f,avx512vl")]
50363    unsafe fn test_mm256_mask_cmpeq_epu32_mask() {
50364        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50365        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50366        let mask = 0b01111010;
50367        let r = _mm256_mask_cmpeq_epu32_mask(mask, b, a);
50368        assert_eq!(r, 0b01001010);
50369    }
50370
50371    #[simd_test(enable = "avx512f,avx512vl")]
50372    unsafe fn test_mm_cmpeq_epu32_mask() {
50373        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50374        let b = _mm_set_epi32(0, 1, 13, 42);
50375        let m = _mm_cmpeq_epu32_mask(b, a);
50376        assert_eq!(m, 0b00001100);
50377    }
50378
50379    #[simd_test(enable = "avx512f,avx512vl")]
50380    unsafe fn test_mm_mask_cmpeq_epu32_mask() {
50381        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50382        let b = _mm_set_epi32(0, 1, 13, 42);
50383        let mask = 0b11111111;
50384        let r = _mm_mask_cmpeq_epu32_mask(mask, b, a);
50385        assert_eq!(r, 0b00001100);
50386    }
50387
50388    #[simd_test(enable = "avx512f")]
50389    unsafe fn test_mm512_cmpneq_epu32_mask() {
50390        #[rustfmt::skip]
50391        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50392                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50393        #[rustfmt::skip]
50394        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50395                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50396        let m = _mm512_cmpneq_epu32_mask(b, a);
50397        assert_eq!(m, !_mm512_cmpeq_epu32_mask(b, a));
50398    }
50399
50400    #[simd_test(enable = "avx512f")]
50401    unsafe fn test_mm512_mask_cmpneq_epu32_mask() {
50402        #[rustfmt::skip]
50403        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100,
50404                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
50405        #[rustfmt::skip]
50406        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50407                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50408        let mask = 0b01111010_01111010;
50409        let r = _mm512_mask_cmpneq_epu32_mask(mask, b, a);
50410        assert_eq!(r, 0b00110010_00110010);
50411    }
50412
50413    #[simd_test(enable = "avx512f,avx512vl")]
50414    unsafe fn test_mm256_cmpneq_epu32_mask() {
50415        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
50416        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, -100, 100);
50417        let r = _mm256_cmpneq_epu32_mask(b, a);
50418        assert_eq!(r, 0b00110000);
50419    }
50420
50421    #[simd_test(enable = "avx512f,avx512vl")]
50422    unsafe fn test_mm256_mask_cmpneq_epu32_mask() {
50423        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
50424        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, -100, 100);
50425        let mask = 0b11111111;
50426        let r = _mm256_mask_cmpneq_epu32_mask(mask, b, a);
50427        assert_eq!(r, 0b00110000);
50428    }
50429
50430    #[simd_test(enable = "avx512f,avx512vl")]
50431    unsafe fn test_mm_cmpneq_epu32_mask() {
50432        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50433        let b = _mm_set_epi32(0, 1, 13, 42);
50434        let r = _mm_cmpneq_epu32_mask(b, a);
50435        assert_eq!(r, 0b00000011);
50436    }
50437
50438    #[simd_test(enable = "avx512f,avx512vl")]
50439    unsafe fn test_mm_mask_cmpneq_epu32_mask() {
50440        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50441        let b = _mm_set_epi32(0, 1, 13, 42);
50442        let mask = 0b11111111;
50443        let r = _mm_mask_cmpneq_epu32_mask(mask, b, a);
50444        assert_eq!(r, 0b00000011);
50445    }
50446
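    // `_MM_CMPINT_LT` with the `_epu32` intrinsics below performs an *unsigned*
    // less-than, so a lane holding -1 compares as u32::MAX and is never less
    // than any other value.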
50447    #[simd_test(enable = "avx512f")]
50448    unsafe fn test_mm512_cmp_epu32_mask() {
50449        #[rustfmt::skip]
50450        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50451                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50452        let b = _mm512_set1_epi32(-1);
50453        let m = _mm512_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b);
50454        assert_eq!(m, 0b11001111_11001111);
50455    }
50456
50457    #[simd_test(enable = "avx512f")]
50458    unsafe fn test_mm512_mask_cmp_epu32_mask() {
50459        #[rustfmt::skip]
50460        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50461                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50462        let b = _mm512_set1_epi32(-1);
50463        let mask = 0b01111010_01111010;
50464        let r = _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(mask, a, b);
50465        assert_eq!(r, 0b01001010_01001010);
50466    }
50467
50468    #[simd_test(enable = "avx512f,avx512vl")]
50469    unsafe fn test_mm256_cmp_epu32_mask() {
50470        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50471        let b = _mm256_set1_epi32(-1);
50472        let m = _mm256_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b);
50473        assert_eq!(m, 0b11001111);
50474    }
50475
50476    #[simd_test(enable = "avx512f,avx512vl")]
50477    unsafe fn test_mm256_mask_cmp_epu32_mask() {
50478        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50479        let b = _mm256_set1_epi32(-1);
50480        let mask = 0b11111111;
50481        let r = _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(mask, a, b);
50482        assert_eq!(r, 0b11001111);
50483    }
50484
50485    #[simd_test(enable = "avx512f,avx512vl")]
50486    unsafe fn test_mm_cmp_epu32_mask() {
50487        let a = _mm_set_epi32(0, 1, -1, i32::MAX);
50488        let b = _mm_set1_epi32(1);
50489        let m = _mm_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b);
50490        assert_eq!(m, 0b00001000);
50491    }
50492
50493    #[simd_test(enable = "avx512f,avx512vl")]
50494    unsafe fn test_mm_mask_cmp_epu32_mask() {
50495        let a = _mm_set_epi32(0, 1, -1, i32::MAX);
50496        let b = _mm_set1_epi32(1);
50497        let mask = 0b11111111;
50498        let r = _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(mask, a, b);
50499        assert_eq!(r, 0b00001000);
50500    }
50501
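    // The `_epi32` comparisons below are signed: with b = -1, only the lanes
    // holding -100 and i32::MIN compare as less-than, giving 0b00000101 per
    // 8-lane group.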
50502    #[simd_test(enable = "avx512f")]
50503    unsafe fn test_mm512_cmplt_epi32_mask() {
50504        #[rustfmt::skip]
50505        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50506                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50507        let b = _mm512_set1_epi32(-1);
50508        let m = _mm512_cmplt_epi32_mask(a, b);
50509        assert_eq!(m, 0b00000101_00000101);
50510    }
50511
50512    #[simd_test(enable = "avx512f")]
50513    unsafe fn test_mm512_mask_cmplt_epi32_mask() {
50514        #[rustfmt::skip]
50515        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50516                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50517        let b = _mm512_set1_epi32(-1);
50518        let mask = 0b01100110_01100110;
50519        let r = _mm512_mask_cmplt_epi32_mask(mask, a, b);
50520        assert_eq!(r, 0b00000100_00000100);
50521    }
50522
50523    #[simd_test(enable = "avx512f,avx512vl")]
50524    unsafe fn test_mm256_cmplt_epi32_mask() {
50525        let a = _mm256_set_epi32(0, 1, -1, 101, i32::MAX, i32::MIN, 100, -100);
50526        let b = _mm256_set1_epi32(-1);
50527        let r = _mm256_cmplt_epi32_mask(a, b);
50528        assert_eq!(r, 0b00000101);
50529    }
50530
50531    #[simd_test(enable = "avx512f,avx512vl")]
50532    unsafe fn test_mm256_mask_cmplt_epi32_mask() {
50533        let a = _mm256_set_epi32(0, 1, -1, 101, i32::MAX, i32::MIN, 100, -100);
50534        let b = _mm256_set1_epi32(-1);
50535        let mask = 0b11111111;
50536        let r = _mm256_mask_cmplt_epi32_mask(mask, a, b);
50537        assert_eq!(r, 0b00000101);
50538    }
50539
50540    #[simd_test(enable = "avx512f,avx512vl")]
50541    unsafe fn test_mm_cmplt_epi32_mask() {
50542        let a = _mm_set_epi32(i32::MAX, i32::MIN, 100, -100);
50543        let b = _mm_set1_epi32(-1);
50544        let r = _mm_cmplt_epi32_mask(a, b);
50545        assert_eq!(r, 0b00000101);
50546    }
50547
50548    #[simd_test(enable = "avx512f,avx512vl")]
50549    unsafe fn test_mm_mask_cmplt_epi32_mask() {
50550        let a = _mm_set_epi32(i32::MAX, i32::MIN, 100, -100);
50551        let b = _mm_set1_epi32(-1);
50552        let mask = 0b11111111;
50553        let r = _mm_mask_cmplt_epi32_mask(mask, a, b);
50554        assert_eq!(r, 0b00000101);
50555    }
50556
50557    #[simd_test(enable = "avx512f")]
50558    unsafe fn test_mm512_cmpgt_epi32_mask() {
50559        #[rustfmt::skip]
50560        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50561                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50562        let b = _mm512_set1_epi32(-1);
50563        let m = _mm512_cmpgt_epi32_mask(b, a);
50564        assert_eq!(m, 0b00000101_00000101);
50565    }
50566
50567    #[simd_test(enable = "avx512f")]
50568    unsafe fn test_mm512_mask_cmpgt_epi32_mask() {
50569        #[rustfmt::skip]
50570        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50571                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50572        let b = _mm512_set1_epi32(-1);
50573        let mask = 0b01100110_01100110;
50574        let r = _mm512_mask_cmpgt_epi32_mask(mask, b, a);
50575        assert_eq!(r, 0b00000100_00000100);
50576    }
50577
50578    #[simd_test(enable = "avx512f,avx512vl")]
50579    unsafe fn test_mm256_cmpgt_epi32_mask() {
50580        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50581        let b = _mm256_set1_epi32(-1);
50582        let r = _mm256_cmpgt_epi32_mask(a, b);
50583        assert_eq!(r, 0b11011010);
50584    }
50585
50586    #[simd_test(enable = "avx512f,avx512vl")]
50587    unsafe fn test_mm256_mask_cmpgt_epi32_mask() {
50588        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50589        let b = _mm256_set1_epi32(-1);
50590        let mask = 0b11111111;
50591        let r = _mm256_mask_cmpgt_epi32_mask(mask, a, b);
50592        assert_eq!(r, 0b11011010);
50593    }
50594
50595    #[simd_test(enable = "avx512f,avx512vl")]
50596    unsafe fn test_mm_cmpgt_epi32_mask() {
50597        let a = _mm_set_epi32(0, 1, -1, 13);
50598        let b = _mm_set1_epi32(-1);
50599        let r = _mm_cmpgt_epi32_mask(a, b);
50600        assert_eq!(r, 0b00001101);
50601    }
50602
50603    #[simd_test(enable = "avx512f,avx512vl")]
50604    unsafe fn test_mm_mask_cmpgt_epi32_mask() {
50605        let a = _mm_set_epi32(0, 1, -1, 13);
50606        let b = _mm_set1_epi32(-1);
50607        let mask = 0b11111111;
50608        let r = _mm_mask_cmpgt_epi32_mask(mask, a, b);
50609        assert_eq!(r, 0b00001101);
50610    }
50611
50612    #[simd_test(enable = "avx512f")]
50613    unsafe fn test_mm512_cmple_epi32_mask() {
50614        #[rustfmt::skip]
50615        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50616                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50617        let b = _mm512_set1_epi32(-1);
50618        assert_eq!(
50619            _mm512_cmple_epi32_mask(a, b),
50620            !_mm512_cmpgt_epi32_mask(a, b)
50621        )
50622    }
50623
50624    #[simd_test(enable = "avx512f")]
50625    unsafe fn test_mm512_mask_cmple_epi32_mask() {
50626        #[rustfmt::skip]
50627        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50628                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50629        let b = _mm512_set1_epi32(-1);
50630        let mask = 0b01111010_01111010;
50631        assert_eq!(_mm512_mask_cmple_epi32_mask(mask, a, b), 0b00110000_00110000);
50632    }
50633
50634    #[simd_test(enable = "avx512f,avx512vl")]
50635    unsafe fn test_mm256_cmple_epi32_mask() {
50636        let a = _mm256_set_epi32(0, 1, -1, 200, i32::MAX, i32::MIN, 100, -100);
50637        let b = _mm256_set1_epi32(-1);
50638        let r = _mm256_cmple_epi32_mask(a, b);
50639        assert_eq!(r, 0b00100101)
50640    }
50641
50642    #[simd_test(enable = "avx512f,avx512vl")]
50643    unsafe fn test_mm256_mask_cmple_epi32_mask() {
50644        let a = _mm256_set_epi32(0, 1, -1, 200, i32::MAX, i32::MIN, 100, -100);
50645        let b = _mm256_set1_epi32(-1);
50646        let mask = 0b11111111;
50647        let r = _mm256_mask_cmple_epi32_mask(mask, a, b);
50648        assert_eq!(r, 0b00100101)
50649    }
50650
50651    #[simd_test(enable = "avx512f,avx512vl")]
50652    unsafe fn test_mm_cmple_epi32_mask() {
50653        let a = _mm_set_epi32(0, 1, -1, 200);
50654        let b = _mm_set1_epi32(-1);
50655        let r = _mm_cmple_epi32_mask(a, b);
50656        assert_eq!(r, 0b00000010)
50657    }
50658
50659    #[simd_test(enable = "avx512f,avx512vl")]
50660    unsafe fn test_mm_mask_cmple_epi32_mask() {
50661        let a = _mm_set_epi32(0, 1, -1, 200);
50662        let b = _mm_set1_epi32(-1);
50663        let mask = 0b11111111;
50664        let r = _mm_mask_cmple_epi32_mask(mask, a, b);
50665        assert_eq!(r, 0b00000010)
50666    }
50667
50668    #[simd_test(enable = "avx512f")]
50669    unsafe fn test_mm512_cmpge_epi32_mask() {
50670        #[rustfmt::skip]
50671        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50672                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50673        let b = _mm512_set1_epi32(-1);
50674        assert_eq!(
50675            _mm512_cmpge_epi32_mask(a, b),
50676            !_mm512_cmplt_epi32_mask(a, b)
50677        )
50678    }
50679
50680    #[simd_test(enable = "avx512f")]
50681    unsafe fn test_mm512_mask_cmpge_epi32_mask() {
50682        #[rustfmt::skip]
50683        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50684                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50685        let b = _mm512_set1_epi32(-1);
50686        let mask = 0b01111010_01111010;
50687        assert_eq!(
50688            _mm512_mask_cmpge_epi32_mask(mask, a, b),
50689            0b01111010_01111010
50690        );
50691    }
50692
50693    #[simd_test(enable = "avx512f,avx512vl")]
50694    unsafe fn test_mm256_cmpge_epi32_mask() {
50695        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50696        let b = _mm256_set1_epi32(-1);
50697        let r = _mm256_cmpge_epi32_mask(a, b);
50698        assert_eq!(r, 0b11111010)
50699    }
50700
50701    #[simd_test(enable = "avx512f,avx512vl")]
50702    unsafe fn test_mm256_mask_cmpge_epi32_mask() {
50703        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50704        let b = _mm256_set1_epi32(-1);
50705        let mask = 0b11111111;
50706        let r = _mm256_mask_cmpge_epi32_mask(mask, a, b);
50707        assert_eq!(r, 0b11111010)
50708    }
50709
50710    #[simd_test(enable = "avx512f,avx512vl")]
50711    unsafe fn test_mm_cmpge_epi32_mask() {
50712        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50713        let b = _mm_set1_epi32(-1);
50714        let r = _mm_cmpge_epi32_mask(a, b);
50715        assert_eq!(r, 0b00001111)
50716    }
50717
50718    #[simd_test(enable = "avx512f,avx512vl")]
50719    unsafe fn test_mm_mask_cmpge_epi32_mask() {
50720        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50721        let b = _mm_set1_epi32(-1);
50722        let mask = 0b11111111;
50723        let r = _mm_mask_cmpge_epi32_mask(mask, a, b);
50724        assert_eq!(r, 0b00001111)
50725    }
50726
50727    #[simd_test(enable = "avx512f")]
50728    unsafe fn test_mm512_cmpeq_epi32_mask() {
50729        #[rustfmt::skip]
50730        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50731                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50732        #[rustfmt::skip]
50733        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50734                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50735        let m = _mm512_cmpeq_epi32_mask(b, a);
50736        assert_eq!(m, 0b11001111_11001111);
50737    }
50738
50739    #[simd_test(enable = "avx512f")]
50740    unsafe fn test_mm512_mask_cmpeq_epi32_mask() {
50741        #[rustfmt::skip]
50742        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50743                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50744        #[rustfmt::skip]
50745        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50746                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50747        let mask = 0b01111010_01111010;
50748        let r = _mm512_mask_cmpeq_epi32_mask(mask, b, a);
50749        assert_eq!(r, 0b01001010_01001010);
50750    }
50751
50752    #[simd_test(enable = "avx512f,avx512vl")]
50753    unsafe fn test_mm256_cmpeq_epi32_mask() {
50754        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50755        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50756        let m = _mm256_cmpeq_epi32_mask(b, a);
50757        assert_eq!(m, 0b11001111);
50758    }
50759
50760    #[simd_test(enable = "avx512f,avx512vl")]
50761    unsafe fn test_mm256_mask_cmpeq_epi32_mask() {
50762        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50763        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50764        let mask = 0b01111010;
50765        let r = _mm256_mask_cmpeq_epi32_mask(mask, b, a);
50766        assert_eq!(r, 0b01001010);
50767    }
50768
50769    #[simd_test(enable = "avx512f,avx512vl")]
50770    unsafe fn test_mm_cmpeq_epi32_mask() {
50771        let a = _mm_set_epi32(0, 1, -1, 13);
50772        let b = _mm_set_epi32(0, 1, 13, 42);
50773        let m = _mm_cmpeq_epi32_mask(b, a);
50774        assert_eq!(m, 0b00001100);
50775    }
50776
50777    #[simd_test(enable = "avx512f,avx512vl")]
50778    unsafe fn test_mm_mask_cmpeq_epi32_mask() {
50779        let a = _mm_set_epi32(0, 1, -1, 13);
50780        let b = _mm_set_epi32(0, 1, 13, 42);
50781        let mask = 0b11111111;
50782        let r = _mm_mask_cmpeq_epi32_mask(mask, b, a);
50783        assert_eq!(r, 0b00001100);
50784    }
50785
50786    #[simd_test(enable = "avx512f")]
50787    unsafe fn test_mm512_cmpneq_epi32_mask() {
50788        #[rustfmt::skip]
50789        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50790                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50791        #[rustfmt::skip]
50792        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50793                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50794        let m = _mm512_cmpneq_epi32_mask(b, a);
50795        assert_eq!(m, !_mm512_cmpeq_epi32_mask(b, a));
50796    }
50797
50798    #[simd_test(enable = "avx512f")]
50799    unsafe fn test_mm512_mask_cmpneq_epi32_mask() {
50800        #[rustfmt::skip]
50801        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100,
50802                                 0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100);
50803        #[rustfmt::skip]
50804        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50805                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50806        let mask = 0b01111010_01111010;
50807        let r = _mm512_mask_cmpneq_epi32_mask(mask, b, a);
50808        assert_eq!(r, 0b00110010_00110010)
50809    }
50810
50811    #[simd_test(enable = "avx512f,avx512vl")]
50812    unsafe fn test_mm256_cmpneq_epi32_mask() {
50813        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50814        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50815        let m = _mm256_cmpneq_epi32_mask(b, a);
50816        assert_eq!(m, !_mm256_cmpeq_epi32_mask(b, a));
50817    }
50818
50819    #[simd_test(enable = "avx512f,avx512vl")]
50820    unsafe fn test_mm256_mask_cmpneq_epi32_mask() {
50821        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100);
50822        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50823        let mask = 0b11111111;
50824        let r = _mm256_mask_cmpneq_epi32_mask(mask, b, a);
50825        assert_eq!(r, 0b00110011)
50826    }
50827
50828    #[simd_test(enable = "avx512f,avx512vl")]
50829    unsafe fn test_mm_cmpneq_epi32_mask() {
50830        let a = _mm_set_epi32(0, 1, -1, 13);
50831        let b = _mm_set_epi32(0, 1, 13, 42);
50832        let r = _mm_cmpneq_epi32_mask(b, a);
50833        assert_eq!(r, 0b00000011)
50834    }
50835
50836    #[simd_test(enable = "avx512f,avx512vl")]
50837    unsafe fn test_mm_mask_cmpneq_epi32_mask() {
50838        let a = _mm_set_epi32(0, 1, -1, 13);
50839        let b = _mm_set_epi32(0, 1, 13, 42);
50840        let mask = 0b11111111;
50841        let r = _mm_mask_cmpneq_epi32_mask(mask, b, a);
50842        assert_eq!(r, 0b00000011)
50843    }
50844
50845    #[simd_test(enable = "avx512f")]
50846    unsafe fn test_mm512_cmp_epi32_mask() {
50847        #[rustfmt::skip]
50848        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50849                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50850        let b = _mm512_set1_epi32(-1);
50851        let m = _mm512_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
50852        assert_eq!(m, 0b00000101_00000101);
50853    }
50854
50855    #[simd_test(enable = "avx512f")]
50856    unsafe fn test_mm512_mask_cmp_epi32_mask() {
50857        #[rustfmt::skip]
50858        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50859                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50860        let b = _mm512_set1_epi32(-1);
50861        let mask = 0b01100110_01100110;
50862        let r = _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(mask, a, b);
50863        assert_eq!(r, 0b00000100_00000100);
50864    }
50865
50866    #[simd_test(enable = "avx512f,avx512vl")]
50867    unsafe fn test_mm256_cmp_epi32_mask() {
50868        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50869        let b = _mm256_set1_epi32(-1);
50870        let m = _mm256_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
50871        assert_eq!(m, 0b00000101);
50872    }
50873
50874    #[simd_test(enable = "avx512f,avx512vl")]
50875    unsafe fn test_mm256_mask_cmp_epi32_mask() {
50876        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50877        let b = _mm256_set1_epi32(-1);
50878        let mask = 0b01100110;
50879        let r = _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(mask, a, b);
50880        assert_eq!(r, 0b00000100);
50881    }
50882
50883    #[simd_test(enable = "avx512f,avx512vl")]
50884    unsafe fn test_mm_cmp_epi32_mask() {
50885        let a = _mm_set_epi32(0, 1, -1, 13);
50886        let b = _mm_set1_epi32(1);
50887        let m = _mm_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
50888        assert_eq!(m, 0b00001010);
50889    }
50890
50891    #[simd_test(enable = "avx512f,avx512vl")]
50892    unsafe fn test_mm_mask_cmp_epi32_mask() {
50893        let a = _mm_set_epi32(0, 1, -1, 13);
50894        let b = _mm_set1_epi32(1);
50895        let mask = 0b11111111;
50896        let r = _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(mask, a, b);
50897        assert_eq!(r, 0b00001010);
50898    }
50899
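    // The `set`/`setr` tests below check argument ordering: `_mm512_set_*`
    // takes elements from the highest index down to element 0, while
    // `_mm512_setr_*` takes them in memory order (element 0 first).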
50900    #[simd_test(enable = "avx512f")]
50901    unsafe fn test_mm512_set_epi8() {
50902        let r = _mm512_set1_epi8(2);
50903        assert_eq_m512i(
50904            r,
50905            _mm512_set_epi8(
50906                2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
50907                2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
50908                2, 2, 2, 2, 2, 2, 2, 2,
50909            ),
50910        )
50911    }
50912
50913    #[simd_test(enable = "avx512f")]
50914    unsafe fn test_mm512_set_epi16() {
50915        let r = _mm512_set1_epi16(2);
50916        assert_eq_m512i(
50917            r,
50918            _mm512_set_epi16(
50919                2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
50920                2, 2, 2, 2,
50921            ),
50922        )
50923    }
50924
50925    #[simd_test(enable = "avx512f")]
50926    unsafe fn test_mm512_set_epi32() {
50927        let r = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
50928        assert_eq_m512i(
50929            r,
50930            _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
50931        )
50932    }
50933
50934    #[simd_test(enable = "avx512f")]
50935    unsafe fn test_mm512_setr_epi32() {
50936        let r = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
50937        assert_eq_m512i(
50938            r,
50939            _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
50940        )
50941    }
50942
50943    #[simd_test(enable = "avx512f")]
50944    unsafe fn test_mm512_set1_epi8() {
50945        let r = _mm512_set_epi8(
50946            2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
50947            2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
50948            2, 2, 2, 2, 2, 2,
50949        );
50950        assert_eq_m512i(r, _mm512_set1_epi8(2));
50951    }
50952
50953    #[simd_test(enable = "avx512f")]
50954    unsafe fn test_mm512_set1_epi16() {
50955        let r = _mm512_set_epi16(
50956            2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
50957            2, 2, 2,
50958        );
50959        assert_eq_m512i(r, _mm512_set1_epi16(2));
50960    }
50961
50962    #[simd_test(enable = "avx512f")]
50963    unsafe fn test_mm512_set1_epi32() {
50964        let r = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
50965        assert_eq_m512i(r, _mm512_set1_epi32(2));
50966    }
50967
50968    #[simd_test(enable = "avx512f")]
50969    unsafe fn test_mm512_setzero_si512() {
50970        assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_si512());
50971    }
50972
50973    #[simd_test(enable = "avx512f")]
50974    unsafe fn test_mm512_setzero_epi32() {
50975        assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_epi32());
50976    }
50977
50978    #[simd_test(enable = "avx512f")]
50979    unsafe fn test_mm512_set_ps() {
50980        let r = _mm512_setr_ps(
50981            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
50982        );
50983        assert_eq_m512(
50984            r,
50985            _mm512_set_ps(
50986                15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
50987            ),
50988        )
50989    }
50990
50991    #[simd_test(enable = "avx512f")]
50992    unsafe fn test_mm512_setr_ps() {
50993        let r = _mm512_set_ps(
50994            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
50995        );
50996        assert_eq_m512(
50997            r,
50998            _mm512_setr_ps(
50999                15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
51000            ),
51001        )
51002    }
51003
51004    #[simd_test(enable = "avx512f")]
51005    unsafe fn test_mm512_set1_ps() {
51006        #[rustfmt::skip]
51007        let expected = _mm512_set_ps(2., 2., 2., 2., 2., 2., 2., 2.,
51008                                     2., 2., 2., 2., 2., 2., 2., 2.);
51009        assert_eq_m512(expected, _mm512_set1_ps(2.));
51010    }
51011
51012    #[simd_test(enable = "avx512f")]
51013    unsafe fn test_mm512_set4_epi32() {
51014        let r = _mm512_set_epi32(4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1);
51015        assert_eq_m512i(r, _mm512_set4_epi32(4, 3, 2, 1));
51016    }
51017
51018    #[simd_test(enable = "avx512f")]
51019    unsafe fn test_mm512_set4_ps() {
51020        let r = _mm512_set_ps(
51021            4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1.,
51022        );
51023        assert_eq_m512(r, _mm512_set4_ps(4., 3., 2., 1.));
51024    }
51025
51026    #[simd_test(enable = "avx512f")]
51027    unsafe fn test_mm512_setr4_epi32() {
51028        let r = _mm512_set_epi32(4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1);
51029        assert_eq_m512i(r, _mm512_setr4_epi32(1, 2, 3, 4));
51030    }
51031
51032    #[simd_test(enable = "avx512f")]
51033    unsafe fn test_mm512_setr4_ps() {
51034        let r = _mm512_set_ps(
51035            4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1.,
51036        );
51037        assert_eq_m512(r, _mm512_setr4_ps(1., 2., 3., 4.));
51038    }
51039
51040    #[simd_test(enable = "avx512f")]
51041    unsafe fn test_mm512_setzero_ps() {
51042        assert_eq_m512(_mm512_setzero_ps(), _mm512_set1_ps(0.));
51043    }
51044
51045    #[simd_test(enable = "avx512f")]
51046    unsafe fn test_mm512_setzero() {
51047        assert_eq_m512(_mm512_setzero(), _mm512_set1_ps(0.));
51048    }
51049
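    // Load/store tests: the `loadu`/`storeu` forms have no alignment
    // requirement; pointers are passed through `black_box`, presumably so the
    // optimizer treats them as opaque and a real vector load is emitted.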
51050    #[simd_test(enable = "avx512f")]
51051    unsafe fn test_mm512_loadu_pd() {
51052        let a = &[4., 3., 2., 5., 8., 9., 64., 50.];
51053        let p = a.as_ptr();
51054        let r = _mm512_loadu_pd(black_box(p));
51055        let e = _mm512_setr_pd(4., 3., 2., 5., 8., 9., 64., 50.);
51056        assert_eq_m512d(r, e);
51057    }
51058
51059    #[simd_test(enable = "avx512f")]
51060    unsafe fn test_mm512_storeu_pd() {
51061        let a = _mm512_set1_pd(9.);
51062        let mut r = _mm512_undefined_pd();
51063        _mm512_storeu_pd(&mut r as *mut _ as *mut f64, a);
51064        assert_eq_m512d(r, a);
51065    }
51066
51067    #[simd_test(enable = "avx512f")]
51068    unsafe fn test_mm512_loadu_ps() {
51069        let a = &[
51070            4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
51071        ];
51072        let p = a.as_ptr();
51073        let r = _mm512_loadu_ps(black_box(p));
51074        let e = _mm512_setr_ps(
51075            4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
51076        );
51077        assert_eq_m512(r, e);
51078    }
51079
51080    #[simd_test(enable = "avx512f")]
51081    unsafe fn test_mm512_storeu_ps() {
51082        let a = _mm512_set1_ps(9.);
51083        let mut r = _mm512_undefined_ps();
51084        _mm512_storeu_ps(&mut r as *mut _ as *mut f32, a);
51085        assert_eq_m512(r, a);
51086    }
51087
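    // Masked loads: for each lane, a set mask bit loads from memory, while a
    // clear bit keeps the corresponding `src` lane (`mask_` forms) or yields
    // zero (`maskz_` forms). E.g. with m = 0b11001010, lanes 1, 3, 6 and 7
    // come from memory.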
51088    #[simd_test(enable = "avx512f")]
51089    unsafe fn test_mm512_mask_loadu_epi32() {
51090        let src = _mm512_set1_epi32(42);
51091        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
51092        let p = a.as_ptr();
51093        let m = 0b11101000_11001010;
51094        let r = _mm512_mask_loadu_epi32(src, m, black_box(p));
51095        let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
51096        assert_eq_m512i(r, e);
51097    }
51098
51099    #[simd_test(enable = "avx512f")]
51100    unsafe fn test_mm512_maskz_loadu_epi32() {
51101        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
51102        let p = a.as_ptr();
51103        let m = 0b11101000_11001010;
51104        let r = _mm512_maskz_loadu_epi32(m, black_box(p));
51105        let e = _mm512_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16);
51106        assert_eq_m512i(r, e);
51107    }
51108
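    // The aligned `load`/`store` forms require full vector alignment
    // (64 bytes for 512-bit data), hence the `#[repr(align(64))]` wrappers.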
51109    #[simd_test(enable = "avx512f")]
51110    unsafe fn test_mm512_mask_load_epi32() {
51111        #[repr(align(64))]
51112        struct Align {
51113            data: [i32; 16], // 64 bytes
51114        }
51115        let src = _mm512_set1_epi32(42);
51116        let a = Align {
51117            data: [1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
51118        };
51119        let p = a.data.as_ptr();
51120        let m = 0b11101000_11001010;
51121        let r = _mm512_mask_load_epi32(src, m, black_box(p));
51122        let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
51123        assert_eq_m512i(r, e);
51124    }
51125
51126    #[simd_test(enable = "avx512f")]
51127    unsafe fn test_mm512_maskz_load_epi32() {
51128        #[repr(align(64))]
51129        struct Align {
51130            data: [i32; 16], // 64 bytes
51131        }
51132        let a = Align {
51133            data: [1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
51134        };
51135        let p = a.data.as_ptr();
51136        let m = 0b11101000_11001010;
51137        let r = _mm512_maskz_load_epi32(m, black_box(p));
51138        let e = _mm512_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16);
51139        assert_eq_m512i(r, e);
51140    }
51141
51142    #[simd_test(enable = "avx512f")]
51143    unsafe fn test_mm512_mask_storeu_epi32() {
51144        let mut r = [42_i32; 16];
51145        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
51146        let m = 0b11101000_11001010;
51147        _mm512_mask_storeu_epi32(r.as_mut_ptr(), m, a);
51148        let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
51149        assert_eq_m512i(_mm512_loadu_epi32(r.as_ptr()), e);
51150    }
51151
51152    #[simd_test(enable = "avx512f")]
51153    unsafe fn test_mm512_mask_store_epi32() {
51154        #[repr(align(64))]
51155        struct Align {
51156            data: [i32; 16],
51157        }
51158        let mut r = Align { data: [42; 16] };
51159        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
51160        let m = 0b11101000_11001010;
51161        _mm512_mask_store_epi32(r.data.as_mut_ptr(), m, a);
51162        let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
51163        assert_eq_m512i(_mm512_load_epi32(r.data.as_ptr()), e);
51164    }
51165
51166    #[simd_test(enable = "avx512f")]
51167    unsafe fn test_mm512_mask_loadu_epi64() {
51168        let src = _mm512_set1_epi64(42);
51169        let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
51170        let p = a.as_ptr();
51171        let m = 0b11001010;
51172        let r = _mm512_mask_loadu_epi64(src, m, black_box(p));
51173        let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
51174        assert_eq_m512i(r, e);
51175    }
51176
51177    #[simd_test(enable = "avx512f")]
51178    unsafe fn test_mm512_maskz_loadu_epi64() {
51179        let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
51180        let p = a.as_ptr();
51181        let m = 0b11001010;
51182        let r = _mm512_maskz_loadu_epi64(m, black_box(p));
51183        let e = _mm512_setr_epi64(0, 2, 0, 4, 0, 0, 7, 8);
51184        assert_eq_m512i(r, e);
51185    }
51186
51187    #[simd_test(enable = "avx512f")]
51188    unsafe fn test_mm512_mask_load_epi64() {
51189        #[repr(align(64))]
51190        struct Align {
51191            data: [i64; 8], // 64 bytes
51192        }
51193        let src = _mm512_set1_epi64(42);
51194        let a = Align {
51195            data: [1_i64, 2, 3, 4, 5, 6, 7, 8],
51196        };
51197        let p = a.data.as_ptr();
51198        let m = 0b11001010;
51199        let r = _mm512_mask_load_epi64(src, m, black_box(p));
51200        let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
51201        assert_eq_m512i(r, e);
51202    }
51203
51204    #[simd_test(enable = "avx512f")]
51205    unsafe fn test_mm512_maskz_load_epi64() {
51206        #[repr(align(64))]
51207        struct Align {
51208            data: [i64; 8], // 64 bytes
51209        }
51210        let a = Align {
51211            data: [1_i64, 2, 3, 4, 5, 6, 7, 8],
51212        };
51213        let p = a.data.as_ptr();
51214        let m = 0b11001010;
51215        let r = _mm512_maskz_load_epi64(m, black_box(p));
51216        let e = _mm512_setr_epi64(0, 2, 0, 4, 0, 0, 7, 8);
51217        assert_eq_m512i(r, e);
51218    }
51219
51220    #[simd_test(enable = "avx512f")]
51221    unsafe fn test_mm512_mask_storeu_epi64() {
51222        let mut r = [42_i64; 8];
51223        let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
51224        let m = 0b11001010;
51225        _mm512_mask_storeu_epi64(r.as_mut_ptr(), m, a);
51226        let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
51227        assert_eq_m512i(_mm512_loadu_epi64(r.as_ptr()), e);
51228    }
51229
51230    #[simd_test(enable = "avx512f")]
51231    unsafe fn test_mm512_mask_store_epi64() {
51232        #[repr(align(64))]
51233        struct Align {
51234            data: [i64; 8],
51235        }
51236        let mut r = Align { data: [42; 8] };
51237        let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
51238        let m = 0b11001010;
51239        let p = r.data.as_mut_ptr();
51240        _mm512_mask_store_epi64(p, m, a);
51241        let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
51242        assert_eq_m512i(_mm512_load_epi64(r.data.as_ptr()), e);
51243    }
51244
51245    #[simd_test(enable = "avx512f")]
51246    unsafe fn test_mm512_mask_loadu_ps() {
51247        let src = _mm512_set1_ps(42.0);
51248        let a = &[
51249            1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
51250            16.0,
51251        ];
51252        let p = a.as_ptr();
51253        let m = 0b11101000_11001010;
51254        let r = _mm512_mask_loadu_ps(src, m, black_box(p));
51255        let e = _mm512_setr_ps(
51256            42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
51257            16.0,
51258        );
51259        assert_eq_m512(r, e);
51260    }
51261
51262    #[simd_test(enable = "avx512f")]
51263    unsafe fn test_mm512_maskz_loadu_ps() {
51264        let a = &[
51265            1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
51266            16.0,
51267        ];
51268        let p = a.as_ptr();
51269        let m = 0b11101000_11001010;
51270        let r = _mm512_maskz_loadu_ps(m, black_box(p));
51271        let e = _mm512_setr_ps(
51272            0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0, 0.0, 0.0, 0.0, 12.0, 0.0, 14.0, 15.0, 16.0,
51273        );
51274        assert_eq_m512(r, e);
51275    }
51276
51277    #[simd_test(enable = "avx512f")]
51278    unsafe fn test_mm512_mask_load_ps() {
51279        #[repr(align(64))]
51280        struct Align {
51281            data: [f32; 16], // 64 bytes
51282        }
51283        let src = _mm512_set1_ps(42.0);
51284        let a = Align {
51285            data: [
51286                1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
51287                15.0, 16.0,
51288            ],
51289        };
51290        let p = a.data.as_ptr();
51291        let m = 0b11101000_11001010;
51292        let r = _mm512_mask_load_ps(src, m, black_box(p));
51293        let e = _mm512_setr_ps(
51294            42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
51295            16.0,
51296        );
51297        assert_eq_m512(r, e);
51298    }
51299
51300    #[simd_test(enable = "avx512f")]
51301    unsafe fn test_mm512_maskz_load_ps() {
51302        #[repr(align(64))]
51303        struct Align {
51304            data: [f32; 16], // 64 bytes
51305        }
51306        let a = Align {
51307            data: [
51308                1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
51309                15.0, 16.0,
51310            ],
51311        };
51312        let p = a.data.as_ptr();
51313        let m = 0b11101000_11001010;
51314        let r = _mm512_maskz_load_ps(m, black_box(p));
51315        let e = _mm512_setr_ps(
51316            0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0, 0.0, 0.0, 0.0, 12.0, 0.0, 14.0, 15.0, 16.0,
51317        );
51318        assert_eq_m512(r, e);
51319    }
51320
51321    #[simd_test(enable = "avx512f")]
51322    unsafe fn test_mm512_mask_storeu_ps() {
51323        let mut r = [42_f32; 16];
51324        let a = _mm512_setr_ps(
51325            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
51326        );
51327        let m = 0b11101000_11001010;
51328        _mm512_mask_storeu_ps(r.as_mut_ptr(), m, a);
51329        let e = _mm512_setr_ps(
51330            42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
51331            16.0,
51332        );
51333        assert_eq_m512(_mm512_loadu_ps(r.as_ptr()), e);
51334    }
51335
51336    #[simd_test(enable = "avx512f")]
51337    unsafe fn test_mm512_mask_store_ps() {
51338        #[repr(align(64))]
51339        struct Align {
51340            data: [f32; 16],
51341        }
51342        let mut r = Align { data: [42.0; 16] };
51343        let a = _mm512_setr_ps(
51344            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
51345        );
51346        let m = 0b11101000_11001010;
51347        _mm512_mask_store_ps(r.data.as_mut_ptr(), m, a);
51348        let e = _mm512_setr_ps(
51349            42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
51350            16.0,
51351        );
51352        assert_eq_m512(_mm512_load_ps(r.data.as_ptr()), e);
51353    }
51354
51355    #[simd_test(enable = "avx512f")]
51356    unsafe fn test_mm512_mask_loadu_pd() {
51357        let src = _mm512_set1_pd(42.0);
51358        let a = &[1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
51359        let p = a.as_ptr();
51360        let m = 0b11001010;
51361        let r = _mm512_mask_loadu_pd(src, m, black_box(p));
51362        let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51363        assert_eq_m512d(r, e);
51364    }
51365
51366    #[simd_test(enable = "avx512f")]
51367    unsafe fn test_mm512_maskz_loadu_pd() {
51368        let a = &[1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
51369        let p = a.as_ptr();
51370        let m = 0b11001010;
51371        let r = _mm512_maskz_loadu_pd(m, black_box(p));
51372        let e = _mm512_setr_pd(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
51373        assert_eq_m512d(r, e);
51374    }
51375
51376    #[simd_test(enable = "avx512f")]
51377    unsafe fn test_mm512_mask_load_pd() {
51378        #[repr(align(64))]
51379        struct Align {
51380            data: [f64; 8], // 64 bytes
51381        }
51382        let src = _mm512_set1_pd(42.0);
51383        let a = Align {
51384            data: [1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
51385        };
51386        let p = a.data.as_ptr();
51387        let m = 0b11001010;
51388        let r = _mm512_mask_load_pd(src, m, black_box(p));
51389        let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51390        assert_eq_m512d(r, e);
51391    }
51392
51393    #[simd_test(enable = "avx512f")]
51394    unsafe fn test_mm512_maskz_load_pd() {
51395        #[repr(align(64))]
51396        struct Align {
51397            data: [f64; 8], // 64 bytes
51398        }
51399        let a = Align {
51400            data: [1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
51401        };
51402        let p = a.data.as_ptr();
51403        let m = 0b11001010;
51404        let r = _mm512_maskz_load_pd(m, black_box(p));
51405        let e = _mm512_setr_pd(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
51406        assert_eq_m512d(r, e);
51407    }
51408
51409    #[simd_test(enable = "avx512f")]
51410    unsafe fn test_mm512_mask_storeu_pd() {
51411        let mut r = [42_f64; 8];
51412        let a = _mm512_setr_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
51413        let m = 0b11001010;
51414        _mm512_mask_storeu_pd(r.as_mut_ptr(), m, a);
51415        let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51416        assert_eq_m512d(_mm512_loadu_pd(r.as_ptr()), e);
51417    }
51418
51419    #[simd_test(enable = "avx512f")]
51420    unsafe fn test_mm512_mask_store_pd() {
51421        #[repr(align(64))]
51422        struct Align {
51423            data: [f64; 8],
51424        }
51425        let mut r = Align { data: [42.0; 8] };
51426        let a = _mm512_setr_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
51427        let m = 0b11001010;
51428        _mm512_mask_store_pd(r.data.as_mut_ptr(), m, a);
51429        let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51430        assert_eq_m512d(_mm512_load_pd(r.data.as_ptr()), e);
51431    }
51432
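    // 256-bit counterparts of the load/store tests above: 8-bit masks (4-bit
    // for 64-bit lanes), and 32-byte alignment for the aligned forms.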
51433    #[simd_test(enable = "avx512f,avx512vl")]
51434    unsafe fn test_mm256_mask_loadu_epi32() {
51435        let src = _mm256_set1_epi32(42);
51436        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
51437        let p = a.as_ptr();
51438        let m = 0b11001010;
51439        let r = _mm256_mask_loadu_epi32(src, m, black_box(p));
51440        let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
51441        assert_eq_m256i(r, e);
51442    }
51443
51444    #[simd_test(enable = "avx512f,avx512vl")]
51445    unsafe fn test_mm256_maskz_loadu_epi32() {
51446        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
51447        let p = a.as_ptr();
51448        let m = 0b11001010;
51449        let r = _mm256_maskz_loadu_epi32(m, black_box(p));
51450        let e = _mm256_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8);
51451        assert_eq_m256i(r, e);
51452    }
51453
51454    #[simd_test(enable = "avx512f,avx512vl")]
51455    unsafe fn test_mm256_mask_load_epi32() {
51456        #[repr(align(32))]
51457        struct Align {
51458            data: [i32; 8], // 32 bytes
51459        }
51460        let src = _mm256_set1_epi32(42);
51461        let a = Align {
51462            data: [1_i32, 2, 3, 4, 5, 6, 7, 8],
51463        };
51464        let p = a.data.as_ptr();
51465        let m = 0b11001010;
51466        let r = _mm256_mask_load_epi32(src, m, black_box(p));
51467        let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
51468        assert_eq_m256i(r, e);
51469    }
51470
51471    #[simd_test(enable = "avx512f,avx512vl")]
51472    unsafe fn test_mm256_maskz_load_epi32() {
51473        #[repr(align(32))]
51474        struct Align {
51475            data: [i32; 8], // 32 bytes
51476        }
51477        let a = Align {
51478            data: [1_i32, 2, 3, 4, 5, 6, 7, 8],
51479        };
51480        let p = a.data.as_ptr();
51481        let m = 0b11001010;
51482        let r = _mm256_maskz_load_epi32(m, black_box(p));
51483        let e = _mm256_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8);
51484        assert_eq_m256i(r, e);
51485    }
51486
51487    #[simd_test(enable = "avx512f,avx512vl")]
51488    unsafe fn test_mm256_mask_storeu_epi32() {
51489        let mut r = [42_i32; 8];
51490        let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
51491        let m = 0b11001010;
51492        _mm256_mask_storeu_epi32(r.as_mut_ptr(), m, a);
51493        let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
51494        assert_eq_m256i(_mm256_loadu_epi32(r.as_ptr()), e);
51495    }
51496
51497    #[simd_test(enable = "avx512f,avx512vl")]
51498    unsafe fn test_mm256_mask_store_epi32() {
51499        #[repr(align(64))]
51500        struct Align {
51501            data: [i32; 8],
51502        }
51503        let mut r = Align { data: [42; 8] };
51504        let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
51505        let m = 0b11001010;
51506        _mm256_mask_store_epi32(r.data.as_mut_ptr(), m, a);
51507        let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
51508        assert_eq_m256i(_mm256_load_epi32(r.data.as_ptr()), e);
51509    }
51510
51511    #[simd_test(enable = "avx512f,avx512vl")]
51512    unsafe fn test_mm256_mask_loadu_epi64() {
51513        let src = _mm256_set1_epi64x(42);
51514        let a = &[1_i64, 2, 3, 4];
51515        let p = a.as_ptr();
51516        let m = 0b1010;
51517        let r = _mm256_mask_loadu_epi64(src, m, black_box(p));
51518        let e = _mm256_setr_epi64x(42, 2, 42, 4);
51519        assert_eq_m256i(r, e);
51520    }
51521
51522    #[simd_test(enable = "avx512f,avx512vl")]
51523    unsafe fn test_mm256_maskz_loadu_epi64() {
51524        let a = &[1_i64, 2, 3, 4];
51525        let p = a.as_ptr();
51526        let m = 0b1010;
51527        let r = _mm256_maskz_loadu_epi64(m, black_box(p));
51528        let e = _mm256_setr_epi64x(0, 2, 0, 4);
51529        assert_eq_m256i(r, e);
51530    }
51531
51532    #[simd_test(enable = "avx512f,avx512vl")]
51533    unsafe fn test_mm256_mask_load_epi64() {
51534        #[repr(align(32))]
51535        struct Align {
51536            data: [i64; 4], // 32 bytes
51537        }
51538        let src = _mm256_set1_epi64x(42);
51539        let a = Align {
51540            data: [1_i64, 2, 3, 4],
51541        };
51542        let p = a.data.as_ptr();
51543        let m = 0b1010;
51544        let r = _mm256_mask_load_epi64(src, m, black_box(p));
51545        let e = _mm256_setr_epi64x(42, 2, 42, 4);
51546        assert_eq_m256i(r, e);
51547    }
51548
51549    #[simd_test(enable = "avx512f,avx512vl")]
51550    unsafe fn test_mm256_maskz_load_epi64() {
51551        #[repr(align(32))]
51552        struct Align {
51553            data: [i64; 4], // 32 bytes
51554        }
51555        let a = Align {
51556            data: [1_i64, 2, 3, 4],
51557        };
51558        let p = a.data.as_ptr();
51559        let m = 0b1010;
51560        let r = _mm256_maskz_load_epi64(m, black_box(p));
51561        let e = _mm256_setr_epi64x(0, 2, 0, 4);
51562        assert_eq_m256i(r, e);
51563    }
51564
51565    #[simd_test(enable = "avx512f,avx512vl")]
51566    unsafe fn test_mm256_mask_storeu_epi64() {
51567        let mut r = [42_i64; 4];
51568        let a = _mm256_setr_epi64x(1, 2, 3, 4);
51569        let m = 0b1010;
51570        _mm256_mask_storeu_epi64(r.as_mut_ptr(), m, a);
51571        let e = _mm256_setr_epi64x(42, 2, 42, 4);
51572        assert_eq_m256i(_mm256_loadu_epi64(r.as_ptr()), e);
51573    }
51574
51575    #[simd_test(enable = "avx512f,avx512vl")]
51576    unsafe fn test_mm256_mask_store_epi64() {
51577        #[repr(align(32))]
51578        struct Align {
51579            data: [i64; 4],
51580        }
51581        let mut r = Align { data: [42; 4] };
51582        let a = _mm256_setr_epi64x(1, 2, 3, 4);
51583        let m = 0b1010;
51584        _mm256_mask_store_epi64(r.data.as_mut_ptr(), m, a);
51585        let e = _mm256_setr_epi64x(42, 2, 42, 4);
51586        assert_eq_m256i(_mm256_load_epi64(r.data.as_ptr()), e);
51587    }
51588
51589    #[simd_test(enable = "avx512f,avx512vl")]
51590    unsafe fn test_mm256_mask_loadu_ps() {
51591        let src = _mm256_set1_ps(42.0);
51592        let a = &[1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
51593        let p = a.as_ptr();
51594        let m = 0b11001010;
51595        let r = _mm256_mask_loadu_ps(src, m, black_box(p));
51596        let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51597        assert_eq_m256(r, e);
51598    }
51599
51600    #[simd_test(enable = "avx512f,avx512vl")]
51601    unsafe fn test_mm256_maskz_loadu_ps() {
51602        let a = &[1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
51603        let p = a.as_ptr();
51604        let m = 0b11001010;
51605        let r = _mm256_maskz_loadu_ps(m, black_box(p));
51606        let e = _mm256_setr_ps(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
51607        assert_eq_m256(r, e);
51608    }
51609
51610    #[simd_test(enable = "avx512f,avx512vl")]
51611    unsafe fn test_mm256_mask_load_ps() {
51612        #[repr(align(32))]
51613        struct Align {
51614            data: [f32; 8], // 32 bytes
51615        }
51616        let src = _mm256_set1_ps(42.0);
51617        let a = Align {
51618            data: [1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
51619        };
51620        let p = a.data.as_ptr();
51621        let m = 0b11001010;
51622        let r = _mm256_mask_load_ps(src, m, black_box(p));
51623        let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51624        assert_eq_m256(r, e);
51625    }
51626
51627    #[simd_test(enable = "avx512f,avx512vl")]
51628    unsafe fn test_mm256_maskz_load_ps() {
51629        #[repr(align(32))]
51630        struct Align {
51631            data: [f32; 8], // 32 bytes
51632        }
51633        let a = Align {
51634            data: [1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
51635        };
51636        let p = a.data.as_ptr();
51637        let m = 0b11001010;
51638        let r = _mm256_maskz_load_ps(m, black_box(p));
51639        let e = _mm256_setr_ps(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
51640        assert_eq_m256(r, e);
51641    }
51642
51643    #[simd_test(enable = "avx512f,avx512vl")]
51644    unsafe fn test_mm256_mask_storeu_ps() {
51645        let mut r = [42_f32; 8];
51646        let a = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
51647        let m = 0b11001010;
51648        _mm256_mask_storeu_ps(r.as_mut_ptr(), m, a);
51649        let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51650        assert_eq_m256(_mm256_loadu_ps(r.as_ptr()), e);
51651    }
51652
51653    #[simd_test(enable = "avx512f,avx512vl")]
51654    unsafe fn test_mm256_mask_store_ps() {
51655        #[repr(align(32))]
51656        struct Align {
51657            data: [f32; 8],
51658        }
51659        let mut r = Align { data: [42.0; 8] };
51660        let a = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
51661        let m = 0b11001010;
51662        _mm256_mask_store_ps(r.data.as_mut_ptr(), m, a);
51663        let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51664        assert_eq_m256(_mm256_load_ps(r.data.as_ptr()), e);
51665    }
51666
51667    #[simd_test(enable = "avx512f,avx512vl")]
51668    unsafe fn test_mm256_mask_loadu_pd() {
51669        let src = _mm256_set1_pd(42.0);
51670        let a = &[1.0_f64, 2.0, 3.0, 4.0];
51671        let p = a.as_ptr();
51672        let m = 0b1010;
51673        let r = _mm256_mask_loadu_pd(src, m, black_box(p));
51674        let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
51675        assert_eq_m256d(r, e);
51676    }
51677
51678    #[simd_test(enable = "avx512f,avx512vl")]
51679    unsafe fn test_mm256_maskz_loadu_pd() {
51680        let a = &[1.0_f64, 2.0, 3.0, 4.0];
51681        let p = a.as_ptr();
51682        let m = 0b1010;
51683        let r = _mm256_maskz_loadu_pd(m, black_box(p));
51684        let e = _mm256_setr_pd(0.0, 2.0, 0.0, 4.0);
51685        assert_eq_m256d(r, e);
51686    }
51687
51688    #[simd_test(enable = "avx512f,avx512vl")]
51689    unsafe fn test_mm256_mask_load_pd() {
51690        #[repr(align(32))]
51691        struct Align {
51692            data: [f64; 4], // 32 bytes
51693        }
51694        let src = _mm256_set1_pd(42.0);
51695        let a = Align {
51696            data: [1.0_f64, 2.0, 3.0, 4.0],
51697        };
51698        let p = a.data.as_ptr();
51699        let m = 0b1010;
51700        let r = _mm256_mask_load_pd(src, m, black_box(p));
51701        let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
51702        assert_eq_m256d(r, e);
51703    }
51704
51705    #[simd_test(enable = "avx512f,avx512vl")]
51706    unsafe fn test_mm256_maskz_load_pd() {
51707        #[repr(align(32))]
51708        struct Align {
51709            data: [f64; 4], // 32 bytes
51710        }
51711        let a = Align {
51712            data: [1.0_f64, 2.0, 3.0, 4.0],
51713        };
51714        let p = a.data.as_ptr();
51715        let m = 0b1010;
51716        let r = _mm256_maskz_load_pd(m, black_box(p));
51717        let e = _mm256_setr_pd(0.0, 2.0, 0.0, 4.0);
51718        assert_eq_m256d(r, e);
51719    }
51720
51721    #[simd_test(enable = "avx512f,avx512vl")]
51722    unsafe fn test_mm256_mask_storeu_pd() {
51723        let mut r = [42_f64; 4];
51724        let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
51725        let m = 0b1010;
51726        _mm256_mask_storeu_pd(r.as_mut_ptr(), m, a);
51727        let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
51728        assert_eq_m256d(_mm256_loadu_pd(r.as_ptr()), e);
51729    }
51730
51731    #[simd_test(enable = "avx512f,avx512vl")]
51732    unsafe fn test_mm256_mask_store_pd() {
51733        #[repr(align(32))]
51734        struct Align {
51735            data: [f64; 4],
51736        }
51737        let mut r = Align { data: [42.0; 4] };
51738        let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
51739        let m = 0b1010;
51740        _mm256_mask_store_pd(r.data.as_mut_ptr(), m, a);
51741        let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
51742        assert_eq_m256d(_mm256_load_pd(r.data.as_ptr()), e);
51743    }
51744
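    // 128-bit counterparts: 4 lanes of i32/f32 (2 lanes of i64/f64), with
    // 16-byte alignment for the aligned forms.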
51745    #[simd_test(enable = "avx512f,avx512vl")]
51746    unsafe fn test_mm_mask_loadu_epi32() {
51747        let src = _mm_set1_epi32(42);
51748        let a = &[1_i32, 2, 3, 4];
51749        let p = a.as_ptr();
51750        let m = 0b1010;
51751        let r = _mm_mask_loadu_epi32(src, m, black_box(p));
51752        let e = _mm_setr_epi32(42, 2, 42, 4);
51753        assert_eq_m128i(r, e);
51754    }
51755
51756    #[simd_test(enable = "avx512f,avx512vl")]
51757    unsafe fn test_mm_maskz_loadu_epi32() {
51758        let a = &[1_i32, 2, 3, 4];
51759        let p = a.as_ptr();
51760        let m = 0b1010;
51761        let r = _mm_maskz_loadu_epi32(m, black_box(p));
51762        let e = _mm_setr_epi32(0, 2, 0, 4);
51763        assert_eq_m128i(r, e);
51764    }
51765
51766    #[simd_test(enable = "avx512f,avx512vl")]
51767    unsafe fn test_mm_mask_load_epi32() {
51768        #[repr(align(16))]
51769        struct Align {
51770            data: [i32; 4], // 16 bytes
51771        }
51772        let src = _mm_set1_epi32(42);
51773        let a = Align {
51774            data: [1_i32, 2, 3, 4],
51775        };
51776        let p = a.data.as_ptr();
51777        let m = 0b1010;
51778        let r = _mm_mask_load_epi32(src, m, black_box(p));
51779        let e = _mm_setr_epi32(42, 2, 42, 4);
51780        assert_eq_m128i(r, e);
51781    }
51782
51783    #[simd_test(enable = "avx512f,avx512vl")]
51784    unsafe fn test_mm_maskz_load_epi32() {
51785        #[repr(align(16))]
51786        struct Align {
51787            data: [i32; 4], // 16 bytes
51788        }
51789        let a = Align {
51790            data: [1_i32, 2, 3, 4],
51791        };
51792        let p = a.data.as_ptr();
51793        let m = 0b1010;
51794        let r = _mm_maskz_load_epi32(m, black_box(p));
51795        let e = _mm_setr_epi32(0, 2, 0, 4);
51796        assert_eq_m128i(r, e);
51797    }
51798
51799    #[simd_test(enable = "avx512f,avx512vl")]
51800    unsafe fn test_mm_mask_storeu_epi32() {
51801        let mut r = [42_i32; 4];
51802        let a = _mm_setr_epi32(1, 2, 3, 4);
51803        let m = 0b1010;
51804        _mm_mask_storeu_epi32(r.as_mut_ptr(), m, a);
51805        let e = _mm_setr_epi32(42, 2, 42, 4);
51806        assert_eq_m128i(_mm_loadu_epi32(r.as_ptr()), e);
51807    }
51808
51809    #[simd_test(enable = "avx512f,avx512vl")]
51810    unsafe fn test_mm_mask_store_epi32() {
51811        #[repr(align(16))]
51812        struct Align {
51813            data: [i32; 4], // 16 bytes
51814        }
51815        let mut r = Align { data: [42; 4] };
51816        let a = _mm_setr_epi32(1, 2, 3, 4);
51817        let m = 0b1010;
51818        _mm_mask_store_epi32(r.data.as_mut_ptr(), m, a);
51819        let e = _mm_setr_epi32(42, 2, 42, 4);
51820        assert_eq_m128i(_mm_load_epi32(r.data.as_ptr()), e);
51821    }
51822
51823    #[simd_test(enable = "avx512f,avx512vl")]
51824    unsafe fn test_mm_mask_loadu_epi64() {
51825        let src = _mm_set1_epi64x(42);
51826        let a = &[1_i64, 2];
51827        let p = a.as_ptr();
51828        let m = 0b10;
51829        let r = _mm_mask_loadu_epi64(src, m, black_box(p));
51830        let e = _mm_setr_epi64x(42, 2);
51831        assert_eq_m128i(r, e);
51832    }
51833
51834    #[simd_test(enable = "avx512f,avx512vl")]
51835    unsafe fn test_mm_maskz_loadu_epi64() {
51836        let a = &[1_i64, 2];
51837        let p = a.as_ptr();
51838        let m = 0b10;
51839        let r = _mm_maskz_loadu_epi64(m, black_box(p));
51840        let e = _mm_setr_epi64x(0, 2);
51841        assert_eq_m128i(r, e);
51842    }
51843
51844    #[simd_test(enable = "avx512f,avx512vl")]
51845    unsafe fn test_mm_mask_load_epi64() {
51846        #[repr(align(16))]
51847        struct Align {
51848            data: [i64; 2], // 16 bytes
51849        }
51850        let src = _mm_set1_epi64x(42);
51851        let a = Align { data: [1_i64, 2] };
51852        let p = a.data.as_ptr();
51853        let m = 0b10;
51854        let r = _mm_mask_load_epi64(src, m, black_box(p));
51855        let e = _mm_setr_epi64x(42, 2);
51856        assert_eq_m128i(r, e);
51857    }
51858
51859    #[simd_test(enable = "avx512f,avx512vl")]
51860    unsafe fn test_mm_maskz_load_epi64() {
51861        #[repr(align(16))]
51862        struct Align {
51863            data: [i64; 2], // 16 bytes
51864        }
51865        let a = Align { data: [1_i64, 2] };
51866        let p = a.data.as_ptr();
51867        let m = 0b10;
51868        let r = _mm_maskz_load_epi64(m, black_box(p));
51869        let e = _mm_setr_epi64x(0, 2);
51870        assert_eq_m128i(r, e);
51871    }
51872
51873    #[simd_test(enable = "avx512f,avx512vl")]
51874    unsafe fn test_mm_mask_storeu_epi64() {
51875        let mut r = [42_i64; 2];
51876        let a = _mm_setr_epi64x(1, 2);
51877        let m = 0b10;
51878        _mm_mask_storeu_epi64(r.as_mut_ptr(), m, a);
51879        let e = _mm_setr_epi64x(42, 2);
51880        assert_eq_m128i(_mm_loadu_epi64(r.as_ptr()), e);
51881    }
51882
51883    #[simd_test(enable = "avx512f,avx512vl")]
51884    unsafe fn test_mm_mask_store_epi64() {
51885        #[repr(align(16))]
51886        struct Align {
51887            data: [i64; 2], // 16 bytes
51888        }
51889        let mut r = Align { data: [42; 2] };
51890        let a = _mm_setr_epi64x(1, 2);
51891        let m = 0b10;
51892        _mm_mask_store_epi64(r.data.as_mut_ptr(), m, a);
51893        let e = _mm_setr_epi64x(42, 2);
51894        assert_eq_m128i(_mm_load_epi64(r.data.as_ptr()), e);
51895    }
51896
51897    #[simd_test(enable = "avx512f,avx512vl")]
51898    unsafe fn test_mm_mask_loadu_ps() {
51899        let src = _mm_set1_ps(42.0);
51900        let a = &[1.0_f32, 2.0, 3.0, 4.0];
51901        let p = a.as_ptr();
51902        let m = 0b1010;
51903        let r = _mm_mask_loadu_ps(src, m, black_box(p));
51904        let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
51905        assert_eq_m128(r, e);
51906    }
51907
51908    #[simd_test(enable = "avx512f,avx512vl")]
51909    unsafe fn test_mm_maskz_loadu_ps() {
51910        let a = &[1.0_f32, 2.0, 3.0, 4.0];
51911        let p = a.as_ptr();
51912        let m = 0b1010;
51913        let r = _mm_maskz_loadu_ps(m, black_box(p));
51914        let e = _mm_setr_ps(0.0, 2.0, 0.0, 4.0);
51915        assert_eq_m128(r, e);
51916    }
51917
51918    #[simd_test(enable = "avx512f,avx512vl")]
51919    unsafe fn test_mm_mask_load_ps() {
51920        #[repr(align(16))]
51921        struct Align {
51922            data: [f32; 4], // 16 bytes
51923        }
51924        let src = _mm_set1_ps(42.0);
51925        let a = Align {
51926            data: [1.0_f32, 2.0, 3.0, 4.0],
51927        };
51928        let p = a.data.as_ptr();
51929        let m = 0b1010;
51930        let r = _mm_mask_load_ps(src, m, black_box(p));
51931        let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
51932        assert_eq_m128(r, e);
51933    }
51934
51935    #[simd_test(enable = "avx512f,avx512vl")]
51936    unsafe fn test_mm_maskz_load_ps() {
51937        #[repr(align(16))]
51938        struct Align {
51939            data: [f32; 4], // 16 bytes
51940        }
51941        let a = Align {
51942            data: [1.0_f32, 2.0, 3.0, 4.0],
51943        };
51944        let p = a.data.as_ptr();
51945        let m = 0b1010;
51946        let r = _mm_maskz_load_ps(m, black_box(p));
51947        let e = _mm_setr_ps(0.0, 2.0, 0.0, 4.0);
51948        assert_eq_m128(r, e);
51949    }
51950
51951    #[simd_test(enable = "avx512f,avx512vl")]
51952    unsafe fn test_mm_mask_storeu_ps() {
51953        let mut r = [42_f32; 4];
51954        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
51955        let m = 0b1010;
51956        _mm_mask_storeu_ps(r.as_mut_ptr(), m, a);
51957        let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
51958        assert_eq_m128(_mm_loadu_ps(r.as_ptr()), e);
51959    }
51960
51961    #[simd_test(enable = "avx512f,avx512vl")]
51962    unsafe fn test_mm_mask_store_ps() {
51963        #[repr(align(16))]
51964        struct Align {
51965            data: [f32; 4], // 16 bytes
51966        }
51967        let mut r = Align { data: [42.0; 4] };
51968        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
51969        let m = 0b1010;
51970        _mm_mask_store_ps(r.data.as_mut_ptr(), m, a);
51971        let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
51972        assert_eq_m128(_mm_load_ps(r.data.as_ptr()), e);
51973    }
51974
51975    #[simd_test(enable = "avx512f,avx512vl")]
51976    unsafe fn test_mm_mask_loadu_pd() {
51977        let src = _mm_set1_pd(42.0);
51978        let a = &[1.0_f64, 2.0];
51979        let p = a.as_ptr();
51980        let m = 0b10;
51981        let r = _mm_mask_loadu_pd(src, m, black_box(p));
51982        let e = _mm_setr_pd(42.0, 2.0);
51983        assert_eq_m128d(r, e);
51984    }
51985
51986    #[simd_test(enable = "avx512f,avx512vl")]
51987    unsafe fn test_mm_maskz_loadu_pd() {
51988        let a = &[1.0_f64, 2.0];
51989        let p = a.as_ptr();
51990        let m = 0b10;
51991        let r = _mm_maskz_loadu_pd(m, black_box(p));
51992        let e = _mm_setr_pd(0.0, 2.0);
51993        assert_eq_m128d(r, e);
51994    }
51995
51996    #[simd_test(enable = "avx512f,avx512vl")]
51997    unsafe fn test_mm_mask_load_pd() {
51998        #[repr(align(16))]
51999        struct Align {
52000            data: [f64; 2], // 16 bytes
52001        }
52002        let src = _mm_set1_pd(42.0);
52003        let a = Align {
52004            data: [1.0_f64, 2.0],
52005        };
52006        let p = a.data.as_ptr();
52007        let m = 0b10;
52008        let r = _mm_mask_load_pd(src, m, black_box(p));
52009        let e = _mm_setr_pd(42.0, 2.0);
52010        assert_eq_m128d(r, e);
52011    }
52012
52013    #[simd_test(enable = "avx512f,avx512vl")]
52014    unsafe fn test_mm_maskz_load_pd() {
52015        #[repr(align(16))]
52016        struct Align {
52017            data: [f64; 2], // 16 bytes
52018        }
52019        let a = Align {
52020            data: [1.0_f64, 2.0],
52021        };
52022        let p = a.data.as_ptr();
52023        let m = 0b10;
52024        let r = _mm_maskz_load_pd(m, black_box(p));
52025        let e = _mm_setr_pd(0.0, 2.0);
52026        assert_eq_m128d(r, e);
52027    }
52028
52029    #[simd_test(enable = "avx512f")]
52030    unsafe fn test_mm_mask_load_ss() {
52031        #[repr(align(16))]
52032        struct Align {
52033            data: f32,
52034        }
52035        let src = _mm_set_ss(2.0);
52036        let mem = Align { data: 1.0 };
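        // `_mm_mask_load_ss` always zeroes the upper three lanes, so the expected values use `_mm_set_ss`.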
52037        let r = _mm_mask_load_ss(src, 0b1, &mem.data);
52038        assert_eq_m128(r, _mm_set_ss(1.0));
52039        let r = _mm_mask_load_ss(src, 0b0, &mem.data);
52040        assert_eq_m128(r, _mm_set_ss(2.0));
52041    }
52042
52043    #[simd_test(enable = "avx512f")]
52044    unsafe fn test_mm_maskz_load_ss() {
52045        #[repr(align(16))]
52046        struct Align {
52047            data: f32,
52048        }
52049        let mem = Align { data: 1.0 };
52050        let r = _mm_maskz_load_ss(0b1, &mem.data);
52051        assert_eq_m128(r, _mm_set_ss(1.0));
52052        let r = _mm_maskz_load_ss(0b0, &mem.data);
52053        assert_eq_m128(r, _mm_set_ss(0.0));
52054    }
52055
52056    #[simd_test(enable = "avx512f")]
52057    unsafe fn test_mm_mask_load_sd() {
52058        #[repr(align(16))]
52059        struct Align {
52060            data: f64,
52061        }
52062        let src = _mm_set_sd(2.0);
52063        let mem = Align { data: 1.0 };
52064        let r = _mm_mask_load_sd(src, 0b1, &mem.data);
52065        assert_eq_m128d(r, _mm_set_sd(1.0));
52066        let r = _mm_mask_load_sd(src, 0b0, &mem.data);
52067        assert_eq_m128d(r, _mm_set_sd(2.0));
52068    }
52069
52070    #[simd_test(enable = "avx512f")]
52071    unsafe fn test_mm_maskz_load_sd() {
52072        #[repr(align(16))]
52073        struct Align {
52074            data: f64,
52075        }
52076        let mem = Align { data: 1.0 };
52077        let r = _mm_maskz_load_sd(0b1, &mem.data);
52078        assert_eq_m128d(r, _mm_set_sd(1.0));
52079        let r = _mm_maskz_load_sd(0b0, &mem.data);
52080        assert_eq_m128d(r, _mm_set_sd(0.0));
52081    }
52082
52083    #[simd_test(enable = "avx512f,avx512vl")]
52084    unsafe fn test_mm_mask_storeu_pd() {
52085        let mut r = [42_f64; 2];
52086        let a = _mm_setr_pd(1.0, 2.0);
52087        let m = 0b10;
52088        _mm_mask_storeu_pd(r.as_mut_ptr(), m, a);
52089        let e = _mm_setr_pd(42.0, 2.0);
52090        assert_eq_m128d(_mm_loadu_pd(r.as_ptr()), e);
52091    }
52092
52093    #[simd_test(enable = "avx512f,avx512vl")]
52094    unsafe fn test_mm_mask_store_pd() {
52095        #[repr(align(16))]
52096        struct Align {
52097            data: [f64; 2], // 16 bytes
52098        }
52099        let mut r = Align { data: [42.0; 2] };
52100        let a = _mm_setr_pd(1.0, 2.0);
52101        let m = 0b10;
52102        _mm_mask_store_pd(r.data.as_mut_ptr(), m, a);
52103        let e = _mm_setr_pd(42.0, 2.0);
52104        assert_eq_m128d(_mm_load_pd(r.data.as_ptr()), e);
52105    }
52106
52107    #[simd_test(enable = "avx512f")]
52108    unsafe fn test_mm_mask_store_ss() {
52109        #[repr(align(16))]
52110        struct Align {
52111            data: f32,
52112        }
52113        let a = _mm_set_ss(2.0);
52114        let mut mem = Align { data: 1.0 };
52115        _mm_mask_store_ss(&mut mem.data, 0b1, a);
52116        assert_eq!(mem.data, 2.0);
52117        _mm_mask_store_ss(&mut mem.data, 0b0, a);
52118        assert_eq!(mem.data, 2.0);
52119    }
52120
52121    #[simd_test(enable = "avx512f")]
52122    unsafe fn test_mm_mask_store_sd() {
52123        #[repr(align(16))]
52124        struct Align {
52125            data: f64,
52126        }
52127        let a = _mm_set_sd(2.0);
52128        let mut mem = Align { data: 1.0 };
52129        _mm_mask_store_sd(&mut mem.data, 0b1, a);
52130        assert_eq!(mem.data, 2.0);
52131        _mm_mask_store_sd(&mut mem.data, 0b0, a);
52132        assert_eq!(mem.data, 2.0);
52133    }
52134
52135    #[simd_test(enable = "avx512f")]
52136    unsafe fn test_mm512_setr_pd() {
52137        let r = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
52138        assert_eq_m512d(r, _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.));
52139    }
52140
52141    #[simd_test(enable = "avx512f")]
52142    unsafe fn test_mm512_set_pd() {
52143        let r = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
52144        assert_eq_m512d(r, _mm512_set_pd(7., 6., 5., 4., 3., 2., 1., 0.));
52145    }
52146
52147    #[simd_test(enable = "avx512f")]
52148    unsafe fn test_mm512_rol_epi32() {
52149        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
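        // Rotating left by 1 wraps the top bit of `1 << 31` around to bit 0 (hence `1 << 0` below).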
52150        let r = _mm512_rol_epi32::<1>(a);
52151        let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52152        assert_eq_m512i(r, e);
52153    }
52154
52155    #[simd_test(enable = "avx512f")]
52156    unsafe fn test_mm512_mask_rol_epi32() {
52157        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52158        let r = _mm512_mask_rol_epi32::<1>(a, 0, a);
52159        assert_eq_m512i(r, a);
52160        let r = _mm512_mask_rol_epi32::<1>(a, 0b11111111_11111111, a);
52161        let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52162        assert_eq_m512i(r, e);
52163    }
52164
52165    #[simd_test(enable = "avx512f")]
52166    unsafe fn test_mm512_maskz_rol_epi32() {
52167        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52168        let r = _mm512_maskz_rol_epi32::<1>(0, a);
52169        assert_eq_m512i(r, _mm512_setzero_si512());
52170        let r = _mm512_maskz_rol_epi32::<1>(0b00000000_11111111, a);
52171        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
52172        assert_eq_m512i(r, e);
52173    }
52174
52175    #[simd_test(enable = "avx512f,avx512vl")]
52176    unsafe fn test_mm256_rol_epi32() {
52177        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52178        let r = _mm256_rol_epi32::<1>(a);
52179        let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52180        assert_eq_m256i(r, e);
52181    }
52182
52183    #[simd_test(enable = "avx512f,avx512vl")]
52184    unsafe fn test_mm256_mask_rol_epi32() {
52185        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52186        let r = _mm256_mask_rol_epi32::<1>(a, 0, a);
52187        assert_eq_m256i(r, a);
52188        let r = _mm256_mask_rol_epi32::<1>(a, 0b11111111, a);
52189        let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52190        assert_eq_m256i(r, e);
52191    }
52192
52193    #[simd_test(enable = "avx512f,avx512vl")]
52194    unsafe fn test_mm256_maskz_rol_epi32() {
52195        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52196        let r = _mm256_maskz_rol_epi32::<1>(0, a);
52197        assert_eq_m256i(r, _mm256_setzero_si256());
52198        let r = _mm256_maskz_rol_epi32::<1>(0b11111111, a);
52199        let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52200        assert_eq_m256i(r, e);
52201    }
52202
52203    #[simd_test(enable = "avx512f,avx512vl")]
52204    unsafe fn test_mm_rol_epi32() {
52205        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52206        let r = _mm_rol_epi32::<1>(a);
52207        let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52208        assert_eq_m128i(r, e);
52209    }
52210
52211    #[simd_test(enable = "avx512f,avx512vl")]
52212    unsafe fn test_mm_mask_rol_epi32() {
52213        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52214        let r = _mm_mask_rol_epi32::<1>(a, 0, a);
52215        assert_eq_m128i(r, a);
52216        let r = _mm_mask_rol_epi32::<1>(a, 0b00001111, a);
52217        let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52218        assert_eq_m128i(r, e);
52219    }
52220
52221    #[simd_test(enable = "avx512f,avx512vl")]
52222    unsafe fn test_mm_maskz_rol_epi32() {
52223        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52224        let r = _mm_maskz_rol_epi32::<1>(0, a);
52225        assert_eq_m128i(r, _mm_setzero_si128());
52226        let r = _mm_maskz_rol_epi32::<1>(0b00001111, a);
52227        let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52228        assert_eq_m128i(r, e);
52229    }
52230
52231    #[simd_test(enable = "avx512f")]
52232    unsafe fn test_mm512_ror_epi32() {
52233        let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52234        let r = _mm512_ror_epi32::<1>(a);
52235        let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52236        assert_eq_m512i(r, e);
52237    }
52238
52239    #[simd_test(enable = "avx512f")]
52240    unsafe fn test_mm512_mask_ror_epi32() {
52241        let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52242        let r = _mm512_mask_ror_epi32::<1>(a, 0, a);
52243        assert_eq_m512i(r, a);
52244        let r = _mm512_mask_ror_epi32::<1>(a, 0b11111111_11111111, a);
52245        let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52246        assert_eq_m512i(r, e);
52247    }
52248
52249    #[simd_test(enable = "avx512f")]
52250    unsafe fn test_mm512_maskz_ror_epi32() {
52251        let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
52252        let r = _mm512_maskz_ror_epi32::<1>(0, a);
52253        assert_eq_m512i(r, _mm512_setzero_si512());
52254        let r = _mm512_maskz_ror_epi32::<1>(0b00000000_11111111, a);
52255        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52256        assert_eq_m512i(r, e);
52257    }
52258
52259    #[simd_test(enable = "avx512f,avx512vl")]
52260    unsafe fn test_mm256_ror_epi32() {
52261        let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52262        let r = _mm256_ror_epi32::<1>(a);
52263        let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52264        assert_eq_m256i(r, e);
52265    }
52266
52267    #[simd_test(enable = "avx512f,avx512vl")]
52268    unsafe fn test_mm256_mask_ror_epi32() {
52269        let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52270        let r = _mm256_mask_ror_epi32::<1>(a, 0, a);
52271        assert_eq_m256i(r, a);
52272        let r = _mm256_mask_ror_epi32::<1>(a, 0b11111111, a);
52273        let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52274        assert_eq_m256i(r, e);
52275    }
52276
52277    #[simd_test(enable = "avx512f,avx512vl")]
52278    unsafe fn test_mm256_maskz_ror_epi32() {
52279        let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52280        let r = _mm256_maskz_ror_epi32::<1>(0, a);
52281        assert_eq_m256i(r, _mm256_setzero_si256());
52282        let r = _mm256_maskz_ror_epi32::<1>(0b11111111, a);
52283        let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52284        assert_eq_m256i(r, e);
52285    }
52286
52287    #[simd_test(enable = "avx512f,avx512vl")]
52288    unsafe fn test_mm_ror_epi32() {
52289        let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52290        let r = _mm_ror_epi32::<1>(a);
52291        let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52292        assert_eq_m128i(r, e);
52293    }
52294
52295    #[simd_test(enable = "avx512f,avx512vl")]
52296    unsafe fn test_mm_mask_ror_epi32() {
52297        let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52298        let r = _mm_mask_ror_epi32::<1>(a, 0, a);
52299        assert_eq_m128i(r, a);
52300        let r = _mm_mask_ror_epi32::<1>(a, 0b00001111, a);
52301        let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52302        assert_eq_m128i(r, e);
52303    }
52304
52305    #[simd_test(enable = "avx512f,avx512vl")]
52306    unsafe fn test_mm_maskz_ror_epi32() {
52307        let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52308        let r = _mm_maskz_ror_epi32::<1>(0, a);
52309        assert_eq_m128i(r, _mm_setzero_si128());
52310        let r = _mm_maskz_ror_epi32::<1>(0b00001111, a);
52311        let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52312        assert_eq_m128i(r, e);
52313    }
52314
52315    #[simd_test(enable = "avx512f")]
52316    unsafe fn test_mm512_slli_epi32() {
52317        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52318        let r = _mm512_slli_epi32::<1>(a);
52319        let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52320        assert_eq_m512i(r, e);
52321    }
52322
52323    #[simd_test(enable = "avx512f")]
52324    unsafe fn test_mm512_mask_slli_epi32() {
52325        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52326        let r = _mm512_mask_slli_epi32::<1>(a, 0, a);
52327        assert_eq_m512i(r, a);
52328        let r = _mm512_mask_slli_epi32::<1>(a, 0b11111111_11111111, a);
52329        let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52330        assert_eq_m512i(r, e);
52331    }
52332
52333    #[simd_test(enable = "avx512f")]
52334    unsafe fn test_mm512_maskz_slli_epi32() {
52335        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52336        let r = _mm512_maskz_slli_epi32::<1>(0, a);
52337        assert_eq_m512i(r, _mm512_setzero_si512());
52338        let r = _mm512_maskz_slli_epi32::<1>(0b00000000_11111111, a);
52339        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0);
52340        assert_eq_m512i(r, e);
52341    }
52342
52343    #[simd_test(enable = "avx512f,avx512vl")]
52344    unsafe fn test_mm256_mask_slli_epi32() {
52345        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52346        let r = _mm256_mask_slli_epi32::<1>(a, 0, a);
52347        assert_eq_m256i(r, a);
52348        let r = _mm256_mask_slli_epi32::<1>(a, 0b11111111, a);
52349        let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
52350        assert_eq_m256i(r, e);
52351    }
52352
52353    #[simd_test(enable = "avx512f,avx512vl")]
52354    unsafe fn test_mm256_maskz_slli_epi32() {
52355        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52356        let r = _mm256_maskz_slli_epi32::<1>(0, a);
52357        assert_eq_m256i(r, _mm256_setzero_si256());
52358        let r = _mm256_maskz_slli_epi32::<1>(0b11111111, a);
52359        let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
52360        assert_eq_m256i(r, e);
52361    }
52362
52363    #[simd_test(enable = "avx512f,avx512vl")]
52364    unsafe fn test_mm_mask_slli_epi32() {
52365        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52366        let r = _mm_mask_slli_epi32::<1>(a, 0, a);
52367        assert_eq_m128i(r, a);
52368        let r = _mm_mask_slli_epi32::<1>(a, 0b00001111, a);
52369        let e = _mm_set_epi32(0, 2, 2, 2);
52370        assert_eq_m128i(r, e);
52371    }
52372
52373    #[simd_test(enable = "avx512f,avx512vl")]
52374    unsafe fn test_mm_maskz_slli_epi32() {
52375        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52376        let r = _mm_maskz_slli_epi32::<1>(0, a);
52377        assert_eq_m128i(r, _mm_setzero_si128());
52378        let r = _mm_maskz_slli_epi32::<1>(0b00001111, a);
52379        let e = _mm_set_epi32(0, 2, 2, 2);
52380        assert_eq_m128i(r, e);
52381    }
52382
52383    #[simd_test(enable = "avx512f")]
52384    unsafe fn test_mm512_srli_epi32() {
52385        let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52386        let r = _mm512_srli_epi32::<1>(a);
52387        let e = _mm512_set_epi32(0 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52388        assert_eq_m512i(r, e);
52389    }
52390
52391    #[simd_test(enable = "avx512f")]
52392    unsafe fn test_mm512_mask_srli_epi32() {
52393        let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52394        let r = _mm512_mask_srli_epi32::<1>(a, 0, a);
52395        assert_eq_m512i(r, a);
52396        let r = _mm512_mask_srli_epi32::<1>(a, 0b11111111_11111111, a);
52397        let e = _mm512_set_epi32(0 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52398        assert_eq_m512i(r, e);
52399    }
52400
52401    #[simd_test(enable = "avx512f")]
52402    unsafe fn test_mm512_maskz_srli_epi32() {
52403        let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0);
52404        let r = _mm512_maskz_srli_epi32::<1>(0, a);
52405        assert_eq_m512i(r, _mm512_setzero_si512());
52406        let r = _mm512_maskz_srli_epi32::<1>(0b00000000_11111111, a);
52407        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0 << 31);
52408        assert_eq_m512i(r, e);
52409    }
52410
52411    #[simd_test(enable = "avx512f,avx512vl")]
52412    unsafe fn test_mm256_mask_srli_epi32() {
52413        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
52414        let r = _mm256_mask_srli_epi32::<1>(a, 0, a);
52415        assert_eq_m256i(r, a);
52416        let r = _mm256_mask_srli_epi32::<1>(a, 0b11111111, a);
52417        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
52418        assert_eq_m256i(r, e);
52419    }
52420
52421    #[simd_test(enable = "avx512f,avx512vl")]
52422    unsafe fn test_mm256_maskz_srli_epi32() {
52423        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
52424        let r = _mm256_maskz_srli_epi32::<1>(0, a);
52425        assert_eq_m256i(r, _mm256_setzero_si256());
52426        let r = _mm256_maskz_srli_epi32::<1>(0b11111111, a);
52427        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
52428        assert_eq_m256i(r, e);
52429    }
52430
52431    #[simd_test(enable = "avx512f,avx512vl")]
52432    unsafe fn test_mm_mask_srli_epi32() {
52433        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
52434        let r = _mm_mask_srli_epi32::<1>(a, 0, a);
52435        assert_eq_m128i(r, a);
52436        let r = _mm_mask_srli_epi32::<1>(a, 0b00001111, a);
52437        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
52438        assert_eq_m128i(r, e);
52439    }
52440
52441    #[simd_test(enable = "avx512f,avx512vl")]
52442    unsafe fn test_mm_maskz_srli_epi32() {
52443        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
52444        let r = _mm_maskz_srli_epi32::<1>(0, a);
52445        assert_eq_m128i(r, _mm_setzero_si128());
52446        let r = _mm_maskz_srli_epi32::<1>(0b00001111, a);
52447        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
52448        assert_eq_m128i(r, e);
52449    }
52450
52451    #[simd_test(enable = "avx512f")]
52452    unsafe fn test_mm512_rolv_epi32() {
52453        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52454        let b = _mm512_set1_epi32(1);
52455        let r = _mm512_rolv_epi32(a, b);
52456        let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52457        assert_eq_m512i(r, e);
52458    }
52459
52460    #[simd_test(enable = "avx512f")]
52461    unsafe fn test_mm512_mask_rolv_epi32() {
52462        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52463        let b = _mm512_set1_epi32(1);
52464        let r = _mm512_mask_rolv_epi32(a, 0, a, b);
52465        assert_eq_m512i(r, a);
52466        let r = _mm512_mask_rolv_epi32(a, 0b11111111_11111111, a, b);
52467        let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52468        assert_eq_m512i(r, e);
52469    }
52470
52471    #[simd_test(enable = "avx512f")]
52472    unsafe fn test_mm512_maskz_rolv_epi32() {
52473        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52474        let b = _mm512_set1_epi32(1);
52475        let r = _mm512_maskz_rolv_epi32(0, a, b);
52476        assert_eq_m512i(r, _mm512_setzero_si512());
52477        let r = _mm512_maskz_rolv_epi32(0b00000000_11111111, a, b);
52478        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
52479        assert_eq_m512i(r, e);
52480    }
52481
52482    #[simd_test(enable = "avx512f,avx512vl")]
52483    unsafe fn test_mm256_rolv_epi32() {
52484        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52485        let b = _mm256_set1_epi32(1);
52486        let r = _mm256_rolv_epi32(a, b);
52487        let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52488        assert_eq_m256i(r, e);
52489    }
52490
52491    #[simd_test(enable = "avx512f,avx512vl")]
52492    unsafe fn test_mm256_mask_rolv_epi32() {
52493        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52494        let b = _mm256_set1_epi32(1);
52495        let r = _mm256_mask_rolv_epi32(a, 0, a, b);
52496        assert_eq_m256i(r, a);
52497        let r = _mm256_mask_rolv_epi32(a, 0b11111111, a, b);
52498        let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52499        assert_eq_m256i(r, e);
52500    }
52501
52502    #[simd_test(enable = "avx512f,avx512vl")]
52503    unsafe fn test_mm256_maskz_rolv_epi32() {
52504        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52505        let b = _mm256_set1_epi32(1);
52506        let r = _mm256_maskz_rolv_epi32(0, a, b);
52507        assert_eq_m256i(r, _mm256_setzero_si256());
52508        let r = _mm256_maskz_rolv_epi32(0b11111111, a, b);
52509        let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52510        assert_eq_m256i(r, e);
52511    }
52512
52513    #[simd_test(enable = "avx512f,avx512vl")]
52514    unsafe fn test_mm_rolv_epi32() {
52515        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52516        let b = _mm_set1_epi32(1);
52517        let r = _mm_rolv_epi32(a, b);
52518        let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52519        assert_eq_m128i(r, e);
52520    }
52521
52522    #[simd_test(enable = "avx512f,avx512vl")]
52523    unsafe fn test_mm_mask_rolv_epi32() {
52524        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52525        let b = _mm_set1_epi32(1);
52526        let r = _mm_mask_rolv_epi32(a, 0, a, b);
52527        assert_eq_m128i(r, a);
52528        let r = _mm_mask_rolv_epi32(a, 0b00001111, a, b);
52529        let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52530        assert_eq_m128i(r, e);
52531    }
52532
52533    #[simd_test(enable = "avx512f,avx512vl")]
52534    unsafe fn test_mm_maskz_rolv_epi32() {
52535        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52536        let b = _mm_set1_epi32(1);
52537        let r = _mm_maskz_rolv_epi32(0, a, b);
52538        assert_eq_m128i(r, _mm_setzero_si128());
52539        let r = _mm_maskz_rolv_epi32(0b00001111, a, b);
52540        let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52541        assert_eq_m128i(r, e);
52542    }
52543
52544    #[simd_test(enable = "avx512f")]
52545    unsafe fn test_mm512_rorv_epi32() {
52546        let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52547        let b = _mm512_set1_epi32(1);
52548        let r = _mm512_rorv_epi32(a, b);
52549        let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52550        assert_eq_m512i(r, e);
52551    }
52552
52553    #[simd_test(enable = "avx512f")]
52554    unsafe fn test_mm512_mask_rorv_epi32() {
52555        let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52556        let b = _mm512_set1_epi32(1);
52557        let r = _mm512_mask_rorv_epi32(a, 0, a, b);
52558        assert_eq_m512i(r, a);
52559        let r = _mm512_mask_rorv_epi32(a, 0b11111111_11111111, a, b);
52560        let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52561        assert_eq_m512i(r, e);
52562    }
52563
52564    #[simd_test(enable = "avx512f")]
52565    unsafe fn test_mm512_maskz_rorv_epi32() {
52566        let a = _mm512_set_epi32(3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
52567        let b = _mm512_set1_epi32(1);
52568        let r = _mm512_maskz_rorv_epi32(0, a, b);
52569        assert_eq_m512i(r, _mm512_setzero_si512());
52570        let r = _mm512_maskz_rorv_epi32(0b00000000_11111111, a, b);
52571        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52572        assert_eq_m512i(r, e);
52573    }
52574
52575    #[simd_test(enable = "avx512f,avx512vl")]
52576    unsafe fn test_mm256_rorv_epi32() {
52577        let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52578        let b = _mm256_set1_epi32(1);
52579        let r = _mm256_rorv_epi32(a, b);
52580        let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52581        assert_eq_m256i(r, e);
52582    }
52583
52584    #[simd_test(enable = "avx512f,avx512vl")]
52585    unsafe fn test_mm256_mask_rorv_epi32() {
52586        let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52587        let b = _mm256_set1_epi32(1);
52588        let r = _mm256_mask_rorv_epi32(a, 0, a, b);
52589        assert_eq_m256i(r, a);
52590        let r = _mm256_mask_rorv_epi32(a, 0b11111111, a, b);
52591        let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52592        assert_eq_m256i(r, e);
52593    }
52594
52595    #[simd_test(enable = "avx512f,avx512vl")]
52596    unsafe fn test_mm256_maskz_rorv_epi32() {
52597        let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52598        let b = _mm256_set1_epi32(1);
52599        let r = _mm256_maskz_rorv_epi32(0, a, b);
52600        assert_eq_m256i(r, _mm256_setzero_si256());
52601        let r = _mm256_maskz_rorv_epi32(0b11111111, a, b);
52602        let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52603        assert_eq_m256i(r, e);
52604    }
52605
52606    #[simd_test(enable = "avx512f,avx512vl")]
52607    unsafe fn test_mm_rorv_epi32() {
52608        let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52609        let b = _mm_set1_epi32(1);
52610        let r = _mm_rorv_epi32(a, b);
52611        let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52612        assert_eq_m128i(r, e);
52613    }
52614
52615    #[simd_test(enable = "avx512f,avx512vl")]
52616    unsafe fn test_mm_mask_rorv_epi32() {
52617        let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52618        let b = _mm_set1_epi32(1);
52619        let r = _mm_mask_rorv_epi32(a, 0, a, b);
52620        assert_eq_m128i(r, a);
52621        let r = _mm_mask_rorv_epi32(a, 0b00001111, a, b);
52622        let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52623        assert_eq_m128i(r, e);
52624    }
52625
52626    #[simd_test(enable = "avx512f,avx512vl")]
52627    unsafe fn test_mm_maskz_rorv_epi32() {
52628        let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52629        let b = _mm_set1_epi32(1);
52630        let r = _mm_maskz_rorv_epi32(0, a, b);
52631        assert_eq_m128i(r, _mm_setzero_si128());
52632        let r = _mm_maskz_rorv_epi32(0b00001111, a, b);
52633        let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52634        assert_eq_m128i(r, e);
52635    }
52636
52637    #[simd_test(enable = "avx512f")]
52638    unsafe fn test_mm512_sllv_epi32() {
52639        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52640        let count = _mm512_set1_epi32(1);
52641        let r = _mm512_sllv_epi32(a, count);
52642        let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52643        assert_eq_m512i(r, e);
52644    }
52645
52646    #[simd_test(enable = "avx512f")]
52647    unsafe fn test_mm512_mask_sllv_epi32() {
52648        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52649        let count = _mm512_set1_epi32(1);
52650        let r = _mm512_mask_sllv_epi32(a, 0, a, count);
52651        assert_eq_m512i(r, a);
52652        let r = _mm512_mask_sllv_epi32(a, 0b11111111_11111111, a, count);
52653        let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52654        assert_eq_m512i(r, e);
52655    }
52656
52657    #[simd_test(enable = "avx512f")]
52658    unsafe fn test_mm512_maskz_sllv_epi32() {
52659        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52660        let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52661        let r = _mm512_maskz_sllv_epi32(0, a, count);
52662        assert_eq_m512i(r, _mm512_setzero_si512());
52663        let r = _mm512_maskz_sllv_epi32(0b00000000_11111111, a, count);
52664        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0);
52665        assert_eq_m512i(r, e);
52666    }
52667
52668    #[simd_test(enable = "avx512f,avx512vl")]
52669    unsafe fn test_mm256_mask_sllv_epi32() {
52670        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52671        let count = _mm256_set1_epi32(1);
52672        let r = _mm256_mask_sllv_epi32(a, 0, a, count);
52673        assert_eq_m256i(r, a);
52674        let r = _mm256_mask_sllv_epi32(a, 0b11111111, a, count);
52675        let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
52676        assert_eq_m256i(r, e);
52677    }
52678
52679    #[simd_test(enable = "avx512f,avx512vl")]
52680    unsafe fn test_mm256_maskz_sllv_epi32() {
52681        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52682        let count = _mm256_set1_epi32(1);
52683        let r = _mm256_maskz_sllv_epi32(0, a, count);
52684        assert_eq_m256i(r, _mm256_setzero_si256());
52685        let r = _mm256_maskz_sllv_epi32(0b11111111, a, count);
52686        let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
52687        assert_eq_m256i(r, e);
52688    }
52689
52690    #[simd_test(enable = "avx512f,avx512vl")]
52691    unsafe fn test_mm_mask_sllv_epi32() {
52692        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52693        let count = _mm_set1_epi32(1);
52694        let r = _mm_mask_sllv_epi32(a, 0, a, count);
52695        assert_eq_m128i(r, a);
52696        let r = _mm_mask_sllv_epi32(a, 0b00001111, a, count);
52697        let e = _mm_set_epi32(0, 2, 2, 2);
52698        assert_eq_m128i(r, e);
52699    }
52700
52701    #[simd_test(enable = "avx512f,avx512vl")]
52702    unsafe fn test_mm_maskz_sllv_epi32() {
52703        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52704        let count = _mm_set1_epi32(1);
52705        let r = _mm_maskz_sllv_epi32(0, a, count);
52706        assert_eq_m128i(r, _mm_setzero_si128());
52707        let r = _mm_maskz_sllv_epi32(0b00001111, a, count);
52708        let e = _mm_set_epi32(0, 2, 2, 2);
52709        assert_eq_m128i(r, e);
52710    }
52711
52712    #[simd_test(enable = "avx512f")]
52713    unsafe fn test_mm512_srlv_epi32() {
52714        let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52715        let count = _mm512_set1_epi32(1);
52716        let r = _mm512_srlv_epi32(a, count);
52717        let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52718        assert_eq_m512i(r, e);
52719    }
52720
52721    #[simd_test(enable = "avx512f")]
52722    unsafe fn test_mm512_mask_srlv_epi32() {
52723        let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52724        let count = _mm512_set1_epi32(1);
52725        let r = _mm512_mask_srlv_epi32(a, 0, a, count);
52726        assert_eq_m512i(r, a);
52727        let r = _mm512_mask_srlv_epi32(a, 0b11111111_11111111, a, count);
52728        let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52729        assert_eq_m512i(r, e);
52730    }
52731
52732    #[simd_test(enable = "avx512f")]
52733    unsafe fn test_mm512_maskz_srlv_epi32() {
52734        let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0);
52735        let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52736        let r = _mm512_maskz_srlv_epi32(0, a, count);
52737        assert_eq_m512i(r, _mm512_setzero_si512());
52738        let r = _mm512_maskz_srlv_epi32(0b00000000_11111111, a, count);
52739        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0);
52740        assert_eq_m512i(r, e);
52741    }
52742
52743    #[simd_test(enable = "avx512f,avx512vl")]
52744    unsafe fn test_mm256_mask_srlv_epi32() {
52745        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
52746        let count = _mm256_set1_epi32(1);
52747        let r = _mm256_mask_srlv_epi32(a, 0, a, count);
52748        assert_eq_m256i(r, a);
52749        let r = _mm256_mask_srlv_epi32(a, 0b11111111, a, count);
52750        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
52751        assert_eq_m256i(r, e);
52752    }
52753
52754    #[simd_test(enable = "avx512f,avx512vl")]
52755    unsafe fn test_mm256_maskz_srlv_epi32() {
52756        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
52757        let count = _mm256_set1_epi32(1);
52758        let r = _mm256_maskz_srlv_epi32(0, a, count);
52759        assert_eq_m256i(r, _mm256_setzero_si256());
52760        let r = _mm256_maskz_srlv_epi32(0b11111111, a, count);
52761        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
52762        assert_eq_m256i(r, e);
52763    }
52764
52765    #[simd_test(enable = "avx512f,avx512vl")]
52766    unsafe fn test_mm_mask_srlv_epi32() {
52767        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
52768        let count = _mm_set1_epi32(1);
52769        let r = _mm_mask_srlv_epi32(a, 0, a, count);
52770        assert_eq_m128i(r, a);
52771        let r = _mm_mask_srlv_epi32(a, 0b00001111, a, count);
52772        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
52773        assert_eq_m128i(r, e);
52774    }
52775
52776    #[simd_test(enable = "avx512f,avx512vl")]
52777    unsafe fn test_mm_maskz_srlv_epi32() {
52778        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
52779        let count = _mm_set1_epi32(1);
52780        let r = _mm_maskz_srlv_epi32(0, a, count);
52781        assert_eq_m128i(r, _mm_setzero_si128());
52782        let r = _mm_maskz_srlv_epi32(0b00001111, a, count);
52783        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
52784        assert_eq_m128i(r, e);
52785    }
52786
52787    #[simd_test(enable = "avx512f")]
52788    unsafe fn test_mm512_sll_epi32() {
52789        #[rustfmt::skip]
52790        let a = _mm512_set_epi32(
52791            1 << 31, 1 << 0, 1 << 1, 1 << 2,
52792            0, 0, 0, 0,
52793            0, 0, 0, 0,
52794            0, 0, 0, 0,
52795        );
52796        let count = _mm_set_epi32(0, 0, 0, 2);
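        // `_mm512_sll_epi32` shifts every lane by the single unsigned count held in the low 64 bits of `count` (2 here).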
52797        let r = _mm512_sll_epi32(a, count);
52798        #[rustfmt::skip]
52799        let e = _mm512_set_epi32(
52800            0, 1 << 2, 1 << 3, 1 << 4,
52801            0, 0, 0, 0,
52802            0, 0, 0, 0,
52803            0, 0, 0, 0,
52804        );
52805        assert_eq_m512i(r, e);
52806    }
52807
52808    #[simd_test(enable = "avx512f")]
52809    unsafe fn test_mm512_mask_sll_epi32() {
52810        #[rustfmt::skip]
52811        let a = _mm512_set_epi32(
52812            1 << 31, 1 << 0, 1 << 1, 1 << 2,
52813            0, 0, 0, 0,
52814            0, 0, 0, 0,
52815            0, 0, 0, 0,
52816        );
52817        let count = _mm_set_epi32(0, 0, 0, 2);
52818        let r = _mm512_mask_sll_epi32(a, 0, a, count);
52819        assert_eq_m512i(r, a);
52820        let r = _mm512_mask_sll_epi32(a, 0b11111111_11111111, a, count);
52821        #[rustfmt::skip]
52822        let e = _mm512_set_epi32(
52823            0, 1 << 2, 1 << 3, 1 << 4,
52824            0, 0, 0, 0,
52825            0, 0, 0, 0,
52826            0, 0, 0, 0,
52827        );
52828        assert_eq_m512i(r, e);
52829    }
52830
52831    #[simd_test(enable = "avx512f")]
52832    unsafe fn test_mm512_maskz_sll_epi32() {
52833        #[rustfmt::skip]
52834        let a = _mm512_set_epi32(
52835            1 << 31, 1 << 0, 1 << 1, 1 << 2,
52836            0, 0, 0, 0,
52837            0, 0, 0, 0,
52838            0, 0, 0, 1 << 31,
52839        );
52840        let count = _mm_set_epi32(2, 0, 0, 2);
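        // Shift count is 2 (low 64 bits of `count`); the only nonzero lane kept by the mask is `1 << 31`,
        // whose bit shifts out, so every lane of the result is zero.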
52841        let r = _mm512_maskz_sll_epi32(0, a, count);
52842        assert_eq_m512i(r, _mm512_setzero_si512());
52843        let r = _mm512_maskz_sll_epi32(0b00000000_11111111, a, count);
52844        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
52845        assert_eq_m512i(r, e);
52846    }
52847
52848    #[simd_test(enable = "avx512f,avx512vl")]
52849    unsafe fn test_mm256_mask_sll_epi32() {
52850        let a = _mm256_set_epi32(1 << 13, 0, 0, 0, 0, 0, 0, 0);
52851        let count = _mm_set_epi32(0, 0, 0, 1);
52852        let r = _mm256_mask_sll_epi32(a, 0, a, count);
52853        assert_eq_m256i(r, a);
52854        let r = _mm256_mask_sll_epi32(a, 0b11111111, a, count);
52855        let e = _mm256_set_epi32(1 << 14, 0, 0, 0, 0, 0, 0, 0);
52856        assert_eq_m256i(r, e);
52857    }
52858
52859    #[simd_test(enable = "avx512f,avx512vl")]
52860    unsafe fn test_mm256_maskz_sll_epi32() {
52861        let a = _mm256_set_epi32(1 << 13, 0, 0, 0, 0, 0, 0, 0);
52862        let count = _mm_set_epi32(0, 0, 0, 1);
52863        let r = _mm256_maskz_sll_epi32(0, a, count);
52864        assert_eq_m256i(r, _mm256_setzero_si256());
52865        let r = _mm256_maskz_sll_epi32(0b11111111, a, count);
52866        let e = _mm256_set_epi32(1 << 14, 0, 0, 0, 0, 0, 0, 0);
52867        assert_eq_m256i(r, e);
52868    }
52869
52870    #[simd_test(enable = "avx512f,avx512vl")]
52871    unsafe fn test_mm_mask_sll_epi32() {
52872        let a = _mm_set_epi32(1 << 13, 0, 0, 0);
52873        let count = _mm_set_epi32(0, 0, 0, 1);
52874        let r = _mm_mask_sll_epi32(a, 0, a, count);
52875        assert_eq_m128i(r, a);
52876        let r = _mm_mask_sll_epi32(a, 0b00001111, a, count);
52877        let e = _mm_set_epi32(1 << 14, 0, 0, 0);
52878        assert_eq_m128i(r, e);
52879    }
52880
52881    #[simd_test(enable = "avx512f,avx512vl")]
52882    unsafe fn test_mm_maskz_sll_epi32() {
52883        let a = _mm_set_epi32(1 << 13, 0, 0, 0);
52884        let count = _mm_set_epi32(0, 0, 0, 1);
52885        let r = _mm_maskz_sll_epi32(0, a, count);
52886        assert_eq_m128i(r, _mm_setzero_si128());
52887        let r = _mm_maskz_sll_epi32(0b00001111, a, count);
52888        let e = _mm_set_epi32(1 << 14, 0, 0, 0);
52889        assert_eq_m128i(r, e);
52890    }
52891
52892    #[simd_test(enable = "avx512f")]
52893    unsafe fn test_mm512_srl_epi32() {
52894        #[rustfmt::skip]
52895        let a = _mm512_set_epi32(
52896            1 << 31, 1 << 0, 1 << 1, 1 << 2,
52897            0, 0, 0, 0,
52898            0, 0, 0, 0,
52899            0, 0, 0, 0,
52900        );
52901        let count = _mm_set_epi32(0, 0, 0, 2);
52902        let r = _mm512_srl_epi32(a, count);
52903        let e = _mm512_set_epi32(1 << 29, 0, 0, 1 << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
52904        assert_eq_m512i(r, e);
52905    }
52906
52907    #[simd_test(enable = "avx512f")]
52908    unsafe fn test_mm512_mask_srl_epi32() {
52909        #[rustfmt::skip]
52910        let a = _mm512_set_epi32(
52911            1 << 31, 1 << 0, 1 << 1, 1 << 2,
52912            0, 0, 0, 0,
52913            0, 0, 0, 0,
52914            0, 0, 0, 0,
52915        );
52916        let count = _mm_set_epi32(0, 0, 0, 2);
52917        let r = _mm512_mask_srl_epi32(a, 0, a, count);
52918        assert_eq_m512i(r, a);
52919        let r = _mm512_mask_srl_epi32(a, 0b11111111_11111111, a, count);
52920        let e = _mm512_set_epi32(1 << 29, 0, 0, 1 << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
52921        assert_eq_m512i(r, e);
52922    }
52923
52924    #[simd_test(enable = "avx512f")]
52925    unsafe fn test_mm512_maskz_srl_epi32() {
52926        #[rustfmt::skip]
52927        let a = _mm512_set_epi32(
52928            1 << 31, 1 << 0, 1 << 1, 1 << 2,
52929            0, 0, 0, 0,
52930            0, 0, 0, 0,
52931            0, 0, 0, 1 << 31,
52932        );
52933        let count = _mm_set_epi32(2, 0, 0, 2);
52934        let r = _mm512_maskz_srl_epi32(0, a, count);
52935        assert_eq_m512i(r, _mm512_setzero_si512());
52936        let r = _mm512_maskz_srl_epi32(0b00000000_11111111, a, count);
52937        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 29);
52938        assert_eq_m512i(r, e);
52939    }
52940
52941    #[simd_test(enable = "avx512f,avx512vl")]
52942    unsafe fn test_mm256_mask_srl_epi32() {
52943        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
52944        let count = _mm_set_epi32(0, 0, 0, 1);
52945        let r = _mm256_mask_srl_epi32(a, 0, a, count);
52946        assert_eq_m256i(r, a);
52947        let r = _mm256_mask_srl_epi32(a, 0b11111111, a, count);
52948        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
52949        assert_eq_m256i(r, e);
52950    }
52951
52952    #[simd_test(enable = "avx512f,avx512vl")]
52953    unsafe fn test_mm256_maskz_srl_epi32() {
52954        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
52955        let count = _mm_set_epi32(0, 0, 0, 1);
52956        let r = _mm256_maskz_srl_epi32(0, a, count);
52957        assert_eq_m256i(r, _mm256_setzero_si256());
52958        let r = _mm256_maskz_srl_epi32(0b11111111, a, count);
52959        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
52960        assert_eq_m256i(r, e);
52961    }
52962
52963    #[simd_test(enable = "avx512f,avx512vl")]
52964    unsafe fn test_mm_mask_srl_epi32() {
52965        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
52966        let count = _mm_set_epi32(0, 0, 0, 1);
52967        let r = _mm_mask_srl_epi32(a, 0, a, count);
52968        assert_eq_m128i(r, a);
52969        let r = _mm_mask_srl_epi32(a, 0b00001111, a, count);
52970        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
52971        assert_eq_m128i(r, e);
52972    }
52973
52974    #[simd_test(enable = "avx512f,avx512vl")]
52975    unsafe fn test_mm_maskz_srl_epi32() {
52976        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
52977        let count = _mm_set_epi32(0, 0, 0, 1);
52978        let r = _mm_maskz_srl_epi32(0, a, count);
52979        assert_eq_m128i(r, _mm_setzero_si128());
52980        let r = _mm_maskz_srl_epi32(0b00001111, a, count);
52981        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
52982        assert_eq_m128i(r, e);
52983    }
52984
52985    #[simd_test(enable = "avx512f")]
52986    unsafe fn test_mm512_sra_epi32() {
52987        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
52988        let count = _mm_set_epi32(1, 0, 0, 2);
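        // Only the low 64 bits of `count` are used (a shift of 2); the arithmetic shift rounds toward
        // negative infinity, so -15 >> 2 == -4.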
52989        let r = _mm512_sra_epi32(a, count);
52990        let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
52991        assert_eq_m512i(r, e);
52992    }
52993
52994    #[simd_test(enable = "avx512f")]
52995    unsafe fn test_mm512_mask_sra_epi32() {
52996        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16);
52997        let count = _mm_set_epi32(0, 0, 0, 2);
52998        let r = _mm512_mask_sra_epi32(a, 0, a, count);
52999        assert_eq_m512i(r, a);
53000        let r = _mm512_mask_sra_epi32(a, 0b11111111_11111111, a, count);
53001        let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4);
53002        assert_eq_m512i(r, e);
53003    }
53004
53005    #[simd_test(enable = "avx512f")]
53006    unsafe fn test_mm512_maskz_sra_epi32() {
53007        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14);
53008        let count = _mm_set_epi32(2, 0, 0, 2);
53009        let r = _mm512_maskz_sra_epi32(0, a, count);
53010        assert_eq_m512i(r, _mm512_setzero_si512());
53011        let r = _mm512_maskz_sra_epi32(0b00000000_11111111, a, count);
53012        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4);
53013        assert_eq_m512i(r, e);
53014    }
53015
53016    #[simd_test(enable = "avx512f,avx512vl")]
53017    unsafe fn test_mm256_mask_sra_epi32() {
53018        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53019        let count = _mm_set_epi32(0, 0, 0, 1);
53020        let r = _mm256_mask_sra_epi32(a, 0, a, count);
53021        assert_eq_m256i(r, a);
53022        let r = _mm256_mask_sra_epi32(a, 0b11111111, a, count);
53023        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53024        assert_eq_m256i(r, e);
53025    }
53026
53027    #[simd_test(enable = "avx512f,avx512vl")]
53028    unsafe fn test_mm256_maskz_sra_epi32() {
53029        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53030        let count = _mm_set_epi32(0, 0, 0, 1);
53031        let r = _mm256_maskz_sra_epi32(0, a, count);
53032        assert_eq_m256i(r, _mm256_setzero_si256());
53033        let r = _mm256_maskz_sra_epi32(0b11111111, a, count);
53034        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53035        assert_eq_m256i(r, e);
53036    }
53037
53038    #[simd_test(enable = "avx512f,avx512vl")]
53039    unsafe fn test_mm_mask_sra_epi32() {
53040        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53041        let count = _mm_set_epi32(0, 0, 0, 1);
53042        let r = _mm_mask_sra_epi32(a, 0, a, count);
53043        assert_eq_m128i(r, a);
53044        let r = _mm_mask_sra_epi32(a, 0b00001111, a, count);
53045        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53046        assert_eq_m128i(r, e);
53047    }
53048
53049    #[simd_test(enable = "avx512f,avx512vl")]
53050    unsafe fn test_mm_maskz_sra_epi32() {
53051        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53052        let count = _mm_set_epi32(0, 0, 0, 1);
53053        let r = _mm_maskz_sra_epi32(0, a, count);
53054        assert_eq_m128i(r, _mm_setzero_si128());
53055        let r = _mm_maskz_sra_epi32(0b00001111, a, count);
53056        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53057        assert_eq_m128i(r, e);
53058    }
53059
53060    #[simd_test(enable = "avx512f")]
53061    unsafe fn test_mm512_srav_epi32() {
53062        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
53063        let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
53064        let r = _mm512_srav_epi32(a, count);
53065        let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
53066        assert_eq_m512i(r, e);
53067    }
53068
53069    #[simd_test(enable = "avx512f")]
53070    unsafe fn test_mm512_mask_srav_epi32() {
53071        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16);
53072        let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
53073        let r = _mm512_mask_srav_epi32(a, 0, a, count);
53074        assert_eq_m512i(r, a);
53075        let r = _mm512_mask_srav_epi32(a, 0b11111111_11111111, a, count);
53076        let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8);
53077        assert_eq_m512i(r, e);
53078    }
53079
53080    #[simd_test(enable = "avx512f")]
53081    unsafe fn test_mm512_maskz_srav_epi32() {
53082        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14);
53083        let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2);
53084        let r = _mm512_maskz_srav_epi32(0, a, count);
53085        assert_eq_m512i(r, _mm512_setzero_si512());
53086        let r = _mm512_maskz_srav_epi32(0b00000000_11111111, a, count);
53087        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4);
53088        assert_eq_m512i(r, e);
53089    }
53090
53091    #[simd_test(enable = "avx512f,avx512vl")]
53092    unsafe fn test_mm256_mask_srav_epi32() {
53093        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53094        let count = _mm256_set1_epi32(1);
53095        let r = _mm256_mask_srav_epi32(a, 0, a, count);
53096        assert_eq_m256i(r, a);
53097        let r = _mm256_mask_srav_epi32(a, 0b11111111, a, count);
53098        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53099        assert_eq_m256i(r, e);
53100    }
53101
53102    #[simd_test(enable = "avx512f,avx512vl")]
53103    unsafe fn test_mm256_maskz_srav_epi32() {
53104        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53105        let count = _mm256_set1_epi32(1);
53106        let r = _mm256_maskz_srav_epi32(0, a, count);
53107        assert_eq_m256i(r, _mm256_setzero_si256());
53108        let r = _mm256_maskz_srav_epi32(0b11111111, a, count);
53109        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53110        assert_eq_m256i(r, e);
53111    }
53112
53113    #[simd_test(enable = "avx512f,avx512vl")]
53114    unsafe fn test_mm_mask_srav_epi32() {
53115        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53116        let count = _mm_set1_epi32(1);
53117        let r = _mm_mask_srav_epi32(a, 0, a, count);
53118        assert_eq_m128i(r, a);
53119        let r = _mm_mask_srav_epi32(a, 0b00001111, a, count);
53120        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53121        assert_eq_m128i(r, e);
53122    }
53123
53124    #[simd_test(enable = "avx512f,avx512vl")]
53125    unsafe fn test_mm_maskz_srav_epi32() {
53126        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53127        let count = _mm_set1_epi32(1);
53128        let r = _mm_maskz_srav_epi32(0, a, count);
53129        assert_eq_m128i(r, _mm_setzero_si128());
53130        let r = _mm_maskz_srav_epi32(0b00001111, a, count);
53131        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53132        assert_eq_m128i(r, e);
53133    }
53134
53135    #[simd_test(enable = "avx512f")]
53136    unsafe fn test_mm512_srai_epi32() {
53137        let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, -15);
53138        let r = _mm512_srai_epi32::<2>(a);
53139        let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, -4);
53140        assert_eq_m512i(r, e);
53141    }
53142
53143    #[simd_test(enable = "avx512f")]
53144    unsafe fn test_mm512_mask_srai_epi32() {
53145        let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15);
53146        let r = _mm512_mask_srai_epi32::<2>(a, 0, a);
53147        assert_eq_m512i(r, a);
53148        let r = _mm512_mask_srai_epi32::<2>(a, 0b11111111_11111111, a);
53149        let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4);
53150        assert_eq_m512i(r, e);
53151    }
53152
53153    #[simd_test(enable = "avx512f")]
53154    unsafe fn test_mm512_maskz_srai_epi32() {
53155        let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15);
53156        let r = _mm512_maskz_srai_epi32::<2>(0, a);
53157        assert_eq_m512i(r, _mm512_setzero_si512());
53158        let r = _mm512_maskz_srai_epi32::<2>(0b00000000_11111111, a);
53159        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4);
53160        assert_eq_m512i(r, e);
53161    }
53162
53163    #[simd_test(enable = "avx512f,avx512vl")]
53164    unsafe fn test_mm256_mask_srai_epi32() {
53165        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53166        let r = _mm256_mask_srai_epi32::<1>(a, 0, a);
53167        assert_eq_m256i(r, a);
53168        let r = _mm256_mask_srai_epi32::<1>(a, 0b11111111, a);
53169        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53170        assert_eq_m256i(r, e);
53171    }
53172
53173    #[simd_test(enable = "avx512f,avx512vl")]
53174    unsafe fn test_mm256_maskz_srai_epi32() {
53175        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53176        let r = _mm256_maskz_srai_epi32::<1>(0, a);
53177        assert_eq_m256i(r, _mm256_setzero_si256());
53178        let r = _mm256_maskz_srai_epi32::<1>(0b11111111, a);
53179        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53180        assert_eq_m256i(r, e);
53181    }
53182
53183    #[simd_test(enable = "avx512f,avx512vl")]
53184    unsafe fn test_mm_mask_srai_epi32() {
53185        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53186        let r = _mm_mask_srai_epi32::<1>(a, 0, a);
53187        assert_eq_m128i(r, a);
53188        let r = _mm_mask_srai_epi32::<1>(a, 0b00001111, a);
53189        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53190        assert_eq_m128i(r, e);
53191    }
53192
53193    #[simd_test(enable = "avx512f,avx512vl")]
53194    unsafe fn test_mm_maskz_srai_epi32() {
53195        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53196        let r = _mm_maskz_srai_epi32::<1>(0, a);
53197        assert_eq_m128i(r, _mm_setzero_si128());
53198        let r = _mm_maskz_srai_epi32::<1>(0b00001111, a);
53199        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53200        assert_eq_m128i(r, e);
53201    }
53202
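    // _mm512_permute_ps applies the same 8-bit immediate to every 128-bit lane
    // (vpermilps); 0b11_11_11_11 broadcasts element 3 of each lane.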
53203    #[simd_test(enable = "avx512f")]
53204    unsafe fn test_mm512_permute_ps() {
53205        let a = _mm512_setr_ps(
53206            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53207        );
53208        let r = _mm512_permute_ps::<0b11_11_11_11>(a);
53209        let e = _mm512_setr_ps(
53210            3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15.,
53211        );
53212        assert_eq_m512(r, e);
53213    }
53214
53215    #[simd_test(enable = "avx512f")]
53216    unsafe fn test_mm512_mask_permute_ps() {
53217        let a = _mm512_setr_ps(
53218            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53219        );
53220        let r = _mm512_mask_permute_ps::<0b11_11_11_11>(a, 0, a);
53221        assert_eq_m512(r, a);
53222        let r = _mm512_mask_permute_ps::<0b11_11_11_11>(a, 0b11111111_11111111, a);
53223        let e = _mm512_setr_ps(
53224            3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15.,
53225        );
53226        assert_eq_m512(r, e);
53227    }
53228
53229    #[simd_test(enable = "avx512f")]
53230    unsafe fn test_mm512_maskz_permute_ps() {
53231        let a = _mm512_setr_ps(
53232            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53233        );
53234        let r = _mm512_maskz_permute_ps::<0b11_11_11_11>(0, a);
53235        assert_eq_m512(r, _mm512_setzero_ps());
53236        let r = _mm512_maskz_permute_ps::<0b11_11_11_11>(0b11111111_11111111, a);
53237        let e = _mm512_setr_ps(
53238            3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15.,
53239        );
53240        assert_eq_m512(r, e);
53241    }
53242
53243    #[simd_test(enable = "avx512f,avx512vl")]
53244    unsafe fn test_mm256_mask_permute_ps() {
53245        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53246        let r = _mm256_mask_permute_ps::<0b11_11_11_11>(a, 0, a);
53247        assert_eq_m256(r, a);
53248        let r = _mm256_mask_permute_ps::<0b11_11_11_11>(a, 0b11111111, a);
53249        let e = _mm256_set_ps(0., 0., 0., 0., 4., 4., 4., 4.);
53250        assert_eq_m256(r, e);
53251    }
53252
53253    #[simd_test(enable = "avx512f,avx512vl")]
53254    unsafe fn test_mm256_maskz_permute_ps() {
53255        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53256        let r = _mm256_maskz_permute_ps::<0b11_11_11_11>(0, a);
53257        assert_eq_m256(r, _mm256_setzero_ps());
53258        let r = _mm256_maskz_permute_ps::<0b11_11_11_11>(0b11111111, a);
53259        let e = _mm256_set_ps(0., 0., 0., 0., 4., 4., 4., 4.);
53260        assert_eq_m256(r, e);
53261    }
53262
53263    #[simd_test(enable = "avx512f,avx512vl")]
53264    unsafe fn test_mm_mask_permute_ps() {
53265        let a = _mm_set_ps(0., 1., 2., 3.);
53266        let r = _mm_mask_permute_ps::<0b11_11_11_11>(a, 0, a);
53267        assert_eq_m128(r, a);
53268        let r = _mm_mask_permute_ps::<0b11_11_11_11>(a, 0b00001111, a);
53269        let e = _mm_set_ps(0., 0., 0., 0.);
53270        assert_eq_m128(r, e);
53271    }
53272
53273    #[simd_test(enable = "avx512f,avx512vl")]
53274    unsafe fn test_mm_maskz_permute_ps() {
53275        let a = _mm_set_ps(0., 1., 2., 3.);
53276        let r = _mm_maskz_permute_ps::<0b11_11_11_11>(0, a);
53277        assert_eq_m128(r, _mm_setzero_ps());
53278        let r = _mm_maskz_permute_ps::<0b11_11_11_11>(0b00001111, a);
53279        let e = _mm_set_ps(0., 0., 0., 0.);
53280        assert_eq_m128(r, e);
53281    }
53282
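    // Despite its name, _mm512_permutevar_epi32 is a full lane-crossing permute (vpermd)
    // taking (idx, a). With _mm512_set_epi32 listing elements from highest to lowest,
    // index 1 addresses the second-lowest element, which holds 14 here.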
53283    #[simd_test(enable = "avx512f")]
53284    unsafe fn test_mm512_permutevar_epi32() {
53285        let idx = _mm512_set1_epi32(1);
53286        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53287        let r = _mm512_permutevar_epi32(idx, a);
53288        let e = _mm512_set1_epi32(14);
53289        assert_eq_m512i(r, e);
53290    }
53291
53292    #[simd_test(enable = "avx512f")]
53293    unsafe fn test_mm512_mask_permutevar_epi32() {
53294        let idx = _mm512_set1_epi32(1);
53295        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53296        let r = _mm512_mask_permutevar_epi32(a, 0, idx, a);
53297        assert_eq_m512i(r, a);
53298        let r = _mm512_mask_permutevar_epi32(a, 0b11111111_11111111, idx, a);
53299        let e = _mm512_set1_epi32(14);
53300        assert_eq_m512i(r, e);
53301    }
53302
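    // permutevar_ps, by contrast, permutes within each 128-bit lane using only the low
    // two bits of each index element (variable-form vpermilps).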
53303    #[simd_test(enable = "avx512f")]
53304    unsafe fn test_mm512_permutevar_ps() {
53305        let a = _mm512_set_ps(
53306            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53307        );
53308        let b = _mm512_set1_epi32(0b01);
53309        let r = _mm512_permutevar_ps(a, b);
53310        let e = _mm512_set_ps(
53311            2., 2., 2., 2., 6., 6., 6., 6., 10., 10., 10., 10., 14., 14., 14., 14.,
53312        );
53313        assert_eq_m512(r, e);
53314    }
53315
53316    #[simd_test(enable = "avx512f")]
53317    unsafe fn test_mm512_mask_permutevar_ps() {
53318        let a = _mm512_set_ps(
53319            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53320        );
53321        let b = _mm512_set1_epi32(0b01);
53322        let r = _mm512_mask_permutevar_ps(a, 0, a, b);
53323        assert_eq_m512(r, a);
53324        let r = _mm512_mask_permutevar_ps(a, 0b11111111_11111111, a, b);
53325        let e = _mm512_set_ps(
53326            2., 2., 2., 2., 6., 6., 6., 6., 10., 10., 10., 10., 14., 14., 14., 14.,
53327        );
53328        assert_eq_m512(r, e);
53329    }
53330
53331    #[simd_test(enable = "avx512f")]
53332    unsafe fn test_mm512_maskz_permutevar_ps() {
53333        let a = _mm512_set_ps(
53334            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53335        );
53336        let b = _mm512_set1_epi32(0b01);
53337        let r = _mm512_maskz_permutevar_ps(0, a, b);
53338        assert_eq_m512(r, _mm512_setzero_ps());
53339        let r = _mm512_maskz_permutevar_ps(0b00000000_11111111, a, b);
53340        let e = _mm512_set_ps(
53341            0., 0., 0., 0., 0., 0., 0., 0., 10., 10., 10., 10., 14., 14., 14., 14.,
53342        );
53343        assert_eq_m512(r, e);
53344    }
53345
53346    #[simd_test(enable = "avx512f,avx512vl")]
53347    unsafe fn test_mm256_mask_permutevar_ps() {
53348        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53349        let b = _mm256_set1_epi32(0b01);
53350        let r = _mm256_mask_permutevar_ps(a, 0, a, b);
53351        assert_eq_m256(r, a);
53352        let r = _mm256_mask_permutevar_ps(a, 0b11111111, a, b);
53353        let e = _mm256_set_ps(2., 2., 2., 2., 6., 6., 6., 6.);
53354        assert_eq_m256(r, e);
53355    }
53356
53357    #[simd_test(enable = "avx512f,avx512vl")]
53358    unsafe fn test_mm256_maskz_permutevar_ps() {
53359        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53360        let b = _mm256_set1_epi32(0b01);
53361        let r = _mm256_maskz_permutevar_ps(0, a, b);
53362        assert_eq_m256(r, _mm256_setzero_ps());
53363        let r = _mm256_maskz_permutevar_ps(0b11111111, a, b);
53364        let e = _mm256_set_ps(2., 2., 2., 2., 6., 6., 6., 6.);
53365        assert_eq_m256(r, e);
53366    }
53367
53368    #[simd_test(enable = "avx512f,avx512vl")]
53369    unsafe fn test_mm_mask_permutevar_ps() {
53370        let a = _mm_set_ps(0., 1., 2., 3.);
53371        let b = _mm_set1_epi32(0b01);
53372        let r = _mm_mask_permutevar_ps(a, 0, a, b);
53373        assert_eq_m128(r, a);
53374        let r = _mm_mask_permutevar_ps(a, 0b00001111, a, b);
53375        let e = _mm_set_ps(2., 2., 2., 2.);
53376        assert_eq_m128(r, e);
53377    }
53378
53379    #[simd_test(enable = "avx512f,avx512vl")]
53380    unsafe fn test_mm_maskz_permutevar_ps() {
53381        let a = _mm_set_ps(0., 1., 2., 3.);
53382        let b = _mm_set1_epi32(0b01);
53383        let r = _mm_maskz_permutevar_ps(0, a, b);
53384        assert_eq_m128(r, _mm_setzero_ps());
53385        let r = _mm_maskz_permutevar_ps(0b00001111, a, b);
53386        let e = _mm_set_ps(2., 2., 2., 2.);
53387        assert_eq_m128(r, e);
53388    }
53389
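    // permutexvar is the lane-crossing permute (vpermd/vpermps); every index selects
    // from the full-width source, unlike the in-lane permutevar_ps above.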
53390    #[simd_test(enable = "avx512f")]
53391    unsafe fn test_mm512_permutexvar_epi32() {
53392        let idx = _mm512_set1_epi32(1);
53393        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53394        let r = _mm512_permutexvar_epi32(idx, a);
53395        let e = _mm512_set1_epi32(14);
53396        assert_eq_m512i(r, e);
53397    }
53398
53399    #[simd_test(enable = "avx512f")]
53400    unsafe fn test_mm512_mask_permutexvar_epi32() {
53401        let idx = _mm512_set1_epi32(1);
53402        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53403        let r = _mm512_mask_permutexvar_epi32(a, 0, idx, a);
53404        assert_eq_m512i(r, a);
53405        let r = _mm512_mask_permutexvar_epi32(a, 0b11111111_11111111, idx, a);
53406        let e = _mm512_set1_epi32(14);
53407        assert_eq_m512i(r, e);
53408    }
53409
53410    #[simd_test(enable = "avx512f")]
53411    unsafe fn test_mm512_maskz_permutexvar_epi32() {
53412        let idx = _mm512_set1_epi32(1);
53413        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53414        let r = _mm512_maskz_permutexvar_epi32(0, idx, a);
53415        assert_eq_m512i(r, _mm512_setzero_si512());
53416        let r = _mm512_maskz_permutexvar_epi32(0b00000000_11111111, idx, a);
53417        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14);
53418        assert_eq_m512i(r, e);
53419    }
53420
53421    #[simd_test(enable = "avx512f,avx512vl")]
53422    unsafe fn test_mm256_permutexvar_epi32() {
53423        let idx = _mm256_set1_epi32(1);
53424        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53425        let r = _mm256_permutexvar_epi32(idx, a);
53426        let e = _mm256_set1_epi32(6);
53427        assert_eq_m256i(r, e);
53428    }
53429
53430    #[simd_test(enable = "avx512f,avx512vl")]
53431    unsafe fn test_mm256_mask_permutexvar_epi32() {
53432        let idx = _mm256_set1_epi32(1);
53433        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53434        let r = _mm256_mask_permutexvar_epi32(a, 0, idx, a);
53435        assert_eq_m256i(r, a);
53436        let r = _mm256_mask_permutexvar_epi32(a, 0b11111111, idx, a);
53437        let e = _mm256_set1_epi32(6);
53438        assert_eq_m256i(r, e);
53439    }
53440
53441    #[simd_test(enable = "avx512f,avx512vl")]
53442    unsafe fn test_mm256_maskz_permutexvar_epi32() {
53443        let idx = _mm256_set1_epi32(1);
53444        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53445        let r = _mm256_maskz_permutexvar_epi32(0, idx, a);
53446        assert_eq_m256i(r, _mm256_setzero_si256());
53447        let r = _mm256_maskz_permutexvar_epi32(0b11111111, idx, a);
53448        let e = _mm256_set1_epi32(6);
53449        assert_eq_m256i(r, e);
53450    }
53451
53452    #[simd_test(enable = "avx512f")]
53453    unsafe fn test_mm512_permutexvar_ps() {
53454        let idx = _mm512_set1_epi32(1);
53455        let a = _mm512_set_ps(
53456            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53457        );
53458        let r = _mm512_permutexvar_ps(idx, a);
53459        let e = _mm512_set1_ps(14.);
53460        assert_eq_m512(r, e);
53461    }
53462
53463    #[simd_test(enable = "avx512f")]
53464    unsafe fn test_mm512_mask_permutexvar_ps() {
53465        let idx = _mm512_set1_epi32(1);
53466        let a = _mm512_set_ps(
53467            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53468        );
53469        let r = _mm512_mask_permutexvar_ps(a, 0, idx, a);
53470        assert_eq_m512(r, a);
53471        let r = _mm512_mask_permutexvar_ps(a, 0b11111111_11111111, idx, a);
53472        let e = _mm512_set1_ps(14.);
53473        assert_eq_m512(r, e);
53474    }
53475
53476    #[simd_test(enable = "avx512f")]
53477    unsafe fn test_mm512_maskz_permutexvar_ps() {
53478        let idx = _mm512_set1_epi32(1);
53479        let a = _mm512_set_ps(
53480            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53481        );
53482        let r = _mm512_maskz_permutexvar_ps(0, idx, a);
53483        assert_eq_m512(r, _mm512_setzero_ps());
53484        let r = _mm512_maskz_permutexvar_ps(0b00000000_11111111, idx, a);
53485        let e = _mm512_set_ps(
53486            0., 0., 0., 0., 0., 0., 0., 0., 14., 14., 14., 14., 14., 14., 14., 14.,
53487        );
53488        assert_eq_m512(r, e);
53489    }
53490
53491    #[simd_test(enable = "avx512f,avx512vl")]
53492    unsafe fn test_mm256_permutexvar_ps() {
53493        let idx = _mm256_set1_epi32(1);
53494        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53495        let r = _mm256_permutexvar_ps(idx, a);
53496        let e = _mm256_set1_ps(6.);
53497        assert_eq_m256(r, e);
53498    }
53499
53500    #[simd_test(enable = "avx512f,avx512vl")]
53501    unsafe fn test_mm256_mask_permutexvar_ps() {
53502        let idx = _mm256_set1_epi32(1);
53503        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53504        let r = _mm256_mask_permutexvar_ps(a, 0, idx, a);
53505        assert_eq_m256(r, a);
53506        let r = _mm256_mask_permutexvar_ps(a, 0b11111111, idx, a);
53507        let e = _mm256_set1_ps(6.);
53508        assert_eq_m256(r, e);
53509    }
53510
53511    #[simd_test(enable = "avx512f,avx512vl")]
53512    unsafe fn test_mm256_maskz_permutexvar_ps() {
53513        let idx = _mm256_set1_epi32(1);
53514        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53515        let r = _mm256_maskz_permutexvar_ps(0, idx, a);
53516        assert_eq_m256(r, _mm256_setzero_ps());
53517        let r = _mm256_maskz_permutexvar_ps(0b11111111, idx, a);
53518        let e = _mm256_set1_ps(6.);
53519        assert_eq_m256(r, e);
53520    }
53521
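    // permutex2var indexes the concatenation of a and b: the top index bit (1 << 4 for
    // 512-bit, 1 << 3 for 256-bit, 1 << 2 for 128-bit vectors) selects from b, and the
    // lower bits pick the element. In the mask2 variants, masked-off lanes come from idx.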
53522    #[simd_test(enable = "avx512f")]
53523    unsafe fn test_mm512_permutex2var_epi32() {
53524        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53525        #[rustfmt::skip]
53526        let idx = _mm512_set_epi32(
53527            1, 1 << 4, 2, 1 << 4,
53528            3, 1 << 4, 4, 1 << 4,
53529            5, 1 << 4, 6, 1 << 4,
53530            7, 1 << 4, 8, 1 << 4,
53531        );
53532        let b = _mm512_set1_epi32(100);
53533        let r = _mm512_permutex2var_epi32(a, idx, b);
53534        let e = _mm512_set_epi32(
53535            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
53536        );
53537        assert_eq_m512i(r, e);
53538    }
53539
53540    #[simd_test(enable = "avx512f")]
53541    unsafe fn test_mm512_mask_permutex2var_epi32() {
53542        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53543        #[rustfmt::skip]
53544        let idx = _mm512_set_epi32(
53545            1, 1 << 4, 2, 1 << 4,
53546            3, 1 << 4, 4, 1 << 4,
53547            5, 1 << 4, 6, 1 << 4,
53548            7, 1 << 4, 8, 1 << 4,
53549        );
53550        let b = _mm512_set1_epi32(100);
53551        let r = _mm512_mask_permutex2var_epi32(a, 0, idx, b);
53552        assert_eq_m512i(r, a);
53553        let r = _mm512_mask_permutex2var_epi32(a, 0b11111111_11111111, idx, b);
53554        let e = _mm512_set_epi32(
53555            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
53556        );
53557        assert_eq_m512i(r, e);
53558    }
53559
53560    #[simd_test(enable = "avx512f")]
53561    unsafe fn test_mm512_maskz_permutex2var_epi32() {
53562        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53563        #[rustfmt::skip]
53564        let idx = _mm512_set_epi32(
53565            1, 1 << 4, 2, 1 << 4,
53566            3, 1 << 4, 4, 1 << 4,
53567            5, 1 << 4, 6, 1 << 4,
53568            7, 1 << 4, 8, 1 << 4,
53569        );
53570        let b = _mm512_set1_epi32(100);
53571        let r = _mm512_maskz_permutex2var_epi32(0, a, idx, b);
53572        assert_eq_m512i(r, _mm512_setzero_si512());
53573        let r = _mm512_maskz_permutex2var_epi32(0b00000000_11111111, a, idx, b);
53574        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 10, 100, 9, 100, 8, 100, 7, 100);
53575        assert_eq_m512i(r, e);
53576    }
53577
53578    #[simd_test(enable = "avx512f")]
53579    unsafe fn test_mm512_mask2_permutex2var_epi32() {
53580        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53581        #[rustfmt::skip]
53582        let idx = _mm512_set_epi32(
53583            1000, 1 << 4, 2000, 1 << 4,
53584            3000, 1 << 4, 4000, 1 << 4,
53585            5, 1 << 4, 6, 1 << 4,
53586            7, 1 << 4, 8, 1 << 4,
53587        );
53588        let b = _mm512_set1_epi32(100);
53589        let r = _mm512_mask2_permutex2var_epi32(a, idx, 0, b);
53590        assert_eq_m512i(r, idx);
53591        let r = _mm512_mask2_permutex2var_epi32(a, idx, 0b00000000_11111111, b);
53592        #[rustfmt::skip]
53593        let e = _mm512_set_epi32(
53594            1000, 1 << 4, 2000, 1 << 4,
53595            3000, 1 << 4, 4000, 1 << 4,
53596            10, 100, 9, 100,
53597            8, 100, 7, 100,
53598        );
53599        assert_eq_m512i(r, e);
53600    }
53601
53602    #[simd_test(enable = "avx512f,avx512vl")]
53603    unsafe fn test_mm256_permutex2var_epi32() {
53604        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53605        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53606        let b = _mm256_set1_epi32(100);
53607        let r = _mm256_permutex2var_epi32(a, idx, b);
53608        let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
53609        assert_eq_m256i(r, e);
53610    }
53611
53612    #[simd_test(enable = "avx512f,avx512vl")]
53613    unsafe fn test_mm256_mask_permutex2var_epi32() {
53614        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53615        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53616        let b = _mm256_set1_epi32(100);
53617        let r = _mm256_mask_permutex2var_epi32(a, 0, idx, b);
53618        assert_eq_m256i(r, a);
53619        let r = _mm256_mask_permutex2var_epi32(a, 0b11111111, idx, b);
53620        let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
53621        assert_eq_m256i(r, e);
53622    }
53623
53624    #[simd_test(enable = "avx512f,avx512vl")]
53625    unsafe fn test_mm256_maskz_permutex2var_epi32() {
53626        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53627        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53628        let b = _mm256_set1_epi32(100);
53629        let r = _mm256_maskz_permutex2var_epi32(0, a, idx, b);
53630        assert_eq_m256i(r, _mm256_setzero_si256());
53631        let r = _mm256_maskz_permutex2var_epi32(0b11111111, a, idx, b);
53632        let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
53633        assert_eq_m256i(r, e);
53634    }
53635
53636    #[simd_test(enable = "avx512f,avx512vl")]
53637    unsafe fn test_mm256_mask2_permutex2var_epi32() {
53638        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53639        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53640        let b = _mm256_set1_epi32(100);
53641        let r = _mm256_mask2_permutex2var_epi32(a, idx, 0, b);
53642        assert_eq_m256i(r, idx);
53643        let r = _mm256_mask2_permutex2var_epi32(a, idx, 0b11111111, b);
53644        let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
53645        assert_eq_m256i(r, e);
53646    }
53647
53648    #[simd_test(enable = "avx512f,avx512vl")]
53649    unsafe fn test_mm_permutex2var_epi32() {
53650        let a = _mm_set_epi32(0, 1, 2, 3);
53651        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53652        let b = _mm_set1_epi32(100);
53653        let r = _mm_permutex2var_epi32(a, idx, b);
53654        let e = _mm_set_epi32(2, 100, 1, 100);
53655        assert_eq_m128i(r, e);
53656    }
53657
53658    #[simd_test(enable = "avx512f,avx512vl")]
53659    unsafe fn test_mm_mask_permutex2var_epi32() {
53660        let a = _mm_set_epi32(0, 1, 2, 3);
53661        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53662        let b = _mm_set1_epi32(100);
53663        let r = _mm_mask_permutex2var_epi32(a, 0, idx, b);
53664        assert_eq_m128i(r, a);
53665        let r = _mm_mask_permutex2var_epi32(a, 0b00001111, idx, b);
53666        let e = _mm_set_epi32(2, 100, 1, 100);
53667        assert_eq_m128i(r, e);
53668    }
53669
53670    #[simd_test(enable = "avx512f,avx512vl")]
53671    unsafe fn test_mm_maskz_permutex2var_epi32() {
53672        let a = _mm_set_epi32(0, 1, 2, 3);
53673        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53674        let b = _mm_set1_epi32(100);
53675        let r = _mm_maskz_permutex2var_epi32(0, a, idx, b);
53676        assert_eq_m128i(r, _mm_setzero_si128());
53677        let r = _mm_maskz_permutex2var_epi32(0b00001111, a, idx, b);
53678        let e = _mm_set_epi32(2, 100, 1, 100);
53679        assert_eq_m128i(r, e);
53680    }
53681
53682    #[simd_test(enable = "avx512f,avx512vl")]
53683    unsafe fn test_mm_mask2_permutex2var_epi32() {
53684        let a = _mm_set_epi32(0, 1, 2, 3);
53685        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53686        let b = _mm_set1_epi32(100);
53687        let r = _mm_mask2_permutex2var_epi32(a, idx, 0, b);
53688        assert_eq_m128i(r, idx);
53689        let r = _mm_mask2_permutex2var_epi32(a, idx, 0b00001111, b);
53690        let e = _mm_set_epi32(2, 100, 1, 100);
53691        assert_eq_m128i(r, e);
53692    }
53693
53694    #[simd_test(enable = "avx512f")]
53695    unsafe fn test_mm512_permutex2var_ps() {
53696        let a = _mm512_set_ps(
53697            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53698        );
53699        #[rustfmt::skip]
53700        let idx = _mm512_set_epi32(
53701            1, 1 << 4, 2, 1 << 4,
53702            3, 1 << 4, 4, 1 << 4,
53703            5, 1 << 4, 6, 1 << 4,
53704            7, 1 << 4, 8, 1 << 4,
53705        );
53706        let b = _mm512_set1_ps(100.);
53707        let r = _mm512_permutex2var_ps(a, idx, b);
53708        let e = _mm512_set_ps(
53709            14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
53710        );
53711        assert_eq_m512(r, e);
53712    }
53713
53714    #[simd_test(enable = "avx512f")]
53715    unsafe fn test_mm512_mask_permutex2var_ps() {
53716        let a = _mm512_set_ps(
53717            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53718        );
53719        #[rustfmt::skip]
53720        let idx = _mm512_set_epi32(
53721            1, 1 << 4, 2, 1 << 4,
53722            3, 1 << 4, 4, 1 << 4,
53723            5, 1 << 4, 6, 1 << 4,
53724            7, 1 << 4, 8, 1 << 4,
53725        );
53726        let b = _mm512_set1_ps(100.);
53727        let r = _mm512_mask_permutex2var_ps(a, 0, idx, b);
53728        assert_eq_m512(r, a);
53729        let r = _mm512_mask_permutex2var_ps(a, 0b11111111_11111111, idx, b);
53730        let e = _mm512_set_ps(
53731            14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
53732        );
53733        assert_eq_m512(r, e);
53734    }
53735
53736    #[simd_test(enable = "avx512f")]
53737    unsafe fn test_mm512_maskz_permutex2var_ps() {
53738        let a = _mm512_set_ps(
53739            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53740        );
53741        #[rustfmt::skip]
53742        let idx = _mm512_set_epi32(
53743            1, 1 << 4, 2, 1 << 4,
53744            3, 1 << 4, 4, 1 << 4,
53745            5, 1 << 4, 6, 1 << 4,
53746            7, 1 << 4, 8, 1 << 4,
53747        );
53748        let b = _mm512_set1_ps(100.);
53749        let r = _mm512_maskz_permutex2var_ps(0, a, idx, b);
53750        assert_eq_m512(r, _mm512_setzero_ps());
53751        let r = _mm512_maskz_permutex2var_ps(0b00000000_11111111, a, idx, b);
53752        let e = _mm512_set_ps(
53753            0., 0., 0., 0., 0., 0., 0., 0., 10., 100., 9., 100., 8., 100., 7., 100.,
53754        );
53755        assert_eq_m512(r, e);
53756    }
53757
53758    #[simd_test(enable = "avx512f")]
53759    unsafe fn test_mm512_mask2_permutex2var_ps() {
53760        let a = _mm512_set_ps(
53761            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53762        );
53763        #[rustfmt::skip]
53764        let idx = _mm512_set_epi32(
53765            1, 1 << 4, 2, 1 << 4,
53766            3, 1 << 4, 4, 1 << 4,
53767            5, 1 << 4, 6, 1 << 4,
53768            7, 1 << 4, 8, 1 << 4,
53769        );
53770        let b = _mm512_set1_ps(100.);
53771        let r = _mm512_mask2_permutex2var_ps(a, idx, 0, b);
53772        assert_eq_m512(r, _mm512_castsi512_ps(idx));
53773        let r = _mm512_mask2_permutex2var_ps(a, idx, 0b11111111_11111111, b);
53774        let e = _mm512_set_ps(
53775            14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
53776        );
53777        assert_eq_m512(r, e);
53778    }
53779
53780    #[simd_test(enable = "avx512f,avx512vl")]
53781    unsafe fn test_mm256_permutex2var_ps() {
53782        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53783        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53784        let b = _mm256_set1_ps(100.);
53785        let r = _mm256_permutex2var_ps(a, idx, b);
53786        let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
53787        assert_eq_m256(r, e);
53788    }
53789
53790    #[simd_test(enable = "avx512f,avx512vl")]
53791    unsafe fn test_mm256_mask_permutex2var_ps() {
53792        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53793        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53794        let b = _mm256_set1_ps(100.);
53795        let r = _mm256_mask_permutex2var_ps(a, 0, idx, b);
53796        assert_eq_m256(r, a);
53797        let r = _mm256_mask_permutex2var_ps(a, 0b11111111, idx, b);
53798        let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
53799        assert_eq_m256(r, e);
53800    }
53801
53802    #[simd_test(enable = "avx512f,avx512vl")]
53803    unsafe fn test_mm256_maskz_permutex2var_ps() {
53804        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53805        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53806        let b = _mm256_set1_ps(100.);
53807        let r = _mm256_maskz_permutex2var_ps(0, a, idx, b);
53808        assert_eq_m256(r, _mm256_setzero_ps());
53809        let r = _mm256_maskz_permutex2var_ps(0b11111111, a, idx, b);
53810        let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
53811        assert_eq_m256(r, e);
53812    }
53813
53814    #[simd_test(enable = "avx512f,avx512vl")]
53815    unsafe fn test_mm256_mask2_permutex2var_ps() {
53816        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53817        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53818        let b = _mm256_set1_ps(100.);
53819        let r = _mm256_mask2_permutex2var_ps(a, idx, 0, b);
53820        assert_eq_m256(r, _mm256_castsi256_ps(idx));
53821        let r = _mm256_mask2_permutex2var_ps(a, idx, 0b11111111, b);
53822        let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
53823        assert_eq_m256(r, e);
53824    }
53825
53826    #[simd_test(enable = "avx512f,avx512vl")]
53827    unsafe fn test_mm_permutex2var_ps() {
53828        let a = _mm_set_ps(0., 1., 2., 3.);
53829        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53830        let b = _mm_set1_ps(100.);
53831        let r = _mm_permutex2var_ps(a, idx, b);
53832        let e = _mm_set_ps(2., 100., 1., 100.);
53833        assert_eq_m128(r, e);
53834    }
53835
53836    #[simd_test(enable = "avx512f,avx512vl")]
53837    unsafe fn test_mm_mask_permutex2var_ps() {
53838        let a = _mm_set_ps(0., 1., 2., 3.);
53839        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53840        let b = _mm_set1_ps(100.);
53841        let r = _mm_mask_permutex2var_ps(a, 0, idx, b);
53842        assert_eq_m128(r, a);
53843        let r = _mm_mask_permutex2var_ps(a, 0b00001111, idx, b);
53844        let e = _mm_set_ps(2., 100., 1., 100.);
53845        assert_eq_m128(r, e);
53846    }
53847
53848    #[simd_test(enable = "avx512f,avx512vl")]
53849    unsafe fn test_mm_maskz_permutex2var_ps() {
53850        let a = _mm_set_ps(0., 1., 2., 3.);
53851        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53852        let b = _mm_set1_ps(100.);
53853        let r = _mm_maskz_permutex2var_ps(0, a, idx, b);
53854        assert_eq_m128(r, _mm_setzero_ps());
53855        let r = _mm_maskz_permutex2var_ps(0b00001111, a, idx, b);
53856        let e = _mm_set_ps(2., 100., 1., 100.);
53857        assert_eq_m128(r, e);
53858    }
53859
53860    #[simd_test(enable = "avx512f,avx512vl")]
53861    unsafe fn test_mm_mask2_permutex2var_ps() {
53862        let a = _mm_set_ps(0., 1., 2., 3.);
53863        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53864        let b = _mm_set1_ps(100.);
53865        let r = _mm_mask2_permutex2var_ps(a, idx, 0, b);
53866        assert_eq_m128(r, _mm_castsi128_ps(idx));
53867        let r = _mm_mask2_permutex2var_ps(a, idx, 0b00001111, b);
53868        let e = _mm_set_ps(2., 100., 1., 100.);
53869        assert_eq_m128(r, e);
53870    }
53871
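    // _MM_PERM_AADD is the immediate 0b00_00_11_11, producing [a3, a3, a0, a0] (memory
    // order) within each 128-bit lane.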
53872    #[simd_test(enable = "avx512f")]
53873    unsafe fn test_mm512_shuffle_epi32() {
53874        let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
53875        let r = _mm512_shuffle_epi32::<_MM_PERM_AADD>(a);
53876        let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 8, 8, 1, 1, 16, 16, 9, 9);
53877        assert_eq_m512i(r, e);
53878    }
53879
53880    #[simd_test(enable = "avx512f")]
53881    unsafe fn test_mm512_mask_shuffle_epi32() {
53882        let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
53883        let r = _mm512_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a);
53884        assert_eq_m512i(r, a);
53885        let r = _mm512_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b11111111_11111111, a);
53886        let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 8, 8, 1, 1, 16, 16, 9, 9);
53887        assert_eq_m512i(r, e);
53888    }
53889
53890    #[simd_test(enable = "avx512f")]
53891    unsafe fn test_mm512_maskz_shuffle_epi32() {
53892        let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
53893        let r = _mm512_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a);
53894        assert_eq_m512i(r, _mm512_setzero_si512());
53895        let r = _mm512_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b00000000_11111111, a);
53896        let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0);
53897        assert_eq_m512i(r, e);
53898    }
53899
53900    #[simd_test(enable = "avx512f,avx512vl")]
53901    unsafe fn test_mm256_mask_shuffle_epi32() {
53902        let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
53903        let r = _mm256_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a);
53904        assert_eq_m256i(r, a);
53905        let r = _mm256_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b11111111, a);
53906        let e = _mm256_set_epi32(8, 8, 1, 1, 16, 16, 9, 9);
53907        assert_eq_m256i(r, e);
53908    }
53909
53910    #[simd_test(enable = "avx512f,avx512vl")]
53911    unsafe fn test_mm256_maskz_shuffle_epi32() {
53912        let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
53913        let r = _mm256_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a);
53914        assert_eq_m256i(r, _mm256_setzero_si256());
53915        let r = _mm256_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b11111111, a);
53916        let e = _mm256_set_epi32(8, 8, 1, 1, 16, 16, 9, 9);
53917        assert_eq_m256i(r, e);
53918    }
53919
53920    #[simd_test(enable = "avx512f,avx512vl")]
53921    unsafe fn test_mm_mask_shuffle_epi32() {
53922        let a = _mm_set_epi32(1, 4, 5, 8);
53923        let r = _mm_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a);
53924        assert_eq_m128i(r, a);
53925        let r = _mm_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b00001111, a);
53926        let e = _mm_set_epi32(8, 8, 1, 1);
53927        assert_eq_m128i(r, e);
53928    }
53929
53930    #[simd_test(enable = "avx512f,avx512vl")]
53931    unsafe fn test_mm_maskz_shuffle_epi32() {
53932        let a = _mm_set_epi32(1, 4, 5, 8);
53933        let r = _mm_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a);
53934        assert_eq_m128i(r, _mm_setzero_si128());
53935        let r = _mm_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b00001111, a);
53936        let e = _mm_set_epi32(8, 8, 1, 1);
53937        assert_eq_m128i(r, e);
53938    }
53939
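    // shuffle_ps builds each 128-bit result lane from two elements of `a` (low half of
    // the lane) and two elements of `b` (high half), selected by the immediate.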
53940    #[simd_test(enable = "avx512f")]
53941    unsafe fn test_mm512_shuffle_ps() {
53942        let a = _mm512_setr_ps(
53943            1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
53944        );
53945        let b = _mm512_setr_ps(
53946            2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
53947        );
53948        let r = _mm512_shuffle_ps::<0b00_00_11_11>(a, b);
53949        let e = _mm512_setr_ps(
53950            8., 8., 2., 2., 16., 16., 10., 10., 8., 8., 2., 2., 16., 16., 10., 10.,
53951        );
53952        assert_eq_m512(r, e);
53953    }
53954
53955    #[simd_test(enable = "avx512f")]
53956    unsafe fn test_mm512_mask_shuffle_ps() {
53957        let a = _mm512_setr_ps(
53958            1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
53959        );
53960        let b = _mm512_setr_ps(
53961            2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
53962        );
53963        let r = _mm512_mask_shuffle_ps::<0b00_00_11_11>(a, 0, a, b);
53964        assert_eq_m512(r, a);
53965        let r = _mm512_mask_shuffle_ps::<0b00_00_11_11>(a, 0b11111111_11111111, a, b);
53966        let e = _mm512_setr_ps(
53967            8., 8., 2., 2., 16., 16., 10., 10., 8., 8., 2., 2., 16., 16., 10., 10.,
53968        );
53969        assert_eq_m512(r, e);
53970    }
53971
53972    #[simd_test(enable = "avx512f")]
53973    unsafe fn test_mm512_maskz_shuffle_ps() {
53974        let a = _mm512_setr_ps(
53975            1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
53976        );
53977        let b = _mm512_setr_ps(
53978            2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
53979        );
53980        let r = _mm512_maskz_shuffle_ps::<0b00_00_11_11>(0, a, b);
53981        assert_eq_m512(r, _mm512_setzero_ps());
53982        let r = _mm512_maskz_shuffle_ps::<0b00_00_11_11>(0b00000000_11111111, a, b);
53983        let e = _mm512_setr_ps(
53984            8., 8., 2., 2., 16., 16., 10., 10., 0., 0., 0., 0., 0., 0., 0., 0.,
53985        );
53986        assert_eq_m512(r, e);
53987    }
53988
53989    #[simd_test(enable = "avx512f,avx512vl")]
53990    unsafe fn test_mm256_mask_shuffle_ps() {
53991        let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
53992        let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
53993        let r = _mm256_mask_shuffle_ps::<0b11_11_11_11>(a, 0, a, b);
53994        assert_eq_m256(r, a);
53995        let r = _mm256_mask_shuffle_ps::<0b00_00_11_11>(a, 0b11111111, a, b);
53996        let e = _mm256_set_ps(7., 7., 1., 1., 15., 15., 9., 9.);
53997        assert_eq_m256(r, e);
53998    }
53999
54000    #[simd_test(enable = "avx512f,avx512vl")]
54001    unsafe fn test_mm256_maskz_shuffle_ps() {
54002        let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
54003        let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
54004        let r = _mm256_maskz_shuffle_ps::<0b11_11_11_11>(0, a, b);
54005        assert_eq_m256(r, _mm256_setzero_ps());
54006        let r = _mm256_maskz_shuffle_ps::<0b00_00_11_11>(0b11111111, a, b);
54007        let e = _mm256_set_ps(7., 7., 1., 1., 15., 15., 9., 9.);
54008        assert_eq_m256(r, e);
54009    }
54010
54011    #[simd_test(enable = "avx512f,avx512vl")]
54012    unsafe fn test_mm_mask_shuffle_ps() {
54013        let a = _mm_set_ps(1., 4., 5., 8.);
54014        let b = _mm_set_ps(2., 3., 6., 7.);
54015        let r = _mm_mask_shuffle_ps::<0b11_11_11_11>(a, 0, a, b);
54016        assert_eq_m128(r, a);
54017        let r = _mm_mask_shuffle_ps::<0b00_00_11_11>(a, 0b00001111, a, b);
54018        let e = _mm_set_ps(7., 7., 1., 1.);
54019        assert_eq_m128(r, e);
54020    }
54021
54022    #[simd_test(enable = "avx512f,avx512vl")]
54023    unsafe fn test_mm_maskz_shuffle_ps() {
54024        let a = _mm_set_ps(1., 4., 5., 8.);
54025        let b = _mm_set_ps(2., 3., 6., 7.);
54026        let r = _mm_maskz_shuffle_ps::<0b11_11_11_11>(0, a, b);
54027        assert_eq_m128(r, _mm_setzero_ps());
54028        let r = _mm_maskz_shuffle_ps::<0b00_00_11_11>(0b00001111, a, b);
54029        let e = _mm_set_ps(7., 7., 1., 1.);
54030        assert_eq_m128(r, e);
54031    }
54032
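    // shuffle_i32x4 / shuffle_f32x4 move whole 128-bit lanes: the low result lanes are
    // picked from `a` and the high result lanes from `b` according to the immediate.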
54033    #[simd_test(enable = "avx512f")]
54034    unsafe fn test_mm512_shuffle_i32x4() {
54035        let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
54036        let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
54037        let r = _mm512_shuffle_i32x4::<0b00_00_00_00>(a, b);
54038        let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 2, 3, 6, 7, 2, 3, 6, 7);
54039        assert_eq_m512i(r, e);
54040    }
54041
54042    #[simd_test(enable = "avx512f")]
54043    unsafe fn test_mm512_mask_shuffle_i32x4() {
54044        let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
54045        let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
54046        let r = _mm512_mask_shuffle_i32x4::<0b00_00_00_00>(a, 0, a, b);
54047        assert_eq_m512i(r, a);
54048        let r = _mm512_mask_shuffle_i32x4::<0b00_00_00_00>(a, 0b11111111_11111111, a, b);
54049        let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 2, 3, 6, 7, 2, 3, 6, 7);
54050        assert_eq_m512i(r, e);
54051    }
54052
54053    #[simd_test(enable = "avx512f")]
54054    unsafe fn test_mm512_maskz_shuffle_i32x4() {
54055        let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
54056        let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
54057        let r = _mm512_maskz_shuffle_i32x4::<0b00_00_00_00>(0, a, b);
54058        assert_eq_m512i(r, _mm512_setzero_si512());
54059        let r = _mm512_maskz_shuffle_i32x4::<0b00_00_00_00>(0b00000000_11111111, a, b);
54060        let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 0, 0, 0, 0, 0, 0, 0, 0);
54061        assert_eq_m512i(r, e);
54062    }
54063
54064    #[simd_test(enable = "avx512f,avx512vl")]
54065    unsafe fn test_mm256_shuffle_i32x4() {
54066        let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
54067        let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
54068        let r = _mm256_shuffle_i32x4::<0b00>(a, b);
54069        let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16);
54070        assert_eq_m256i(r, e);
54071    }
54072
54073    #[simd_test(enable = "avx512f,avx512vl")]
54074    unsafe fn test_mm256_mask_shuffle_i32x4() {
54075        let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
54076        let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
54077        let r = _mm256_mask_shuffle_i32x4::<0b00>(a, 0, a, b);
54078        assert_eq_m256i(r, a);
54079        let r = _mm256_mask_shuffle_i32x4::<0b00>(a, 0b11111111, a, b);
54080        let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16);
54081        assert_eq_m256i(r, e);
54082    }
54083
54084    #[simd_test(enable = "avx512f,avx512vl")]
54085    unsafe fn test_mm256_maskz_shuffle_i32x4() {
54086        let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
54087        let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
54088        let r = _mm256_maskz_shuffle_i32x4::<0b00>(0, a, b);
54089        assert_eq_m256i(r, _mm256_setzero_si256());
54090        let r = _mm256_maskz_shuffle_i32x4::<0b00>(0b11111111, a, b);
54091        let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16);
54092        assert_eq_m256i(r, e);
54093    }
54094
54095    #[simd_test(enable = "avx512f")]
54096    unsafe fn test_mm512_shuffle_f32x4() {
54097        let a = _mm512_setr_ps(
54098            1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
54099        );
54100        let b = _mm512_setr_ps(
54101            2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
54102        );
54103        let r = _mm512_shuffle_f32x4::<0b00_00_00_00>(a, b);
54104        let e = _mm512_setr_ps(
54105            1., 4., 5., 8., 1., 4., 5., 8., 2., 3., 6., 7., 2., 3., 6., 7.,
54106        );
54107        assert_eq_m512(r, e);
54108    }
54109
54110    #[simd_test(enable = "avx512f")]
54111    unsafe fn test_mm512_mask_shuffle_f32x4() {
54112        let a = _mm512_setr_ps(
54113            1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
54114        );
54115        let b = _mm512_setr_ps(
54116            2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
54117        );
54118        let r = _mm512_mask_shuffle_f32x4::<0b00_00_00_00>(a, 0, a, b);
54119        assert_eq_m512(r, a);
54120        let r = _mm512_mask_shuffle_f32x4::<0b00_00_00_00>(a, 0b11111111_11111111, a, b);
54121        let e = _mm512_setr_ps(
54122            1., 4., 5., 8., 1., 4., 5., 8., 2., 3., 6., 7., 2., 3., 6., 7.,
54123        );
54124        assert_eq_m512(r, e);
54125    }
54126
54127    #[simd_test(enable = "avx512f")]
54128    unsafe fn test_mm512_maskz_shuffle_f32x4() {
54129        let a = _mm512_setr_ps(
54130            1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
54131        );
54132        let b = _mm512_setr_ps(
54133            2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
54134        );
54135        let r = _mm512_maskz_shuffle_f32x4::<0b00_00_00_00>(0, a, b);
54136        assert_eq_m512(r, _mm512_setzero_ps());
54137        let r = _mm512_maskz_shuffle_f32x4::<0b00_00_00_00>(0b00000000_11111111, a, b);
54138        let e = _mm512_setr_ps(
54139            1., 4., 5., 8., 1., 4., 5., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
54140        );
54141        assert_eq_m512(r, e);
54142    }
54143
54144    #[simd_test(enable = "avx512f,avx512vl")]
54145    unsafe fn test_mm256_shuffle_f32x4() {
54146        let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
54147        let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
54148        let r = _mm256_shuffle_f32x4::<0b00>(a, b);
54149        let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.);
54150        assert_eq_m256(r, e);
54151    }
54152
54153    #[simd_test(enable = "avx512f,avx512vl")]
54154    unsafe fn test_mm256_mask_shuffle_f32x4() {
54155        let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
54156        let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
54157        let r = _mm256_mask_shuffle_f32x4::<0b00>(a, 0, a, b);
54158        assert_eq_m256(r, a);
54159        let r = _mm256_mask_shuffle_f32x4::<0b00>(a, 0b11111111, a, b);
54160        let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.);
54161        assert_eq_m256(r, e);
54162    }
54163
54164    #[simd_test(enable = "avx512f,avx512vl")]
54165    unsafe fn test_mm256_maskz_shuffle_f32x4() {
54166        let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
54167        let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
54168        let r = _mm256_maskz_shuffle_f32x4::<0b00>(0, a, b);
54169        assert_eq_m256(r, _mm256_setzero_ps());
54170        let r = _mm256_maskz_shuffle_f32x4::<0b00>(0b11111111, a, b);
54171        let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.);
54172        assert_eq_m256(r, e);
54173    }
54174
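    // extractf32x4/extracti32x4 return the 128-bit lane selected by the const generic;
    // the mask variants then blend the four extracted elements with src or zero.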
54175    #[simd_test(enable = "avx512f")]
54176    unsafe fn test_mm512_extractf32x4_ps() {
54177        let a = _mm512_setr_ps(
54178            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54179        );
54180        let r = _mm512_extractf32x4_ps::<1>(a);
54181        let e = _mm_setr_ps(5., 6., 7., 8.);
54182        assert_eq_m128(r, e);
54183    }
54184
54185    #[simd_test(enable = "avx512f")]
54186    unsafe fn test_mm512_mask_extractf32x4_ps() {
54187        let a = _mm512_setr_ps(
54188            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54189        );
54190        let src = _mm_set1_ps(100.);
54191        let r = _mm512_mask_extractf32x4_ps::<1>(src, 0, a);
54192        assert_eq_m128(r, src);
54193        let r = _mm512_mask_extractf32x4_ps::<1>(src, 0b11111111, a);
54194        let e = _mm_setr_ps(5., 6., 7., 8.);
54195        assert_eq_m128(r, e);
54196    }
54197
54198    #[simd_test(enable = "avx512f")]
54199    unsafe fn test_mm512_maskz_extractf32x4_ps() {
54200        let a = _mm512_setr_ps(
54201            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54202        );
54203        let r = _mm512_maskz_extractf32x4_ps::<1>(0, a);
54204        assert_eq_m128(r, _mm_setzero_ps());
54205        let r = _mm512_maskz_extractf32x4_ps::<1>(0b00000001, a);
54206        let e = _mm_setr_ps(5., 0., 0., 0.);
54207        assert_eq_m128(r, e);
54208    }
54209
54210    #[simd_test(enable = "avx512f,avx512vl")]
54211    unsafe fn test_mm256_extractf32x4_ps() {
54212        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54213        let r = _mm256_extractf32x4_ps::<1>(a);
54214        let e = _mm_set_ps(1., 2., 3., 4.);
54215        assert_eq_m128(r, e);
54216    }
54217
54218    #[simd_test(enable = "avx512f,avx512vl")]
54219    unsafe fn test_mm256_mask_extractf32x4_ps() {
54220        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54221        let src = _mm_set1_ps(100.);
54222        let r = _mm256_mask_extractf32x4_ps::<1>(src, 0, a);
54223        assert_eq_m128(r, src);
54224        let r = _mm256_mask_extractf32x4_ps::<1>(src, 0b00001111, a);
54225        let e = _mm_set_ps(1., 2., 3., 4.);
54226        assert_eq_m128(r, e);
54227    }
54228
54229    #[simd_test(enable = "avx512f,avx512vl")]
54230    unsafe fn test_mm256_maskz_extractf32x4_ps() {
54231        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54232        let r = _mm256_maskz_extractf32x4_ps::<1>(0, a);
54233        assert_eq_m128(r, _mm_setzero_ps());
54234        let r = _mm256_maskz_extractf32x4_ps::<1>(0b00001111, a);
54235        let e = _mm_set_ps(1., 2., 3., 4.);
54236        assert_eq_m128(r, e);
54237    }
54238
54239    #[simd_test(enable = "avx512f")]
54240    unsafe fn test_mm512_extracti32x4_epi32() {
54241        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54242        let r = _mm512_extracti32x4_epi32::<1>(a);
54243        let e = _mm_setr_epi32(5, 6, 7, 8);
54244        assert_eq_m128i(r, e);
54245    }
54246
54247    #[simd_test(enable = "avx512f")]
54248    unsafe fn test_mm512_mask_extracti32x4_epi32() {
54249        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54250        let src = _mm_set1_epi32(100);
54251        let r = _mm512_mask_extracti32x4_epi32::<1>(src, 0, a);
54252        assert_eq_m128i(r, src);
54253        let r = _mm512_mask_extracti32x4_epi32::<1>(src, 0b11111111, a);
54254        let e = _mm_setr_epi32(5, 6, 7, 8);
54255        assert_eq_m128i(r, e);
54256    }
54257
54258    #[simd_test(enable = "avx512f")]
54259    unsafe fn test_mm512_maskz_extracti32x4_epi32() {
54260        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54261        let r = _mm512_maskz_extracti32x4_epi32::<1>(0, a);
54262        assert_eq_m128i(r, _mm_setzero_si128());
54263        let r = _mm512_maskz_extracti32x4_epi32::<1>(0b00000001, a);
54264        let e = _mm_setr_epi32(5, 0, 0, 0);
54265        assert_eq_m128i(r, e);
54266    }
54267
54268    #[simd_test(enable = "avx512f,avx512vl")]
54269    unsafe fn test_mm256_extracti32x4_epi32() {
54270        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
54271        let r = _mm256_extracti32x4_epi32::<1>(a);
54272        let e = _mm_set_epi32(1, 2, 3, 4);
54273        assert_eq_m128i(r, e);
54274    }
54275
54276    #[simd_test(enable = "avx512f,avx512vl")]
54277    unsafe fn test_mm256_mask_extracti32x4_epi32() {
54278        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
54279        let src = _mm_set1_epi32(100);
54280        let r = _mm256_mask_extracti32x4_epi32::<1>(src, 0, a);
54281        assert_eq_m128i(r, src);
54282        let r = _mm256_mask_extracti32x4_epi32::<1>(src, 0b00001111, a);
54283        let e = _mm_set_epi32(1, 2, 3, 4);
54284        assert_eq_m128i(r, e);
54285    }
54286
54287    #[simd_test(enable = "avx512f,avx512vl")]
54288    unsafe fn test_mm256_maskz_extracti32x4_epi32() {
54289        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
54290        let r = _mm256_maskz_extracti32x4_epi32::<1>(0, a);
54291        assert_eq_m128i(r, _mm_setzero_si128());
54292        let r = _mm256_maskz_extracti32x4_epi32::<1>(0b00001111, a);
54293        let e = _mm_set_epi32(1, 2, 3, 4);
54294        assert_eq_m128i(r, e);
54295    }
54296
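    // moveldup duplicates the even-indexed element of each pair (dst[2i] = dst[2i+1] =
    // a[2i]); movehdup duplicates the odd-indexed one.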
54297    #[simd_test(enable = "avx512f")]
54298    unsafe fn test_mm512_moveldup_ps() {
54299        let a = _mm512_setr_ps(
54300            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54301        );
54302        let r = _mm512_moveldup_ps(a);
54303        let e = _mm512_setr_ps(
54304            1., 1., 3., 3., 5., 5., 7., 7., 9., 9., 11., 11., 13., 13., 15., 15.,
54305        );
54306        assert_eq_m512(r, e);
54307    }
54308
54309    #[simd_test(enable = "avx512f")]
54310    unsafe fn test_mm512_mask_moveldup_ps() {
54311        let a = _mm512_setr_ps(
54312            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54313        );
54314        let r = _mm512_mask_moveldup_ps(a, 0, a);
54315        assert_eq_m512(r, a);
54316        let r = _mm512_mask_moveldup_ps(a, 0b11111111_11111111, a);
54317        let e = _mm512_setr_ps(
54318            1., 1., 3., 3., 5., 5., 7., 7., 9., 9., 11., 11., 13., 13., 15., 15.,
54319        );
54320        assert_eq_m512(r, e);
54321    }
54322
54323    #[simd_test(enable = "avx512f")]
54324    unsafe fn test_mm512_maskz_moveldup_ps() {
54325        let a = _mm512_setr_ps(
54326            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54327        );
54328        let r = _mm512_maskz_moveldup_ps(0, a);
54329        assert_eq_m512(r, _mm512_setzero_ps());
54330        let r = _mm512_maskz_moveldup_ps(0b00000000_11111111, a);
54331        let e = _mm512_setr_ps(
54332            1., 1., 3., 3., 5., 5., 7., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
54333        );
54334        assert_eq_m512(r, e);
54335    }
54336
54337    #[simd_test(enable = "avx512f,avx512vl")]
54338    unsafe fn test_mm256_mask_moveldup_ps() {
54339        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54340        let r = _mm256_mask_moveldup_ps(a, 0, a);
54341        assert_eq_m256(r, a);
54342        let r = _mm256_mask_moveldup_ps(a, 0b11111111, a);
54343        let e = _mm256_set_ps(2., 2., 4., 4., 6., 6., 8., 8.);
54344        assert_eq_m256(r, e);
54345    }
54346
54347    #[simd_test(enable = "avx512f,avx512vl")]
54348    unsafe fn test_mm256_maskz_moveldup_ps() {
54349        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54350        let r = _mm256_maskz_moveldup_ps(0, a);
54351        assert_eq_m256(r, _mm256_setzero_ps());
54352        let r = _mm256_maskz_moveldup_ps(0b11111111, a);
54353        let e = _mm256_set_ps(2., 2., 4., 4., 6., 6., 8., 8.);
54354        assert_eq_m256(r, e);
54355    }
54356
54357    #[simd_test(enable = "avx512f,avx512vl")]
54358    unsafe fn test_mm_mask_moveldup_ps() {
54359        let a = _mm_set_ps(1., 2., 3., 4.);
54360        let r = _mm_mask_moveldup_ps(a, 0, a);
54361        assert_eq_m128(r, a);
54362        let r = _mm_mask_moveldup_ps(a, 0b00001111, a);
54363        let e = _mm_set_ps(2., 2., 4., 4.);
54364        assert_eq_m128(r, e);
54365    }
54366
54367    #[simd_test(enable = "avx512f,avx512vl")]
54368    unsafe fn test_mm_maskz_moveldup_ps() {
54369        let a = _mm_set_ps(1., 2., 3., 4.);
54370        let r = _mm_maskz_moveldup_ps(0, a);
54371        assert_eq_m128(r, _mm_setzero_ps());
54372        let r = _mm_maskz_moveldup_ps(0b00001111, a);
54373        let e = _mm_set_ps(2., 2., 4., 4.);
54374        assert_eq_m128(r, e);
54375    }
54376
54377    #[simd_test(enable = "avx512f")]
54378    unsafe fn test_mm512_movehdup_ps() {
54379        let a = _mm512_setr_ps(
54380            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54381        );
54382        let r = _mm512_movehdup_ps(a);
54383        let e = _mm512_setr_ps(
54384            2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
54385        );
54386        assert_eq_m512(r, e);
54387    }
54388
54389    #[simd_test(enable = "avx512f")]
54390    unsafe fn test_mm512_mask_movehdup_ps() {
54391        let a = _mm512_setr_ps(
54392            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54393        );
54394        let r = _mm512_mask_movehdup_ps(a, 0, a);
54395        assert_eq_m512(r, a);
54396        let r = _mm512_mask_movehdup_ps(a, 0b11111111_11111111, a);
54397        let e = _mm512_setr_ps(
54398            2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
54399        );
54400        assert_eq_m512(r, e);
54401    }
54402
54403    #[simd_test(enable = "avx512f")]
54404    unsafe fn test_mm512_maskz_movehdup_ps() {
54405        let a = _mm512_setr_ps(
54406            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54407        );
54408        let r = _mm512_maskz_movehdup_ps(0, a);
54409        assert_eq_m512(r, _mm512_setzero_ps());
54410        let r = _mm512_maskz_movehdup_ps(0b00000000_11111111, a);
54411        let e = _mm512_setr_ps(
54412            2., 2., 4., 4., 6., 6., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
54413        );
54414        assert_eq_m512(r, e);
54415    }
54416
54417    #[simd_test(enable = "avx512f,avx512vl")]
54418    unsafe fn test_mm256_mask_movehdup_ps() {
54419        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54420        let r = _mm256_mask_movehdup_ps(a, 0, a);
54421        assert_eq_m256(r, a);
54422        let r = _mm256_mask_movehdup_ps(a, 0b11111111, a);
54423        let e = _mm256_set_ps(1., 1., 3., 3., 5., 5., 7., 7.);
54424        assert_eq_m256(r, e);
54425    }
54426
54427    #[simd_test(enable = "avx512f,avx512vl")]
54428    unsafe fn test_mm256_maskz_movehdup_ps() {
54429        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54430        let r = _mm256_maskz_movehdup_ps(0, a);
54431        assert_eq_m256(r, _mm256_setzero_ps());
54432        let r = _mm256_maskz_movehdup_ps(0b11111111, a);
54433        let e = _mm256_set_ps(1., 1., 3., 3., 5., 5., 7., 7.);
54434        assert_eq_m256(r, e);
54435    }
54436
54437    #[simd_test(enable = "avx512f,avx512vl")]
54438    unsafe fn test_mm_mask_movehdup_ps() {
54439        let a = _mm_set_ps(1., 2., 3., 4.);
54440        let r = _mm_mask_movehdup_ps(a, 0, a);
54441        assert_eq_m128(r, a);
54442        let r = _mm_mask_movehdup_ps(a, 0b00001111, a);
54443        let e = _mm_set_ps(1., 1., 3., 3.);
54444        assert_eq_m128(r, e);
54445    }
54446
54447    #[simd_test(enable = "avx512f,avx512vl")]
54448    unsafe fn test_mm_maskz_movehdup_ps() {
54449        let a = _mm_set_ps(1., 2., 3., 4.);
54450        let r = _mm_maskz_movehdup_ps(0, a);
54451        assert_eq_m128(r, _mm_setzero_ps());
54452        let r = _mm_maskz_movehdup_ps(0b00001111, a);
54453        let e = _mm_set_ps(1., 1., 3., 3.);
54454        assert_eq_m128(r, e);
54455    }
54456
54457    #[simd_test(enable = "avx512f")]
54458    unsafe fn test_mm512_inserti32x4() {
54459        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54460        let b = _mm_setr_epi32(17, 18, 19, 20);
54461        let r = _mm512_inserti32x4::<0>(a, b);
54462        let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54463        assert_eq_m512i(r, e);
54464    }
54465
54466    #[simd_test(enable = "avx512f")]
54467    unsafe fn test_mm512_mask_inserti32x4() {
54468        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54469        let b = _mm_setr_epi32(17, 18, 19, 20);
54470        let r = _mm512_mask_inserti32x4::<0>(a, 0, a, b);
54471        assert_eq_m512i(r, a);
54472        let r = _mm512_mask_inserti32x4::<0>(a, 0b11111111_11111111, a, b);
54473        let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54474        assert_eq_m512i(r, e);
54475    }
54476
54477    #[simd_test(enable = "avx512f")]
54478    unsafe fn test_mm512_maskz_inserti32x4() {
54479        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54480        let b = _mm_setr_epi32(17, 18, 19, 20);
54481        let r = _mm512_maskz_inserti32x4::<0>(0, a, b);
54482        assert_eq_m512i(r, _mm512_setzero_si512());
54483        let r = _mm512_maskz_inserti32x4::<0>(0b00000000_11111111, a, b);
54484        let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0);
54485        assert_eq_m512i(r, e);
54486    }
54487
54488    #[simd_test(enable = "avx512f,avx512vl")]
54489    unsafe fn test_mm256_inserti32x4() {
54490        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
54491        let b = _mm_set_epi32(17, 18, 19, 20);
54492        let r = _mm256_inserti32x4::<1>(a, b);
54493        let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8);
54494        assert_eq_m256i(r, e);
54495    }
54496
54497    #[simd_test(enable = "avx512f,avx512vl")]
54498    unsafe fn test_mm256_mask_inserti32x4() {
54499        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
54500        let b = _mm_set_epi32(17, 18, 19, 20);
54501        let r = _mm256_mask_inserti32x4::<0>(a, 0, a, b);
54502        assert_eq_m256i(r, a);
54503        let r = _mm256_mask_inserti32x4::<1>(a, 0b11111111, a, b);
54504        let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8);
54505        assert_eq_m256i(r, e);
54506    }
54507
54508    #[simd_test(enable = "avx512f,avx512vl")]
54509    unsafe fn test_mm256_maskz_inserti32x4() {
54510        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
54511        let b = _mm_set_epi32(17, 18, 19, 20);
54512        let r = _mm256_maskz_inserti32x4::<0>(0, a, b);
54513        assert_eq_m256i(r, _mm256_setzero_si256());
54514        let r = _mm256_maskz_inserti32x4::<1>(0b11111111, a, b);
54515        let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8);
54516        assert_eq_m256i(r, e);
54517    }
54518
54519    #[simd_test(enable = "avx512f")]
54520    unsafe fn test_mm512_insertf32x4() {
54521        let a = _mm512_setr_ps(
54522            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54523        );
54524        let b = _mm_setr_ps(17., 18., 19., 20.);
54525        let r = _mm512_insertf32x4::<0>(a, b);
54526        let e = _mm512_setr_ps(
54527            17., 18., 19., 20., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54528        );
54529        assert_eq_m512(r, e);
54530    }
54531
54532    #[simd_test(enable = "avx512f")]
54533    unsafe fn test_mm512_mask_insertf32x4() {
54534        let a = _mm512_setr_ps(
54535            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54536        );
54537        let b = _mm_setr_ps(17., 18., 19., 20.);
54538        let r = _mm512_mask_insertf32x4::<0>(a, 0, a, b);
54539        assert_eq_m512(r, a);
54540        let r = _mm512_mask_insertf32x4::<0>(a, 0b11111111_11111111, a, b);
54541        let e = _mm512_setr_ps(
54542            17., 18., 19., 20., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54543        );
54544        assert_eq_m512(r, e);
54545    }
54546
54547    #[simd_test(enable = "avx512f")]
54548    unsafe fn test_mm512_maskz_insertf32x4() {
54549        let a = _mm512_setr_ps(
54550            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54551        );
54552        let b = _mm_setr_ps(17., 18., 19., 20.);
54553        let r = _mm512_maskz_insertf32x4::<0>(0, a, b);
54554        assert_eq_m512(r, _mm512_setzero_ps());
54555        let r = _mm512_maskz_insertf32x4::<0>(0b00000000_11111111, a, b);
54556        let e = _mm512_setr_ps(
54557            17., 18., 19., 20., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
54558        );
54559        assert_eq_m512(r, e);
54560    }
54561
54562    #[simd_test(enable = "avx512f,avx512vl")]
54563    unsafe fn test_mm256_insertf32x4() {
54564        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54565        let b = _mm_set_ps(17., 18., 19., 20.);
54566        let r = _mm256_insertf32x4::<1>(a, b);
54567        let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.);
54568        assert_eq_m256(r, e);
54569    }
54570
54571    #[simd_test(enable = "avx512f,avx512vl")]
54572    unsafe fn test_mm256_mask_insertf32x4() {
54573        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54574        let b = _mm_set_ps(17., 18., 19., 20.);
54575        let r = _mm256_mask_insertf32x4::<0>(a, 0, a, b);
54576        assert_eq_m256(r, a);
54577        let r = _mm256_mask_insertf32x4::<1>(a, 0b11111111, a, b);
54578        let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.);
54579        assert_eq_m256(r, e);
54580    }
54581
54582    #[simd_test(enable = "avx512f,avx512vl")]
54583    unsafe fn test_mm256_maskz_insertf32x4() {
54584        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54585        let b = _mm_set_ps(17., 18., 19., 20.);
54586        let r = _mm256_maskz_insertf32x4::<0>(0, a, b);
54587        assert_eq_m256(r, _mm256_setzero_ps());
54588        let r = _mm256_maskz_insertf32x4::<1>(0b11111111, a, b);
54589        let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.);
54590        assert_eq_m256(r, e);
54591    }
54592
54593    #[simd_test(enable = "avx512f")]
54594    unsafe fn test_mm512_castps128_ps512() {
54595        let a = _mm_setr_ps(17., 18., 19., 20.);
54596        let r = _mm512_castps128_ps512(a);
54597        assert_eq_m128(_mm512_castps512_ps128(r), a);
54598    }
54599
54600    #[simd_test(enable = "avx512f")]
54601    unsafe fn test_mm512_castps256_ps512() {
54602        let a = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
54603        let r = _mm512_castps256_ps512(a);
54604        assert_eq_m256(_mm512_castps512_ps256(r), a);
54605    }
54606
54607    #[simd_test(enable = "avx512f")]
54608    unsafe fn test_mm512_zextps128_ps512() {
54609        let a = _mm_setr_ps(17., 18., 19., 20.);
54610        let r = _mm512_zextps128_ps512(a);
54611        let e = _mm512_setr_ps(
54612            17., 18., 19., 20., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
54613        );
54614        assert_eq_m512(r, e);
54615    }
54616
54617    #[simd_test(enable = "avx512f")]
54618    unsafe fn test_mm512_zextps256_ps512() {
54619        let a = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
54620        let r = _mm512_zextps256_ps512(a);
54621        let e = _mm512_setr_ps(
54622            17., 18., 19., 20., 21., 22., 23., 24., 0., 0., 0., 0., 0., 0., 0., 0.,
54623        );
54624        assert_eq_m512(r, e);
54625    }
54626
54627    #[simd_test(enable = "avx512f")]
54628    unsafe fn test_mm512_castps512_ps128() {
54629        let a = _mm512_setr_ps(
54630            17., 18., 19., 20., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
54631        );
54632        let r = _mm512_castps512_ps128(a);
54633        let e = _mm_setr_ps(17., 18., 19., 20.);
54634        assert_eq_m128(r, e);
54635    }
54636
54637    #[simd_test(enable = "avx512f")]
54638    unsafe fn test_mm512_castps512_ps256() {
54639        let a = _mm512_setr_ps(
54640            17., 18., 19., 20., 21., 22., 23., 24., -1., -1., -1., -1., -1., -1., -1., -1.,
54641        );
54642        let r = _mm512_castps512_ps256(a);
54643        let e = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
54644        assert_eq_m256(r, e);
54645    }
54646
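    // For reference: reinterpreting two adjacent f32 values of 1.0 (bit pattern
    // 0x3F80_0000_3F80_0000) as an f64 yields roughly 2^-7 * (1 + 0x3F80_0000 / 2^52),
    // i.e. the 0.007812501848093234 constant checked below.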
54647    #[simd_test(enable = "avx512f")]
54648    unsafe fn test_mm512_castps_pd() {
54649        let a = _mm512_set1_ps(1.);
54650        let r = _mm512_castps_pd(a);
54651        let e = _mm512_set1_pd(0.007812501848093234);
54652        assert_eq_m512d(r, e);
54653    }
54654
54655    #[simd_test(enable = "avx512f")]
54656    unsafe fn test_mm512_castps_si512() {
54657        let a = _mm512_set1_ps(1.);
54658        let r = _mm512_castps_si512(a);
54659        let e = _mm512_set1_epi32(1065353216);
54660        assert_eq_m512i(r, e);
54661    }
54662
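    // `_mm512_broadcastd_epi32` replicates element 0 of `a`; with `_mm_set_epi32(17, 18,
    // 19, 20)` that is the last argument, 20, hence the `set1(20)` expectations below.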
54663    #[simd_test(enable = "avx512f")]
54664    unsafe fn test_mm512_broadcastd_epi32() {
54665        let a = _mm_set_epi32(17, 18, 19, 20);
54666        let r = _mm512_broadcastd_epi32(a);
54667        let e = _mm512_set1_epi32(20);
54668        assert_eq_m512i(r, e);
54669    }
54670
54671    #[simd_test(enable = "avx512f")]
54672    unsafe fn test_mm512_mask_broadcastd_epi32() {
54673        let src = _mm512_set1_epi32(20);
54674        let a = _mm_set_epi32(17, 18, 19, 20);
54675        let r = _mm512_mask_broadcastd_epi32(src, 0, a);
54676        assert_eq_m512i(r, src);
54677        let r = _mm512_mask_broadcastd_epi32(src, 0b11111111_11111111, a);
54678        let e = _mm512_set1_epi32(20);
54679        assert_eq_m512i(r, e);
54680    }
54681
54682    #[simd_test(enable = "avx512f")]
54683    unsafe fn test_mm512_maskz_broadcastd_epi32() {
54684        let a = _mm_set_epi32(17, 18, 19, 20);
54685        let r = _mm512_maskz_broadcastd_epi32(0, a);
54686        assert_eq_m512i(r, _mm512_setzero_si512());
54687        let r = _mm512_maskz_broadcastd_epi32(0b00000000_11111111, a);
54688        let e = _mm512_setr_epi32(20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, 0, 0, 0);
54689        assert_eq_m512i(r, e);
54690    }
54691
54692    #[simd_test(enable = "avx512f,avx512vl")]
54693    unsafe fn test_mm256_mask_broadcastd_epi32() {
54694        let src = _mm256_set1_epi32(20);
54695        let a = _mm_set_epi32(17, 18, 19, 20);
54696        let r = _mm256_mask_broadcastd_epi32(src, 0, a);
54697        assert_eq_m256i(r, src);
54698        let r = _mm256_mask_broadcastd_epi32(src, 0b11111111, a);
54699        let e = _mm256_set1_epi32(20);
54700        assert_eq_m256i(r, e);
54701    }
54702
54703    #[simd_test(enable = "avx512f,avx512vl")]
54704    unsafe fn test_mm256_maskz_broadcastd_epi32() {
54705        let a = _mm_set_epi32(17, 18, 19, 20);
54706        let r = _mm256_maskz_broadcastd_epi32(0, a);
54707        assert_eq_m256i(r, _mm256_setzero_si256());
54708        let r = _mm256_maskz_broadcastd_epi32(0b11111111, a);
54709        let e = _mm256_set1_epi32(20);
54710        assert_eq_m256i(r, e);
54711    }
54712
54713    #[simd_test(enable = "avx512f,avx512vl")]
54714    unsafe fn test_mm_mask_broadcastd_epi32() {
54715        let src = _mm_set1_epi32(20);
54716        let a = _mm_set_epi32(17, 18, 19, 20);
54717        let r = _mm_mask_broadcastd_epi32(src, 0, a);
54718        assert_eq_m128i(r, src);
54719        let r = _mm_mask_broadcastd_epi32(src, 0b00001111, a);
54720        let e = _mm_set1_epi32(20);
54721        assert_eq_m128i(r, e);
54722    }
54723
54724    #[simd_test(enable = "avx512f,avx512vl")]
54725    unsafe fn test_mm_maskz_broadcastd_epi32() {
54726        let a = _mm_set_epi32(17, 18, 19, 20);
54727        let r = _mm_maskz_broadcastd_epi32(0, a);
54728        assert_eq_m128i(r, _mm_setzero_si128());
54729        let r = _mm_maskz_broadcastd_epi32(0b00001111, a);
54730        let e = _mm_set1_epi32(20);
54731        assert_eq_m128i(r, e);
54732    }
54733
54734    #[simd_test(enable = "avx512f")]
54735    unsafe fn test_mm512_broadcastss_ps() {
54736        let a = _mm_set_ps(17., 18., 19., 20.);
54737        let r = _mm512_broadcastss_ps(a);
54738        let e = _mm512_set1_ps(20.);
54739        assert_eq_m512(r, e);
54740    }
54741
54742    #[simd_test(enable = "avx512f")]
54743    unsafe fn test_mm512_mask_broadcastss_ps() {
54744        let src = _mm512_set1_ps(20.);
54745        let a = _mm_set_ps(17., 18., 19., 20.);
54746        let r = _mm512_mask_broadcastss_ps(src, 0, a);
54747        assert_eq_m512(r, src);
54748        let r = _mm512_mask_broadcastss_ps(src, 0b11111111_11111111, a);
54749        let e = _mm512_set1_ps(20.);
54750        assert_eq_m512(r, e);
54751    }
54752
54753    #[simd_test(enable = "avx512f")]
54754    unsafe fn test_mm512_maskz_broadcastss_ps() {
54755        let a = _mm_set_ps(17., 18., 19., 20.);
54756        let r = _mm512_maskz_broadcastss_ps(0, a);
54757        assert_eq_m512(r, _mm512_setzero_ps());
54758        let r = _mm512_maskz_broadcastss_ps(0b00000000_11111111, a);
54759        let e = _mm512_setr_ps(
54760            20., 20., 20., 20., 20., 20., 20., 20., 0., 0., 0., 0., 0., 0., 0., 0.,
54761        );
54762        assert_eq_m512(r, e);
54763    }
54764
54765    #[simd_test(enable = "avx512f,avx512vl")]
54766    unsafe fn test_mm256_mask_broadcastss_ps() {
54767        let src = _mm256_set1_ps(20.);
54768        let a = _mm_set_ps(17., 18., 19., 20.);
54769        let r = _mm256_mask_broadcastss_ps(src, 0, a);
54770        assert_eq_m256(r, src);
54771        let r = _mm256_mask_broadcastss_ps(src, 0b11111111, a);
54772        let e = _mm256_set1_ps(20.);
54773        assert_eq_m256(r, e);
54774    }
54775
54776    #[simd_test(enable = "avx512f,avx512vl")]
54777    unsafe fn test_mm256_maskz_broadcastss_ps() {
54778        let a = _mm_set_ps(17., 18., 19., 20.);
54779        let r = _mm256_maskz_broadcastss_ps(0, a);
54780        assert_eq_m256(r, _mm256_setzero_ps());
54781        let r = _mm256_maskz_broadcastss_ps(0b11111111, a);
54782        let e = _mm256_set1_ps(20.);
54783        assert_eq_m256(r, e);
54784    }
54785
54786    #[simd_test(enable = "avx512f,avx512vl")]
54787    unsafe fn test_mm_mask_broadcastss_ps() {
54788        let src = _mm_set1_ps(20.);
54789        let a = _mm_set_ps(17., 18., 19., 20.);
54790        let r = _mm_mask_broadcastss_ps(src, 0, a);
54791        assert_eq_m128(r, src);
54792        let r = _mm_mask_broadcastss_ps(src, 0b00001111, a);
54793        let e = _mm_set1_ps(20.);
54794        assert_eq_m128(r, e);
54795    }
54796
54797    #[simd_test(enable = "avx512f,avx512vl")]
54798    unsafe fn test_mm_maskz_broadcastss_ps() {
54799        let a = _mm_set_ps(17., 18., 19., 20.);
54800        let r = _mm_maskz_broadcastss_ps(0, a);
54801        assert_eq_m128(r, _mm_setzero_ps());
54802        let r = _mm_maskz_broadcastss_ps(0b00001111, a);
54803        let e = _mm_set1_ps(20.);
54804        assert_eq_m128(r, e);
54805    }
54806
54807    #[simd_test(enable = "avx512f")]
54808    unsafe fn test_mm512_broadcast_i32x4() {
54809        let a = _mm_set_epi32(17, 18, 19, 20);
54810        let r = _mm512_broadcast_i32x4(a);
54811        let e = _mm512_set_epi32(
54812            17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20,
54813        );
54814        assert_eq_m512i(r, e);
54815    }
54816
54817    #[simd_test(enable = "avx512f")]
54818    unsafe fn test_mm512_mask_broadcast_i32x4() {
54819        let src = _mm512_set1_epi32(20);
54820        let a = _mm_set_epi32(17, 18, 19, 20);
54821        let r = _mm512_mask_broadcast_i32x4(src, 0, a);
54822        assert_eq_m512i(r, src);
54823        let r = _mm512_mask_broadcast_i32x4(src, 0b11111111_11111111, a);
54824        let e = _mm512_set_epi32(
54825            17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20,
54826        );
54827        assert_eq_m512i(r, e);
54828    }
54829
54830    #[simd_test(enable = "avx512f")]
54831    unsafe fn test_mm512_maskz_broadcast_i32x4() {
54832        let a = _mm_set_epi32(17, 18, 19, 20);
54833        let r = _mm512_maskz_broadcast_i32x4(0, a);
54834        assert_eq_m512i(r, _mm512_setzero_si512());
54835        let r = _mm512_maskz_broadcast_i32x4(0b00000000_11111111, a);
54836        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 17, 18, 19, 20, 17, 18, 19, 20);
54837        assert_eq_m512i(r, e);
54838    }
54839
54840    #[simd_test(enable = "avx512f,avx512vl")]
54841    unsafe fn test_mm256_broadcast_i32x4() {
54842        let a = _mm_set_epi32(17, 18, 19, 20);
54843        let r = _mm256_broadcast_i32x4(a);
54844        let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20);
54845        assert_eq_m256i(r, e);
54846    }
54847
54848    #[simd_test(enable = "avx512f,avx512vl")]
54849    unsafe fn test_mm256_mask_broadcast_i32x4() {
54850        let src = _mm256_set1_epi32(20);
54851        let a = _mm_set_epi32(17, 18, 19, 20);
54852        let r = _mm256_mask_broadcast_i32x4(src, 0, a);
54853        assert_eq_m256i(r, src);
54854        let r = _mm256_mask_broadcast_i32x4(src, 0b11111111, a);
54855        let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20);
54856        assert_eq_m256i(r, e);
54857    }
54858
54859    #[simd_test(enable = "avx512f,avx512vl")]
54860    unsafe fn test_mm256_maskz_broadcast_i32x4() {
54861        let a = _mm_set_epi32(17, 18, 19, 20);
54862        let r = _mm256_maskz_broadcast_i32x4(0, a);
54863        assert_eq_m256i(r, _mm256_setzero_si256());
54864        let r = _mm256_maskz_broadcast_i32x4(0b11111111, a);
54865        let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20);
54866        assert_eq_m256i(r, e);
54867    }
54868
54869    #[simd_test(enable = "avx512f")]
54870    unsafe fn test_mm512_broadcast_f32x4() {
54871        let a = _mm_set_ps(17., 18., 19., 20.);
54872        let r = _mm512_broadcast_f32x4(a);
54873        let e = _mm512_set_ps(
54874            17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20.,
54875        );
54876        assert_eq_m512(r, e);
54877    }
54878
54879    #[simd_test(enable = "avx512f")]
54880    unsafe fn test_mm512_mask_broadcast_f32x4() {
54881        let src = _mm512_set1_ps(20.);
54882        let a = _mm_set_ps(17., 18., 19., 20.);
54883        let r = _mm512_mask_broadcast_f32x4(src, 0, a);
54884        assert_eq_m512(r, src);
54885        let r = _mm512_mask_broadcast_f32x4(src, 0b11111111_11111111, a);
54886        let e = _mm512_set_ps(
54887            17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20.,
54888        );
54889        assert_eq_m512(r, e);
54890    }
54891
54892    #[simd_test(enable = "avx512f")]
54893    unsafe fn test_mm512_maskz_broadcast_f32x4() {
54894        let a = _mm_set_ps(17., 18., 19., 20.);
54895        let r = _mm512_maskz_broadcast_f32x4(0, a);
54896        assert_eq_m512(r, _mm512_setzero_ps());
54897        let r = _mm512_maskz_broadcast_f32x4(0b00000000_11111111, a);
54898        let e = _mm512_set_ps(
54899            0., 0., 0., 0., 0., 0., 0., 0., 17., 18., 19., 20., 17., 18., 19., 20.,
54900        );
54901        assert_eq_m512(r, e);
54902    }
54903
54904    #[simd_test(enable = "avx512f,avx512vl")]
54905    unsafe fn test_mm256_broadcast_f32x4() {
54906        let a = _mm_set_ps(17., 18., 19., 20.);
54907        let r = _mm256_broadcast_f32x4(a);
54908        let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.);
54909        assert_eq_m256(r, e);
54910    }
54911
54912    #[simd_test(enable = "avx512f,avx512vl")]
54913    unsafe fn test_mm256_mask_broadcast_f32x4() {
54914        let src = _mm256_set1_ps(20.);
54915        let a = _mm_set_ps(17., 18., 19., 20.);
54916        let r = _mm256_mask_broadcast_f32x4(src, 0, a);
54917        assert_eq_m256(r, src);
54918        let r = _mm256_mask_broadcast_f32x4(src, 0b11111111, a);
54919        let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.);
54920        assert_eq_m256(r, e);
54921    }
54922
54923    #[simd_test(enable = "avx512f,avx512vl")]
54924    unsafe fn test_mm256_maskz_broadcast_f32x4() {
54925        let a = _mm_set_ps(17., 18., 19., 20.);
54926        let r = _mm256_maskz_broadcast_f32x4(0, a);
54927        assert_eq_m256(r, _mm256_setzero_ps());
54928        let r = _mm256_maskz_broadcast_f32x4(0b11111111, a);
54929        let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.);
54930        assert_eq_m256(r, e);
54931    }
54932
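    // In the blend tests, a set mask bit selects the element from the second operand `b`,
    // while a clear bit keeps the element from `a`.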
54933    #[simd_test(enable = "avx512f")]
54934    unsafe fn test_mm512_mask_blend_epi32() {
54935        let a = _mm512_set1_epi32(1);
54936        let b = _mm512_set1_epi32(2);
54937        let r = _mm512_mask_blend_epi32(0b11111111_00000000, a, b);
54938        let e = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
54939        assert_eq_m512i(r, e);
54940    }
54941
54942    #[simd_test(enable = "avx512f,avx512vl")]
54943    unsafe fn test_mm256_mask_blend_epi32() {
54944        let a = _mm256_set1_epi32(1);
54945        let b = _mm256_set1_epi32(2);
54946        let r = _mm256_mask_blend_epi32(0b11111111, a, b);
54947        let e = _mm256_set1_epi32(2);
54948        assert_eq_m256i(r, e);
54949    }
54950
54951    #[simd_test(enable = "avx512f,avx512vl")]
54952    unsafe fn test_mm_mask_blend_epi32() {
54953        let a = _mm_set1_epi32(1);
54954        let b = _mm_set1_epi32(2);
54955        let r = _mm_mask_blend_epi32(0b00001111, a, b);
54956        let e = _mm_set1_epi32(2);
54957        assert_eq_m128i(r, e);
54958    }
54959
54960    #[simd_test(enable = "avx512f")]
54961    unsafe fn test_mm512_mask_blend_ps() {
54962        let a = _mm512_set1_ps(1.);
54963        let b = _mm512_set1_ps(2.);
54964        let r = _mm512_mask_blend_ps(0b11111111_00000000, a, b);
54965        let e = _mm512_set_ps(
54966            2., 2., 2., 2., 2., 2., 2., 2., 1., 1., 1., 1., 1., 1., 1., 1.,
54967        );
54968        assert_eq_m512(r, e);
54969    }
54970
54971    #[simd_test(enable = "avx512f,avx512vl")]
54972    unsafe fn test_mm256_mask_blend_ps() {
54973        let a = _mm256_set1_ps(1.);
54974        let b = _mm256_set1_ps(2.);
54975        let r = _mm256_mask_blend_ps(0b11111111, a, b);
54976        let e = _mm256_set1_ps(2.);
54977        assert_eq_m256(r, e);
54978    }
54979
54980    #[simd_test(enable = "avx512f,avx512vl")]
54981    unsafe fn test_mm_mask_blend_ps() {
54982        let a = _mm_set1_ps(1.);
54983        let b = _mm_set1_ps(2.);
54984        let r = _mm_mask_blend_ps(0b00001111, a, b);
54985        let e = _mm_set1_ps(2.);
54986        assert_eq_m128(r, e);
54987    }
54988
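    // The unpackhi/unpacklo tests interleave within each 128-bit lane: the result takes
    // the upper (respectively lower) pair of elements from `a` and `b` lane by lane,
    // which is why the 17, 1, 18, 2, ... pattern repeats per lane rather than spanning
    // the whole vector.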
54989    #[simd_test(enable = "avx512f")]
54990    unsafe fn test_mm512_unpackhi_epi32() {
54991        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54992        let b = _mm512_set_epi32(
54993            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
54994        );
54995        let r = _mm512_unpackhi_epi32(a, b);
54996        let e = _mm512_set_epi32(17, 1, 18, 2, 21, 5, 22, 6, 25, 9, 26, 10, 29, 13, 30, 14);
54997        assert_eq_m512i(r, e);
54998    }
54999
55000    #[simd_test(enable = "avx512f")]
55001    unsafe fn test_mm512_mask_unpackhi_epi32() {
55002        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
55003        let b = _mm512_set_epi32(
55004            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
55005        );
55006        let r = _mm512_mask_unpackhi_epi32(a, 0, a, b);
55007        assert_eq_m512i(r, a);
55008        let r = _mm512_mask_unpackhi_epi32(a, 0b11111111_11111111, a, b);
55009        let e = _mm512_set_epi32(17, 1, 18, 2, 21, 5, 22, 6, 25, 9, 26, 10, 29, 13, 30, 14);
55010        assert_eq_m512i(r, e);
55011    }
55012
55013    #[simd_test(enable = "avx512f")]
55014    unsafe fn test_mm512_maskz_unpackhi_epi32() {
55015        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
55016        let b = _mm512_set_epi32(
55017            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
55018        );
55019        let r = _mm512_maskz_unpackhi_epi32(0, a, b);
55020        assert_eq_m512i(r, _mm512_setzero_si512());
55021        let r = _mm512_maskz_unpackhi_epi32(0b00000000_11111111, a, b);
55022        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 25, 9, 26, 10, 29, 13, 30, 14);
55023        assert_eq_m512i(r, e);
55024    }
55025
55026    #[simd_test(enable = "avx512f,avx512vl")]
55027    unsafe fn test_mm256_mask_unpackhi_epi32() {
55028        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
55029        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
55030        let r = _mm256_mask_unpackhi_epi32(a, 0, a, b);
55031        assert_eq_m256i(r, a);
55032        let r = _mm256_mask_unpackhi_epi32(a, 0b11111111, a, b);
55033        let e = _mm256_set_epi32(17, 1, 18, 2, 21, 5, 22, 6);
55034        assert_eq_m256i(r, e);
55035    }
55036
55037    #[simd_test(enable = "avx512f,avx512vl")]
55038    unsafe fn test_mm256_maskz_unpackhi_epi32() {
55039        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
55040        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
55041        let r = _mm256_maskz_unpackhi_epi32(0, a, b);
55042        assert_eq_m256i(r, _mm256_setzero_si256());
55043        let r = _mm256_maskz_unpackhi_epi32(0b11111111, a, b);
55044        let e = _mm256_set_epi32(17, 1, 18, 2, 21, 5, 22, 6);
55045        assert_eq_m256i(r, e);
55046    }
55047
55048    #[simd_test(enable = "avx512f,avx512vl")]
55049    unsafe fn test_mm_mask_unpackhi_epi32() {
55050        let a = _mm_set_epi32(1, 2, 3, 4);
55051        let b = _mm_set_epi32(17, 18, 19, 20);
55052        let r = _mm_mask_unpackhi_epi32(a, 0, a, b);
55053        assert_eq_m128i(r, a);
55054        let r = _mm_mask_unpackhi_epi32(a, 0b00001111, a, b);
55055        let e = _mm_set_epi32(17, 1, 18, 2);
55056        assert_eq_m128i(r, e);
55057    }
55058
55059    #[simd_test(enable = "avx512f,avx512vl")]
55060    unsafe fn test_mm_maskz_unpackhi_epi32() {
55061        let a = _mm_set_epi32(1, 2, 3, 4);
55062        let b = _mm_set_epi32(17, 18, 19, 20);
55063        let r = _mm_maskz_unpackhi_epi32(0, a, b);
55064        assert_eq_m128i(r, _mm_setzero_si128());
55065        let r = _mm_maskz_unpackhi_epi32(0b00001111, a, b);
55066        let e = _mm_set_epi32(17, 1, 18, 2);
55067        assert_eq_m128i(r, e);
55068    }
55069
55070    #[simd_test(enable = "avx512f")]
55071    unsafe fn test_mm512_unpackhi_ps() {
55072        let a = _mm512_set_ps(
55073            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
55074        );
55075        let b = _mm512_set_ps(
55076            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
55077        );
55078        let r = _mm512_unpackhi_ps(a, b);
55079        let e = _mm512_set_ps(
55080            17., 1., 18., 2., 21., 5., 22., 6., 25., 9., 26., 10., 29., 13., 30., 14.,
55081        );
55082        assert_eq_m512(r, e);
55083    }
55084
55085    #[simd_test(enable = "avx512f")]
55086    unsafe fn test_mm512_mask_unpackhi_ps() {
55087        let a = _mm512_set_ps(
55088            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
55089        );
55090        let b = _mm512_set_ps(
55091            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
55092        );
55093        let r = _mm512_mask_unpackhi_ps(a, 0, a, b);
55094        assert_eq_m512(r, a);
55095        let r = _mm512_mask_unpackhi_ps(a, 0b11111111_11111111, a, b);
55096        let e = _mm512_set_ps(
55097            17., 1., 18., 2., 21., 5., 22., 6., 25., 9., 26., 10., 29., 13., 30., 14.,
55098        );
55099        assert_eq_m512(r, e);
55100    }
55101
55102    #[simd_test(enable = "avx512f")]
55103    unsafe fn test_mm512_maskz_unpackhi_ps() {
55104        let a = _mm512_set_ps(
55105            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
55106        );
55107        let b = _mm512_set_ps(
55108            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
55109        );
55110        let r = _mm512_maskz_unpackhi_ps(0, a, b);
55111        assert_eq_m512(r, _mm512_setzero_ps());
55112        let r = _mm512_maskz_unpackhi_ps(0b00000000_11111111, a, b);
55113        let e = _mm512_set_ps(
55114            0., 0., 0., 0., 0., 0., 0., 0., 25., 9., 26., 10., 29., 13., 30., 14.,
55115        );
55116        assert_eq_m512(r, e);
55117    }
55118
55119    #[simd_test(enable = "avx512f,avx512vl")]
55120    unsafe fn test_mm256_mask_unpackhi_ps() {
55121        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
55122        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
55123        let r = _mm256_mask_unpackhi_ps(a, 0, a, b);
55124        assert_eq_m256(r, a);
55125        let r = _mm256_mask_unpackhi_ps(a, 0b11111111, a, b);
55126        let e = _mm256_set_ps(17., 1., 18., 2., 21., 5., 22., 6.);
55127        assert_eq_m256(r, e);
55128    }
55129
55130    #[simd_test(enable = "avx512f,avx512vl")]
55131    unsafe fn test_mm256_maskz_unpackhi_ps() {
55132        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
55133        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
55134        let r = _mm256_maskz_unpackhi_ps(0, a, b);
55135        assert_eq_m256(r, _mm256_setzero_ps());
55136        let r = _mm256_maskz_unpackhi_ps(0b11111111, a, b);
55137        let e = _mm256_set_ps(17., 1., 18., 2., 21., 5., 22., 6.);
55138        assert_eq_m256(r, e);
55139    }
55140
55141    #[simd_test(enable = "avx512f,avx512vl")]
55142    unsafe fn test_mm_mask_unpackhi_ps() {
55143        let a = _mm_set_ps(1., 2., 3., 4.);
55144        let b = _mm_set_ps(17., 18., 19., 20.);
55145        let r = _mm_mask_unpackhi_ps(a, 0, a, b);
55146        assert_eq_m128(r, a);
55147        let r = _mm_mask_unpackhi_ps(a, 0b00001111, a, b);
55148        let e = _mm_set_ps(17., 1., 18., 2.);
55149        assert_eq_m128(r, e);
55150    }
55151
55152    #[simd_test(enable = "avx512f,avx512vl")]
55153    unsafe fn test_mm_maskz_unpackhi_ps() {
55154        let a = _mm_set_ps(1., 2., 3., 4.);
55155        let b = _mm_set_ps(17., 18., 19., 20.);
55156        let r = _mm_maskz_unpackhi_ps(0, a, b);
55157        assert_eq_m128(r, _mm_setzero_ps());
55158        let r = _mm_maskz_unpackhi_ps(0b00001111, a, b);
55159        let e = _mm_set_ps(17., 1., 18., 2.);
55160        assert_eq_m128(r, e);
55161    }
55162
55163    #[simd_test(enable = "avx512f")]
55164    unsafe fn test_mm512_unpacklo_epi32() {
55165        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
55166        let b = _mm512_set_epi32(
55167            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
55168        );
55169        let r = _mm512_unpacklo_epi32(a, b);
55170        let e = _mm512_set_epi32(19, 3, 20, 4, 23, 7, 24, 8, 27, 11, 28, 12, 31, 15, 32, 16);
55171        assert_eq_m512i(r, e);
55172    }
55173
55174    #[simd_test(enable = "avx512f")]
55175    unsafe fn test_mm512_mask_unpacklo_epi32() {
55176        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
55177        let b = _mm512_set_epi32(
55178            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
55179        );
55180        let r = _mm512_mask_unpacklo_epi32(a, 0, a, b);
55181        assert_eq_m512i(r, a);
55182        let r = _mm512_mask_unpacklo_epi32(a, 0b11111111_11111111, a, b);
55183        let e = _mm512_set_epi32(19, 3, 20, 4, 23, 7, 24, 8, 27, 11, 28, 12, 31, 15, 32, 16);
55184        assert_eq_m512i(r, e);
55185    }
55186
55187    #[simd_test(enable = "avx512f")]
55188    unsafe fn test_mm512_maskz_unpacklo_epi32() {
55189        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
55190        let b = _mm512_set_epi32(
55191            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
55192        );
55193        let r = _mm512_maskz_unpacklo_epi32(0, a, b);
55194        assert_eq_m512i(r, _mm512_setzero_si512());
55195        let r = _mm512_maskz_unpacklo_epi32(0b00000000_11111111, a, b);
55196        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 27, 11, 28, 12, 31, 15, 32, 16);
55197        assert_eq_m512i(r, e);
55198    }
55199
55200    #[simd_test(enable = "avx512f,avx512vl")]
55201    unsafe fn test_mm256_mask_unpacklo_epi32() {
55202        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
55203        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
55204        let r = _mm256_mask_unpacklo_epi32(a, 0, a, b);
55205        assert_eq_m256i(r, a);
55206        let r = _mm256_mask_unpacklo_epi32(a, 0b11111111, a, b);
55207        let e = _mm256_set_epi32(19, 3, 20, 4, 23, 7, 24, 8);
55208        assert_eq_m256i(r, e);
55209    }
55210
55211    #[simd_test(enable = "avx512f,avx512vl")]
55212    unsafe fn test_mm256_maskz_unpacklo_epi32() {
55213        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
55214        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
55215        let r = _mm256_maskz_unpacklo_epi32(0, a, b);
55216        assert_eq_m256i(r, _mm256_setzero_si256());
55217        let r = _mm256_maskz_unpacklo_epi32(0b11111111, a, b);
55218        let e = _mm256_set_epi32(19, 3, 20, 4, 23, 7, 24, 8);
55219        assert_eq_m256i(r, e);
55220    }
55221
55222    #[simd_test(enable = "avx512f,avx512vl")]
55223    unsafe fn test_mm_mask_unpacklo_epi32() {
55224        let a = _mm_set_epi32(1, 2, 3, 4);
55225        let b = _mm_set_epi32(17, 18, 19, 20);
55226        let r = _mm_mask_unpacklo_epi32(a, 0, a, b);
55227        assert_eq_m128i(r, a);
55228        let r = _mm_mask_unpacklo_epi32(a, 0b00001111, a, b);
55229        let e = _mm_set_epi32(19, 3, 20, 4);
55230        assert_eq_m128i(r, e);
55231    }
55232
55233    #[simd_test(enable = "avx512f,avx512vl")]
55234    unsafe fn test_mm_maskz_unpacklo_epi32() {
55235        let a = _mm_set_epi32(1, 2, 3, 4);
55236        let b = _mm_set_epi32(17, 18, 19, 20);
55237        let r = _mm_maskz_unpacklo_epi32(0, a, b);
55238        assert_eq_m128i(r, _mm_setzero_si128());
55239        let r = _mm_maskz_unpacklo_epi32(0b00001111, a, b);
55240        let e = _mm_set_epi32(19, 3, 20, 4);
55241        assert_eq_m128i(r, e);
55242    }
55243
55244    #[simd_test(enable = "avx512f")]
55245    unsafe fn test_mm512_unpacklo_ps() {
55246        let a = _mm512_set_ps(
55247            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
55248        );
55249        let b = _mm512_set_ps(
55250            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
55251        );
55252        let r = _mm512_unpacklo_ps(a, b);
55253        let e = _mm512_set_ps(
55254            19., 3., 20., 4., 23., 7., 24., 8., 27., 11., 28., 12., 31., 15., 32., 16.,
55255        );
55256        assert_eq_m512(r, e);
55257    }
55258
55259    #[simd_test(enable = "avx512f")]
55260    unsafe fn test_mm512_mask_unpacklo_ps() {
55261        let a = _mm512_set_ps(
55262            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
55263        );
55264        let b = _mm512_set_ps(
55265            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
55266        );
55267        let r = _mm512_mask_unpacklo_ps(a, 0, a, b);
55268        assert_eq_m512(r, a);
55269        let r = _mm512_mask_unpacklo_ps(a, 0b11111111_11111111, a, b);
55270        let e = _mm512_set_ps(
55271            19., 3., 20., 4., 23., 7., 24., 8., 27., 11., 28., 12., 31., 15., 32., 16.,
55272        );
55273        assert_eq_m512(r, e);
55274    }
55275
55276    #[simd_test(enable = "avx512f")]
55277    unsafe fn test_mm512_maskz_unpacklo_ps() {
55278        let a = _mm512_set_ps(
55279            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
55280        );
55281        let b = _mm512_set_ps(
55282            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
55283        );
55284        let r = _mm512_maskz_unpacklo_ps(0, a, b);
55285        assert_eq_m512(r, _mm512_setzero_ps());
55286        let r = _mm512_maskz_unpacklo_ps(0b00000000_11111111, a, b);
55287        let e = _mm512_set_ps(
55288            0., 0., 0., 0., 0., 0., 0., 0., 27., 11., 28., 12., 31., 15., 32., 16.,
55289        );
55290        assert_eq_m512(r, e);
55291    }
55292
55293    #[simd_test(enable = "avx512f,avx512vl")]
55294    unsafe fn test_mm256_mask_unpacklo_ps() {
55295        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
55296        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
55297        let r = _mm256_mask_unpacklo_ps(a, 0, a, b);
55298        assert_eq_m256(r, a);
55299        let r = _mm256_mask_unpacklo_ps(a, 0b11111111, a, b);
55300        let e = _mm256_set_ps(19., 3., 20., 4., 23., 7., 24., 8.);
55301        assert_eq_m256(r, e);
55302    }
55303
55304    #[simd_test(enable = "avx512f,avx512vl")]
55305    unsafe fn test_mm256_maskz_unpacklo_ps() {
55306        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
55307        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
55308        let r = _mm256_maskz_unpacklo_ps(0, a, b);
55309        assert_eq_m256(r, _mm256_setzero_ps());
55310        let r = _mm256_maskz_unpacklo_ps(0b11111111, a, b);
55311        let e = _mm256_set_ps(19., 3., 20., 4., 23., 7., 24., 8.);
55312        assert_eq_m256(r, e);
55313    }
55314
55315    #[simd_test(enable = "avx512f,avx512vl")]
55316    unsafe fn test_mm_mask_unpacklo_ps() {
55317        let a = _mm_set_ps(1., 2., 3., 4.);
55318        let b = _mm_set_ps(17., 18., 19., 20.);
55319        let r = _mm_mask_unpacklo_ps(a, 0, a, b);
55320        assert_eq_m128(r, a);
55321        let r = _mm_mask_unpacklo_ps(a, 0b00001111, a, b);
55322        let e = _mm_set_ps(19., 3., 20., 4.);
55323        assert_eq_m128(r, e);
55324    }
55325
55326    #[simd_test(enable = "avx512f,avx512vl")]
55327    unsafe fn test_mm_maskz_unpacklo_ps() {
55328        let a = _mm_set_ps(1., 2., 3., 4.);
55329        let b = _mm_set_ps(17., 18., 19., 20.);
55330        let r = _mm_maskz_unpacklo_ps(0, a, b);
55331        assert_eq_m128(r, _mm_setzero_ps());
55332        let r = _mm_maskz_unpacklo_ps(0b00001111, a, b);
55333        let e = _mm_set_ps(19., 3., 20., 4.);
55334        assert_eq_m128(r, e);
55335    }
55336
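    // `alignr_epi32` concatenates `a` (upper half) and `b` (lower half), shifts the
    // concatenation right by IMM8 32-bit elements and keeps the low half. For the 512-bit
    // form the shift count is effectively taken modulo 16 (Intel's pseudocode uses only
    // IMM8[3:0]), which is why the `::<16>` case below compares equal to `b`.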
55337    #[simd_test(enable = "avx512f")]
55338    unsafe fn test_mm512_alignr_epi32() {
55339        let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
55340        let b = _mm512_set_epi32(
55341            32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
55342        );
55343        let r = _mm512_alignr_epi32::<0>(a, b);
55344        assert_eq_m512i(r, b);
55345        let r = _mm512_alignr_epi32::<16>(a, b);
55346        assert_eq_m512i(r, b);
55347        let r = _mm512_alignr_epi32::<1>(a, b);
55348        let e = _mm512_set_epi32(
55349            1, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18,
55350        );
55351        assert_eq_m512i(r, e);
55352    }
55353
55354    #[simd_test(enable = "avx512f")]
55355    unsafe fn test_mm512_mask_alignr_epi32() {
55356        let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
55357        let b = _mm512_set_epi32(
55358            32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
55359        );
55360        let r = _mm512_mask_alignr_epi32::<1>(a, 0, a, b);
55361        assert_eq_m512i(r, a);
55362        let r = _mm512_mask_alignr_epi32::<1>(a, 0b11111111_11111111, a, b);
55363        let e = _mm512_set_epi32(
55364            1, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18,
55365        );
55366        assert_eq_m512i(r, e);
55367    }
55368
55369    #[simd_test(enable = "avx512f")]
55370    unsafe fn test_mm512_maskz_alignr_epi32() {
55371        let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
55372        let b = _mm512_set_epi32(
55373            32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
55374        );
55375        let r = _mm512_maskz_alignr_epi32::<1>(0, a, b);
55376        assert_eq_m512i(r, _mm512_setzero_si512());
55377        let r = _mm512_maskz_alignr_epi32::<1>(0b00000000_11111111, a, b);
55378        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 25, 24, 23, 22, 21, 20, 19, 18);
55379        assert_eq_m512i(r, e);
55380    }
55381
55382    #[simd_test(enable = "avx512f,avx512vl")]
55383    unsafe fn test_mm256_alignr_epi32() {
55384        let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
55385        let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
55386        let r = _mm256_alignr_epi32::<0>(a, b);
55387        assert_eq_m256i(r, b);
55388        let r = _mm256_alignr_epi32::<1>(a, b);
55389        let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
55390        assert_eq_m256i(r, e);
55391    }
55392
55393    #[simd_test(enable = "avx512f,avx512vl")]
55394    unsafe fn test_mm256_mask_alignr_epi32() {
55395        let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
55396        let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
55397        let r = _mm256_mask_alignr_epi32::<1>(a, 0, a, b);
55398        assert_eq_m256i(r, a);
55399        let r = _mm256_mask_alignr_epi32::<1>(a, 0b11111111, a, b);
55400        let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
55401        assert_eq_m256i(r, e);
55402    }
55403
55404    #[simd_test(enable = "avx512f,avx512vl")]
55405    unsafe fn test_mm256_maskz_alignr_epi32() {
55406        let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
55407        let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
55408        let r = _mm256_maskz_alignr_epi32::<1>(0, a, b);
55409        assert_eq_m256i(r, _mm256_setzero_si256());
55410        let r = _mm256_maskz_alignr_epi32::<1>(0b11111111, a, b);
55411        let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
55412        assert_eq_m256i(r, e);
55413    }
55414
55415    #[simd_test(enable = "avx512f,avx512vl")]
55416    unsafe fn test_mm_alignr_epi32() {
55417        let a = _mm_set_epi32(4, 3, 2, 1);
55418        let b = _mm_set_epi32(8, 7, 6, 5);
55419        let r = _mm_alignr_epi32::<0>(a, b);
55420        assert_eq_m128i(r, b);
55421        let r = _mm_alignr_epi32::<1>(a, b);
55422        let e = _mm_set_epi32(1, 8, 7, 6);
55423        assert_eq_m128i(r, e);
55424    }
55425
55426    #[simd_test(enable = "avx512f,avx512vl")]
55427    unsafe fn test_mm_mask_alignr_epi32() {
55428        let a = _mm_set_epi32(4, 3, 2, 1);
55429        let b = _mm_set_epi32(8, 7, 6, 5);
55430        let r = _mm_mask_alignr_epi32::<1>(a, 0, a, b);
55431        assert_eq_m128i(r, a);
55432        let r = _mm_mask_alignr_epi32::<1>(a, 0b00001111, a, b);
55433        let e = _mm_set_epi32(1, 8, 7, 6);
55434        assert_eq_m128i(r, e);
55435    }
55436
55437    #[simd_test(enable = "avx512f,avx512vl")]
55438    unsafe fn test_mm_maskz_alignr_epi32() {
55439        let a = _mm_set_epi32(4, 3, 2, 1);
55440        let b = _mm_set_epi32(8, 7, 6, 5);
55441        let r = _mm_maskz_alignr_epi32::<1>(0, a, b);
55442        assert_eq_m128i(r, _mm_setzero_si128());
55443        let r = _mm_maskz_alignr_epi32::<1>(0b00001111, a, b);
55444        let e = _mm_set_epi32(1, 8, 7, 6);
55445        assert_eq_m128i(r, e);
55446    }
55447
55448    #[simd_test(enable = "avx512f")]
55449    unsafe fn test_mm512_and_epi32() {
55450        #[rustfmt::skip]
55451        let a = _mm512_set_epi32(
55452            1 << 1 | 1 << 2, 0, 0, 0,
55453            0, 0, 0, 0,
55454            0, 0, 0, 0,
55455            0, 0, 0, 1 << 1 | 1 << 3,
55456        );
55457        #[rustfmt::skip]
55458        let b = _mm512_set_epi32(
55459            1 << 1, 0, 0, 0,
55460            0, 0, 0, 0,
55461            0, 0, 0, 0,
55462            0, 0, 0, 1 << 3 | 1 << 4,
55463        );
55464        let r = _mm512_and_epi32(a, b);
55465        let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
55466        assert_eq_m512i(r, e);
55467    }
55468
55469    #[simd_test(enable = "avx512f")]
55470    unsafe fn test_mm512_mask_and_epi32() {
55471        #[rustfmt::skip]
55472        let a = _mm512_set_epi32(
55473            1 << 1 | 1 << 2, 0, 0, 0,
55474            0, 0, 0, 0,
55475            0, 0, 0, 0,
55476            0, 0, 0, 1 << 1 | 1 << 3,
55477        );
55478        #[rustfmt::skip]
55479        let b = _mm512_set_epi32(
55480            1 << 1, 0, 0, 0,
55481            0, 0, 0, 0,
55482            0, 0, 0, 0,
55483            0, 0, 0, 1 << 3 | 1 << 4,
55484        );
55485        let r = _mm512_mask_and_epi32(a, 0, a, b);
55486        assert_eq_m512i(r, a);
55487        let r = _mm512_mask_and_epi32(a, 0b01111111_11111111, a, b);
55488        #[rustfmt::skip]
55489        let e = _mm512_set_epi32(
55490            1 << 1 | 1 << 2, 0, 0, 0,
55491            0, 0, 0, 0,
55492            0, 0, 0, 0,
55493            0, 0, 0, 1 << 3,
55494        );
55495        assert_eq_m512i(r, e);
55496    }
55497
55498    #[simd_test(enable = "avx512f")]
55499    unsafe fn test_mm512_maskz_and_epi32() {
55500        #[rustfmt::skip]
55501        let a = _mm512_set_epi32(
55502            1 << 1 | 1 << 2, 0, 0, 0,
55503            0, 0, 0, 0,
55504            0, 0, 0, 0,
55505            0, 0, 0, 1 << 1 | 1 << 3,
55506        );
55507        #[rustfmt::skip]
55508        let b = _mm512_set_epi32(
55509            1 << 1, 0, 0, 0,
55510            0, 0, 0, 0,
55511            0, 0, 0, 0,
55512            0, 0, 0, 1 << 3 | 1 << 4,
55513        );
55514        let r = _mm512_maskz_and_epi32(0, a, b);
55515        assert_eq_m512i(r, _mm512_setzero_si512());
55516        let r = _mm512_maskz_and_epi32(0b00000000_11111111, a, b);
55517        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
55518        assert_eq_m512i(r, e);
55519    }
55520
55521    #[simd_test(enable = "avx512f,avx512vl")]
55522    unsafe fn test_mm256_mask_and_epi32() {
55523        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55524        let b = _mm256_set1_epi32(1 << 1);
55525        let r = _mm256_mask_and_epi32(a, 0, a, b);
55526        assert_eq_m256i(r, a);
55527        let r = _mm256_mask_and_epi32(a, 0b11111111, a, b);
55528        let e = _mm256_set1_epi32(1 << 1);
55529        assert_eq_m256i(r, e);
55530    }
55531
55532    #[simd_test(enable = "avx512f,avx512vl")]
55533    unsafe fn test_mm256_maskz_and_epi32() {
55534        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55535        let b = _mm256_set1_epi32(1 << 1);
55536        let r = _mm256_maskz_and_epi32(0, a, b);
55537        assert_eq_m256i(r, _mm256_setzero_si256());
55538        let r = _mm256_maskz_and_epi32(0b11111111, a, b);
55539        let e = _mm256_set1_epi32(1 << 1);
55540        assert_eq_m256i(r, e);
55541    }
55542
55543    #[simd_test(enable = "avx512f,avx512vl")]
55544    unsafe fn test_mm_mask_and_epi32() {
55545        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55546        let b = _mm_set1_epi32(1 << 1);
55547        let r = _mm_mask_and_epi32(a, 0, a, b);
55548        assert_eq_m128i(r, a);
55549        let r = _mm_mask_and_epi32(a, 0b00001111, a, b);
55550        let e = _mm_set1_epi32(1 << 1);
55551        assert_eq_m128i(r, e);
55552    }
55553
55554    #[simd_test(enable = "avx512f,avx512vl")]
55555    unsafe fn test_mm_maskz_and_epi32() {
55556        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55557        let b = _mm_set1_epi32(1 << 1);
55558        let r = _mm_maskz_and_epi32(0, a, b);
55559        assert_eq_m128i(r, _mm_setzero_si128());
55560        let r = _mm_maskz_and_epi32(0b00001111, a, b);
55561        let e = _mm_set1_epi32(1 << 1);
55562        assert_eq_m128i(r, e);
55563    }
55564
55565    #[simd_test(enable = "avx512f")]
55566    unsafe fn test_mm512_and_si512() {
55567        #[rustfmt::skip]
55568        let a = _mm512_set_epi32(
55569            1 << 1 | 1 << 2, 0, 0, 0,
55570            0, 0, 0, 0,
55571            0, 0, 0, 0,
55572            0, 0, 0, 1 << 1 | 1 << 3,
55573        );
55574        #[rustfmt::skip]
55575        let b = _mm512_set_epi32(
55576            1 << 1, 0, 0, 0,
55577            0, 0, 0, 0,
55578            0, 0, 0, 0,
55579            0, 0, 0, 1 << 3 | 1 << 4,
55580        );
55581        let r = _mm512_and_si512(a, b);
55582        let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
55583        assert_eq_m512i(r, e);
55584    }
55585
55586    #[simd_test(enable = "avx512f")]
55587    unsafe fn test_mm512_or_epi32() {
55588        #[rustfmt::skip]
55589        let a = _mm512_set_epi32(
55590            1 << 1 | 1 << 2, 0, 0, 0,
55591            0, 0, 0, 0,
55592            0, 0, 0, 0,
55593            0, 0, 0, 1 << 1 | 1 << 3,
55594        );
55595        #[rustfmt::skip]
55596        let b = _mm512_set_epi32(
55597            1 << 1, 0, 0, 0,
55598            0, 0, 0, 0,
55599            0, 0, 0, 0,
55600            0, 0, 0, 1 << 3 | 1 << 4,
55601        );
55602        let r = _mm512_or_epi32(a, b);
55603        #[rustfmt::skip]
55604        let e = _mm512_set_epi32(
55605            1 << 1 | 1 << 2, 0, 0, 0,
55606            0, 0, 0, 0,
55607            0, 0, 0, 0,
55608            0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
55609        );
55610        assert_eq_m512i(r, e);
55611    }
55612
55613    #[simd_test(enable = "avx512f")]
55614    unsafe fn test_mm512_mask_or_epi32() {
55615        #[rustfmt::skip]
55616        let a = _mm512_set_epi32(
55617            1 << 1 | 1 << 2, 0, 0, 0,
55618            0, 0, 0, 0,
55619            0, 0, 0, 0,
55620            0, 0, 0, 1 << 1 | 1 << 3,
55621        );
55622        #[rustfmt::skip]
55623        let b = _mm512_set_epi32(
55624            1 << 1, 0, 0, 0,
55625            0, 0, 0, 0,
55626            0, 0, 0, 0,
55627            0, 0, 0, 1 << 3 | 1 << 4,
55628        );
55629        let r = _mm512_mask_or_epi32(a, 0, a, b);
55630        assert_eq_m512i(r, a);
55631        let r = _mm512_mask_or_epi32(a, 0b11111111_11111111, a, b);
55632        #[rustfmt::skip]
55633        let e = _mm512_set_epi32(
55634            1 << 1 | 1 << 2, 0, 0, 0,
55635            0, 0, 0, 0,
55636            0, 0, 0, 0,
55637            0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
55638        );
55639        assert_eq_m512i(r, e);
55640    }
55641
55642    #[simd_test(enable = "avx512f")]
55643    unsafe fn test_mm512_maskz_or_epi32() {
55644        #[rustfmt::skip]
55645        let a = _mm512_set_epi32(
55646            1 << 1 | 1 << 2, 0, 0, 0,
55647            0, 0, 0, 0,
55648            0, 0, 0, 0,
55649            0, 0, 0, 1 << 1 | 1 << 3,
55650        );
55651        #[rustfmt::skip]
55652        let b = _mm512_set_epi32(
55653            1 << 1, 0, 0, 0,
55654            0, 0, 0, 0,
55655            0, 0, 0, 0,
55656            0, 0, 0, 1 << 3 | 1 << 4,
55657        );
55658        let r = _mm512_maskz_or_epi32(0, a, b);
55659        assert_eq_m512i(r, _mm512_setzero_si512());
55660        let r = _mm512_maskz_or_epi32(0b00000000_11111111, a, b);
55661        #[rustfmt::skip]
55662        let e = _mm512_set_epi32(
55663            0, 0, 0, 0,
55664            0, 0, 0, 0,
55665            0, 0, 0, 0,
55666            0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
55667        );
55668        assert_eq_m512i(r, e);
55669    }
55670
55671    #[simd_test(enable = "avx512f,avx512vl")]
55672    unsafe fn test_mm256_or_epi32() {
55673        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55674        let b = _mm256_set1_epi32(1 << 1);
55675        let r = _mm256_or_epi32(a, b);
55676        let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
55677        assert_eq_m256i(r, e);
55678    }
55679
55680    #[simd_test(enable = "avx512f,avx512vl")]
55681    unsafe fn test_mm256_mask_or_epi32() {
55682        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55683        let b = _mm256_set1_epi32(1 << 1);
55684        let r = _mm256_mask_or_epi32(a, 0, a, b);
55685        assert_eq_m256i(r, a);
55686        let r = _mm256_mask_or_epi32(a, 0b11111111, a, b);
55687        let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
55688        assert_eq_m256i(r, e);
55689    }
55690
55691    #[simd_test(enable = "avx512f,avx512vl")]
55692    unsafe fn test_mm256_maskz_or_epi32() {
55693        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55694        let b = _mm256_set1_epi32(1 << 1);
55695        let r = _mm256_maskz_or_epi32(0, a, b);
55696        assert_eq_m256i(r, _mm256_setzero_si256());
55697        let r = _mm256_maskz_or_epi32(0b11111111, a, b);
55698        let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
55699        assert_eq_m256i(r, e);
55700    }
55701
55702    #[simd_test(enable = "avx512f,avx512vl")]
55703    unsafe fn test_mm_or_epi32() {
55704        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55705        let b = _mm_set1_epi32(1 << 1);
55706        let r = _mm_or_epi32(a, b);
55707        let e = _mm_set1_epi32(1 << 1 | 1 << 2);
55708        assert_eq_m128i(r, e);
55709    }
55710
55711    #[simd_test(enable = "avx512f,avx512vl")]
55712    unsafe fn test_mm_mask_or_epi32() {
55713        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55714        let b = _mm_set1_epi32(1 << 1);
55715        let r = _mm_mask_or_epi32(a, 0, a, b);
55716        assert_eq_m128i(r, a);
55717        let r = _mm_mask_or_epi32(a, 0b00001111, a, b);
55718        let e = _mm_set1_epi32(1 << 1 | 1 << 2);
55719        assert_eq_m128i(r, e);
55720    }
55721
55722    #[simd_test(enable = "avx512f,avx512vl")]
55723    unsafe fn test_mm_maskz_or_epi32() {
55724        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55725        let b = _mm_set1_epi32(1 << 1);
55726        let r = _mm_maskz_or_epi32(0, a, b);
55727        assert_eq_m128i(r, _mm_setzero_si128());
55728        let r = _mm_maskz_or_epi32(0b00001111, a, b);
55729        let e = _mm_set1_epi32(1 << 1 | 1 << 2);
55730        assert_eq_m128i(r, e);
55731    }
55732
55733    #[simd_test(enable = "avx512f")]
55734    unsafe fn test_mm512_or_si512() {
55735        #[rustfmt::skip]
55736        let a = _mm512_set_epi32(
55737            1 << 1 | 1 << 2, 0, 0, 0,
55738            0, 0, 0, 0,
55739            0, 0, 0, 0,
55740            0, 0, 0, 1 << 1 | 1 << 3,
55741        );
55742        #[rustfmt::skip]
55743        let b = _mm512_set_epi32(
55744            1 << 1, 0, 0, 0,
55745            0, 0, 0, 0,
55746            0, 0, 0, 0,
55747            0, 0, 0, 1 << 3 | 1 << 4,
55748        );
55749        let r = _mm512_or_si512(a, b);
55750        #[rustfmt::skip]
55751        let e = _mm512_set_epi32(
55752            1 << 1 | 1 << 2, 0, 0, 0,
55753            0, 0, 0, 0,
55754            0, 0, 0, 0,
55755            0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
55756        );
55757        assert_eq_m512i(r, e);
55758    }
55759
55760    #[simd_test(enable = "avx512f")]
55761    unsafe fn test_mm512_xor_epi32() {
55762        #[rustfmt::skip]
55763        let a = _mm512_set_epi32(
55764            1 << 1 | 1 << 2, 0, 0, 0,
55765            0, 0, 0, 0,
55766            0, 0, 0, 0,
55767            0, 0, 0, 1 << 1 | 1 << 3,
55768        );
55769        #[rustfmt::skip]
55770        let b = _mm512_set_epi32(
55771            1 << 1, 0, 0, 0,
55772            0, 0, 0, 0,
55773            0, 0, 0, 0,
55774            0, 0, 0, 1 << 3 | 1 << 4,
55775        );
55776        let r = _mm512_xor_epi32(a, b);
55777        #[rustfmt::skip]
55778        let e = _mm512_set_epi32(
55779            1 << 2, 0, 0, 0,
55780            0, 0, 0, 0,
55781            0, 0, 0, 0,
55782            0, 0, 0, 1 << 1 | 1 << 4,
55783        );
55784        assert_eq_m512i(r, e);
55785    }
55786
55787    #[simd_test(enable = "avx512f")]
55788    unsafe fn test_mm512_mask_xor_epi32() {
55789        #[rustfmt::skip]
55790        let a = _mm512_set_epi32(
55791            1 << 1 | 1 << 2, 0, 0, 0,
55792            0, 0, 0, 0,
55793            0, 0, 0, 0,
55794            0, 0, 0, 1 << 1 | 1 << 3,
55795        );
55796        #[rustfmt::skip]
55797        let b = _mm512_set_epi32(
55798            1 << 1, 0, 0, 0,
55799            0, 0, 0, 0,
55800            0, 0, 0, 0,
55801            0, 0, 0, 1 << 3 | 1 << 4,
55802        );
55803        let r = _mm512_mask_xor_epi32(a, 0, a, b);
55804        assert_eq_m512i(r, a);
55805        let r = _mm512_mask_xor_epi32(a, 0b01111111_11111111, a, b);
55806        #[rustfmt::skip]
55807        let e = _mm512_set_epi32(
55808            1 << 1 | 1 << 2, 0, 0, 0,
55809            0, 0, 0, 0,
55810            0, 0, 0, 0,
55811            0, 0, 0, 1 << 1 | 1 << 4,
55812        );
55813        assert_eq_m512i(r, e);
55814    }
55815
55816    #[simd_test(enable = "avx512f")]
55817    unsafe fn test_mm512_maskz_xor_epi32() {
55818        #[rustfmt::skip]
55819        let a = _mm512_set_epi32(
55820            1 << 1 | 1 << 2, 0, 0, 0,
55821            0, 0, 0, 0,
55822            0, 0, 0, 0,
55823            0, 0, 0, 1 << 1 | 1 << 3,
55824        );
55825        #[rustfmt::skip]
55826        let b = _mm512_set_epi32(
55827            1 << 1, 0, 0, 0,
55828            0, 0, 0, 0,
55829            0, 0, 0, 0,
55830            0, 0, 0, 1 << 3 | 1 << 4,
55831        );
55832        let r = _mm512_maskz_xor_epi32(0, a, b);
55833        assert_eq_m512i(r, _mm512_setzero_si512());
55834        let r = _mm512_maskz_xor_epi32(0b00000000_11111111, a, b);
55835        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 4);
55836        assert_eq_m512i(r, e);
55837    }
55838
55839    #[simd_test(enable = "avx512f,avx512vl")]
55840    unsafe fn test_mm256_xor_epi32() {
55841        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55842        let b = _mm256_set1_epi32(1 << 1);
55843        let r = _mm256_xor_epi32(a, b);
55844        let e = _mm256_set1_epi32(1 << 2);
55845        assert_eq_m256i(r, e);
55846    }
55847
55848    #[simd_test(enable = "avx512f,avx512vl")]
55849    unsafe fn test_mm256_mask_xor_epi32() {
55850        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55851        let b = _mm256_set1_epi32(1 << 1);
55852        let r = _mm256_mask_xor_epi32(a, 0, a, b);
55853        assert_eq_m256i(r, a);
55854        let r = _mm256_mask_xor_epi32(a, 0b11111111, a, b);
55855        let e = _mm256_set1_epi32(1 << 2);
55856        assert_eq_m256i(r, e);
55857    }
55858
55859    #[simd_test(enable = "avx512f,avx512vl")]
55860    unsafe fn test_mm256_maskz_xor_epi32() {
55861        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55862        let b = _mm256_set1_epi32(1 << 1);
55863        let r = _mm256_maskz_xor_epi32(0, a, b);
55864        assert_eq_m256i(r, _mm256_setzero_si256());
55865        let r = _mm256_maskz_xor_epi32(0b11111111, a, b);
55866        let e = _mm256_set1_epi32(1 << 2);
55867        assert_eq_m256i(r, e);
55868    }
55869
55870    #[simd_test(enable = "avx512f,avx512vl")]
55871    unsafe fn test_mm_xor_epi32() {
55872        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55873        let b = _mm_set1_epi32(1 << 1);
55874        let r = _mm_xor_epi32(a, b);
55875        let e = _mm_set1_epi32(1 << 2);
55876        assert_eq_m128i(r, e);
55877    }
55878
55879    #[simd_test(enable = "avx512f,avx512vl")]
55880    unsafe fn test_mm_mask_xor_epi32() {
55881        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55882        let b = _mm_set1_epi32(1 << 1);
55883        let r = _mm_mask_xor_epi32(a, 0, a, b);
55884        assert_eq_m128i(r, a);
55885        let r = _mm_mask_xor_epi32(a, 0b00001111, a, b);
55886        let e = _mm_set1_epi32(1 << 2);
55887        assert_eq_m128i(r, e);
55888    }
55889
55890    #[simd_test(enable = "avx512f,avx512vl")]
55891    unsafe fn test_mm_maskz_xor_epi32() {
55892        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55893        let b = _mm_set1_epi32(1 << 1);
55894        let r = _mm_maskz_xor_epi32(0, a, b);
55895        assert_eq_m128i(r, _mm_setzero_si128());
55896        let r = _mm_maskz_xor_epi32(0b00001111, a, b);
55897        let e = _mm_set1_epi32(1 << 2);
55898        assert_eq_m128i(r, e);
55899    }
55900
55901    #[simd_test(enable = "avx512f")]
55902    unsafe fn test_mm512_xor_si512() {
55903        #[rustfmt::skip]
55904        let a = _mm512_set_epi32(
55905            1 << 1 | 1 << 2, 0, 0, 0,
55906            0, 0, 0, 0,
55907            0, 0, 0, 0,
55908            0, 0, 0, 1 << 1 | 1 << 3,
55909        );
55910        #[rustfmt::skip]
55911        let b = _mm512_set_epi32(
55912            1 << 1, 0, 0, 0,
55913            0, 0, 0, 0,
55914            0, 0, 0, 0,
55915            0, 0, 0, 1 << 3 | 1 << 4,
55916        );
55917        let r = _mm512_xor_si512(a, b);
55918        #[rustfmt::skip]
55919        let e = _mm512_set_epi32(
55920            1 << 2, 0, 0, 0,
55921            0, 0, 0, 0,
55922            0, 0, 0, 0,
55923            0, 0, 0, 1 << 1 | 1 << 4,
55924        );
55925        assert_eq_m512i(r, e);
55926    }
55927
55928    #[simd_test(enable = "avx512f")]
55929    unsafe fn test_mm512_andnot_epi32() {
55930        let a = _mm512_set1_epi32(0);
55931        let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
55932        let r = _mm512_andnot_epi32(a, b);
55933        let e = _mm512_set1_epi32(1 << 3 | 1 << 4);
55934        assert_eq_m512i(r, e);
55935    }
55936
55937    #[simd_test(enable = "avx512f")]
55938    unsafe fn test_mm512_mask_andnot_epi32() {
55939        let a = _mm512_set1_epi32(1 << 1 | 1 << 2);
55940        let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
55941        let r = _mm512_mask_andnot_epi32(a, 0, a, b);
55942        assert_eq_m512i(r, a);
55943        let r = _mm512_mask_andnot_epi32(a, 0b11111111_11111111, a, b);
55944        let e = _mm512_set1_epi32(1 << 3 | 1 << 4);
55945        assert_eq_m512i(r, e);
55946    }
55947
55948    #[simd_test(enable = "avx512f")]
55949    unsafe fn test_mm512_maskz_andnot_epi32() {
55950        let a = _mm512_set1_epi32(1 << 1 | 1 << 2);
55951        let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
55952        let r = _mm512_maskz_andnot_epi32(0, a, b);
55953        assert_eq_m512i(r, _mm512_setzero_si512());
55954        let r = _mm512_maskz_andnot_epi32(0b00000000_11111111, a, b);
55955        #[rustfmt::skip]
55956        let e = _mm512_set_epi32(
55957            0, 0, 0, 0,
55958            0, 0, 0, 0,
55959            1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4,
55960            1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4,
55961        );
55962        assert_eq_m512i(r, e);
55963    }
55964
55965    #[simd_test(enable = "avx512f,avx512vl")]
55966    unsafe fn test_mm256_mask_andnot_epi32() {
55967        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55968        let b = _mm256_set1_epi32(1 << 3 | 1 << 4);
55969        let r = _mm256_mask_andnot_epi32(a, 0, a, b);
55970        assert_eq_m256i(r, a);
55971        let r = _mm256_mask_andnot_epi32(a, 0b11111111, a, b);
55972        let e = _mm256_set1_epi32(1 << 3 | 1 << 4);
55973        assert_eq_m256i(r, e);
55974    }
55975
55976    #[simd_test(enable = "avx512f,avx512vl")]
55977    unsafe fn test_mm256_maskz_andnot_epi32() {
55978        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55979        let b = _mm256_set1_epi32(1 << 3 | 1 << 4);
55980        let r = _mm256_maskz_andnot_epi32(0, a, b);
55981        assert_eq_m256i(r, _mm256_setzero_si256());
55982        let r = _mm256_maskz_andnot_epi32(0b11111111, a, b);
55983        let e = _mm256_set1_epi32(1 << 3 | 1 << 4);
55984        assert_eq_m256i(r, e);
55985    }
55986
55987    #[simd_test(enable = "avx512f,avx512vl")]
55988    unsafe fn test_mm_mask_andnot_epi32() {
55989        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55990        let b = _mm_set1_epi32(1 << 3 | 1 << 4);
55991        let r = _mm_mask_andnot_epi32(a, 0, a, b);
55992        assert_eq_m128i(r, a);
55993        let r = _mm_mask_andnot_epi32(a, 0b00001111, a, b);
55994        let e = _mm_set1_epi32(1 << 3 | 1 << 4);
55995        assert_eq_m128i(r, e);
55996    }
55997
55998    #[simd_test(enable = "avx512f,avx512vl")]
55999    unsafe fn test_mm_maskz_andnot_epi32() {
56000        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
56001        let b = _mm_set1_epi32(1 << 3 | 1 << 4);
56002        let r = _mm_maskz_andnot_epi32(0, a, b);
56003        assert_eq_m128i(r, _mm_setzero_si128());
56004        let r = _mm_maskz_andnot_epi32(0b00001111, a, b);
56005        let e = _mm_set1_epi32(1 << 3 | 1 << 4);
56006        assert_eq_m128i(r, e);
56007    }
56008
56009    #[simd_test(enable = "avx512f")]
56010    unsafe fn test_cvtmask16_u32() {
56011        let a: __mmask16 = 0b11001100_00110011;
56012        let r = _cvtmask16_u32(a);
56013        let e: u32 = 0b11001100_00110011;
56014        assert_eq!(r, e);
56015    }
56016
56017    #[simd_test(enable = "avx512f")]
56018    unsafe fn test_cvtu32_mask16() {
56019        let a: u32 = 0b11001100_00110011;
56020        let r = _cvtu32_mask16(a);
56021        let e: __mmask16 = 0b11001100_00110011;
56022        assert_eq!(r, e);
56023    }
56024
56025    #[simd_test(enable = "avx512f")]
56026    unsafe fn test_mm512_kand() {
56027        let a: u16 = 0b11001100_00110011;
56028        let b: u16 = 0b11001100_00110011;
56029        let r = _mm512_kand(a, b);
56030        let e: u16 = 0b11001100_00110011;
56031        assert_eq!(r, e);
56032    }
56033
56034    #[simd_test(enable = "avx512f")]
56035    unsafe fn test_kand_mask16() {
56036        let a: u16 = 0b11001100_00110011;
56037        let b: u16 = 0b11001100_00110011;
56038        let r = _kand_mask16(a, b);
56039        let e: u16 = 0b11001100_00110011;
56040        assert_eq!(r, e);
56041    }
56042
56043    #[simd_test(enable = "avx512f")]
56044    unsafe fn test_mm512_kor() {
56045        let a: u16 = 0b11001100_00110011;
56046        let b: u16 = 0b00101110_00001011;
56047        let r = _mm512_kor(a, b);
56048        let e: u16 = 0b11101110_00111011;
56049        assert_eq!(r, e);
56050    }
56051
56052    #[simd_test(enable = "avx512f")]
56053    unsafe fn test_kor_mask16() {
56054        let a: u16 = 0b11001100_00110011;
56055        let b: u16 = 0b00101110_00001011;
56056        let r = _kor_mask16(a, b);
56057        let e: u16 = 0b11101110_00111011;
56058        assert_eq!(r, e);
56059    }
56060
56061    #[simd_test(enable = "avx512f")]
56062    unsafe fn test_mm512_kxor() {
56063        let a: u16 = 0b11001100_00110011;
56064        let b: u16 = 0b00101110_00001011;
56065        let r = _mm512_kxor(a, b);
56066        let e: u16 = 0b11100010_00111000;
56067        assert_eq!(r, e);
56068    }
56069
56070    #[simd_test(enable = "avx512f")]
56071    unsafe fn test_kxor_mask16() {
56072        let a: u16 = 0b11001100_00110011;
56073        let b: u16 = 0b00101110_00001011;
56074        let r = _kxor_mask16(a, b);
56075        let e: u16 = 0b11100010_00111000;
56076        assert_eq!(r, e);
56077    }
56078
56079    #[simd_test(enable = "avx512f")]
56080    unsafe fn test_mm512_knot() {
56081        let a: u16 = 0b11001100_00110011;
56082        let r = _mm512_knot(a);
56083        let e: u16 = 0b00110011_11001100;
56084        assert_eq!(r, e);
56085    }
56086
56087    #[simd_test(enable = "avx512f")]
56088    unsafe fn test_knot_mask16() {
56089        let a: u16 = 0b11001100_00110011;
56090        let r = _knot_mask16(a);
56091        let e: u16 = 0b00110011_11001100;
56092        assert_eq!(r, e);
56093    }
56094
56095    #[simd_test(enable = "avx512f")]
56096    unsafe fn test_mm512_kandn() {
56097        let a: u16 = 0b11001100_00110011;
56098        let b: u16 = 0b00101110_00001011;
56099        let r = _mm512_kandn(a, b);
56100        let e: u16 = 0b00100010_00001000;
56101        assert_eq!(r, e);
56102    }
56103
56104    #[simd_test(enable = "avx512f")]
56105    unsafe fn test_kandn_mask16() {
56106        let a: u16 = 0b11001100_00110011;
56107        let b: u16 = 0b00101110_00001011;
56108        let r = _kandn_mask16(a, b);
56109        let e: u16 = 0b00100010_00001000;
56110        assert_eq!(r, e);
56111    }
56112
56113    #[simd_test(enable = "avx512f")]
56114    unsafe fn test_mm512_kxnor() {
56115        let a: u16 = 0b11001100_00110011;
56116        let b: u16 = 0b00101110_00001011;
56117        let r = _mm512_kxnor(a, b);
56118        let e: u16 = 0b00011101_11000111;
56119        assert_eq!(r, e);
56120    }
56121
56122    #[simd_test(enable = "avx512f")]
56123    unsafe fn test_kxnor_mask16() {
56124        let a: u16 = 0b11001100_00110011;
56125        let b: u16 = 0b00101110_00001011;
56126        let r = _kxnor_mask16(a, b);
56127        let e: u16 = 0b00011101_11000111;
56128        assert_eq!(r, e);
56129    }
56130
56131    #[simd_test(enable = "avx512dq")]
56132    unsafe fn test_kortest_mask16_u8() {
56133        let a: __mmask16 = 0b0110100101101001;
56134        let b: __mmask16 = 0b1011011010110110;
56135        let mut all_ones: u8 = 0;
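        // _kortest_mask16_u8 returns 1 only if `a | b` is all zeros, and writes 1 to `all_ones` only if `a | b` is all ones.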
56136        let r = _kortest_mask16_u8(a, b, &mut all_ones);
56137        assert_eq!(r, 0);
56138        assert_eq!(all_ones, 1);
56139    }
56140
56141    #[simd_test(enable = "avx512dq")]
56142    unsafe fn test_kortestc_mask16_u8() {
56143        let a: __mmask16 = 0b0110100101101001;
56144        let b: __mmask16 = 0b1011011010110110;
56145        let r = _kortestc_mask16_u8(a, b);
56146        assert_eq!(r, 1);
56147    }
56148
56149    #[simd_test(enable = "avx512dq")]
56150    unsafe fn test_kortestz_mask16_u8() {
56151        let a: __mmask16 = 0b0110100101101001;
56152        let b: __mmask16 = 0b1011011010110110;
56153        let r = _kortestz_mask16_u8(a, b);
56154        assert_eq!(r, 0);
56155    }
56156
56157    #[simd_test(enable = "avx512dq")]
56158    unsafe fn test_kshiftli_mask16() {
56159        let a: __mmask16 = 0b1001011011000011;
56160        let r = _kshiftli_mask16::<3>(a);
56161        let e: __mmask16 = 0b1011011000011000;
56162        assert_eq!(r, e);
56163    }
56164
56165    #[simd_test(enable = "avx512dq")]
56166    unsafe fn test_kshiftri_mask16() {
56167        let a: __mmask16 = 0b0110100100111100;
56168        let r = _kshiftri_mask16::<3>(a);
56169        let e: __mmask16 = 0b0000110100100111;
56170        assert_eq!(r, e);
56171    }
56172
56173    #[simd_test(enable = "avx512f")]
56174    unsafe fn test_load_mask16() {
56175        let a: __mmask16 = 0b1001011011000011;
56176        let r = _load_mask16(&a);
56177        let e: __mmask16 = 0b1001011011000011;
56178        assert_eq!(r, e);
56179    }
56180
56181    #[simd_test(enable = "avx512f")]
56182    unsafe fn test_store_mask16() {
56183        let a: __mmask16 = 0b0110100100111100;
56184        let mut r = 0;
56185        _store_mask16(&mut r, a);
56186        let e: __mmask16 = 0b0110100100111100;
56187        assert_eq!(r, e);
56188    }
56189
56190    #[simd_test(enable = "avx512f")]
56191    unsafe fn test_mm512_kmov() {
56192        let a: u16 = 0b11001100_00110011;
56193        let r = _mm512_kmov(a);
56194        let e: u16 = 0b11001100_00110011;
56195        assert_eq!(r, e);
56196    }
56197
56198    #[simd_test(enable = "avx512f")]
56199    unsafe fn test_mm512_int2mask() {
56200        let a: i32 = 0b11001100_00110011;
56201        let r = _mm512_int2mask(a);
56202        let e: u16 = 0b11001100_00110011;
56203        assert_eq!(r, e);
56204    }
56205
56206    #[simd_test(enable = "avx512f")]
56207    unsafe fn test_mm512_mask2int() {
56208        let k1: __mmask16 = 0b11001100_00110011;
56209        let r = _mm512_mask2int(k1);
56210        let e: i32 = 0b11001100_00110011;
56211        assert_eq!(r, e);
56212    }
56213
56214    #[simd_test(enable = "avx512f")]
56215    unsafe fn test_mm512_kunpackb() {
56216        let a: u16 = 0b11001100_00110011;
56217        let b: u16 = 0b00101110_00001011;
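        // kunpackb packs the low byte of `a` into the high byte of the result and the low byte of `b` into the low byte.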
56218        let r = _mm512_kunpackb(a, b);
56219        let e: u16 = 0b00110011_00001011;
56220        assert_eq!(r, e);
56221    }
56222
56223    #[simd_test(enable = "avx512f")]
56224    unsafe fn test_mm512_kortestc() {
56225        let a: u16 = 0b11001100_00110011;
56226        let b: u16 = 0b00101110_00001011;
56227        let r = _mm512_kortestc(a, b);
56228        assert_eq!(r, 0);
56229        let b: u16 = 0b11111111_11111111;
56230        let r = _mm512_kortestc(a, b);
56231        assert_eq!(r, 1);
56232    }
56233
56234    #[simd_test(enable = "avx512f")]
56235    unsafe fn test_mm512_kortestz() {
56236        let a: u16 = 0b11001100_00110011;
56237        let b: u16 = 0b00101110_00001011;
56238        let r = _mm512_kortestz(a, b);
56239        assert_eq!(r, 0);
56240        let r = _mm512_kortestz(0, 0);
56241        assert_eq!(r, 1);
56242    }
56243
56244    #[simd_test(enable = "avx512f")]
56245    unsafe fn test_mm512_test_epi32_mask() {
56246        let a = _mm512_set1_epi32(1 << 0);
56247        let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
56248        let r = _mm512_test_epi32_mask(a, b);
56249        let e: __mmask16 = 0b11111111_11111111;
56250        assert_eq!(r, e);
56251    }
56252
56253    #[simd_test(enable = "avx512f")]
56254    unsafe fn test_mm512_mask_test_epi32_mask() {
56255        let a = _mm512_set1_epi32(1 << 0);
56256        let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
56257        let r = _mm512_mask_test_epi32_mask(0, a, b);
56258        assert_eq!(r, 0);
56259        let r = _mm512_mask_test_epi32_mask(0b11111111_11111111, a, b);
56260        let e: __mmask16 = 0b11111111_11111111;
56261        assert_eq!(r, e);
56262    }
56263
56264    #[simd_test(enable = "avx512f,avx512vl")]
56265    unsafe fn test_mm256_test_epi32_mask() {
56266        let a = _mm256_set1_epi32(1 << 0);
56267        let b = _mm256_set1_epi32(1 << 0 | 1 << 1);
56268        let r = _mm256_test_epi32_mask(a, b);
56269        let e: __mmask8 = 0b11111111;
56270        assert_eq!(r, e);
56271    }
56272
56273    #[simd_test(enable = "avx512f,avx512vl")]
56274    unsafe fn test_mm256_mask_test_epi32_mask() {
56275        let a = _mm256_set1_epi32(1 << 0);
56276        let b = _mm256_set1_epi32(1 << 0 | 1 << 1);
56277        let r = _mm256_mask_test_epi32_mask(0, a, b);
56278        assert_eq!(r, 0);
56279        let r = _mm256_mask_test_epi32_mask(0b11111111, a, b);
56280        let e: __mmask8 = 0b11111111;
56281        assert_eq!(r, e);
56282    }
56283
56284    #[simd_test(enable = "avx512f,avx512vl")]
56285    unsafe fn test_mm_test_epi32_mask() {
56286        let a = _mm_set1_epi32(1 << 0);
56287        let b = _mm_set1_epi32(1 << 0 | 1 << 1);
56288        let r = _mm_test_epi32_mask(a, b);
56289        let e: __mmask8 = 0b00001111;
56290        assert_eq!(r, e);
56291    }
56292
56293    #[simd_test(enable = "avx512f,avx512vl")]
56294    unsafe fn test_mm_mask_test_epi32_mask() {
56295        let a = _mm_set1_epi32(1 << 0);
56296        let b = _mm_set1_epi32(1 << 0 | 1 << 1);
56297        let r = _mm_mask_test_epi32_mask(0, a, b);
56298        assert_eq!(r, 0);
56299        let r = _mm_mask_test_epi32_mask(0b11111111, a, b);
56300        let e: __mmask8 = 0b00001111;
56301        assert_eq!(r, e);
56302    }
56303
56304    #[simd_test(enable = "avx512f")]
56305    unsafe fn test_mm512_testn_epi32_mask() {
56306        let a = _mm512_set1_epi32(1 << 0);
56307        let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
56308        let r = _mm512_testn_epi32_mask(a, b);
56309        let e: __mmask16 = 0b00000000_00000000;
56310        assert_eq!(r, e);
56311    }
56312
56313    #[simd_test(enable = "avx512f")]
56314    unsafe fn test_mm512_mask_testn_epi32_mask() {
56315        let a = _mm512_set1_epi32(1 << 0);
56316        let b = _mm512_set1_epi32(1 << 1);
56317        let r = _mm512_mask_testn_epi32_mask(0, a, b);
56318        assert_eq!(r, 0);
56319        let r = _mm512_mask_testn_epi32_mask(0b11111111_11111111, a, b);
56320        let e: __mmask16 = 0b11111111_11111111;
56321        assert_eq!(r, e);
56322    }
56323
56324    #[simd_test(enable = "avx512f,avx512vl")]
56325    unsafe fn test_mm256_testn_epi32_mask() {
56326        let a = _mm256_set1_epi32(1 << 0);
56327        let b = _mm256_set1_epi32(1 << 1);
56328        let r = _mm256_testn_epi32_mask(a, b);
56329        let e: __mmask8 = 0b11111111;
56330        assert_eq!(r, e);
56331    }
56332
56333    #[simd_test(enable = "avx512f,avx512vl")]
56334    unsafe fn test_mm256_mask_testn_epi32_mask() {
56335        let a = _mm256_set1_epi32(1 << 0);
56336        let b = _mm256_set1_epi32(1 << 1);
56337        let r = _mm256_mask_testn_epi32_mask(0, a, b);
56338        assert_eq!(r, 0);
56339        let r = _mm256_mask_testn_epi32_mask(0b11111111, a, b);
56340        let e: __mmask8 = 0b11111111;
56341        assert_eq!(r, e);
56342    }
56343
56344    #[simd_test(enable = "avx512f,avx512vl")]
56345    unsafe fn test_mm_testn_epi32_mask() {
56346        let a = _mm_set1_epi32(1 << 0);
56347        let b = _mm_set1_epi32(1 << 1);
56348        let r = _mm_testn_epi32_mask(a, b);
56349        let e: __mmask8 = 0b00001111;
56350        assert_eq!(r, e);
56351    }
56352
56353    #[simd_test(enable = "avx512f,avx512vl")]
56354    unsafe fn test_mm_mask_testn_epi32_mask() {
56355        let a = _mm_set1_epi32(1 << 0);
56356        let b = _mm_set1_epi32(1 << 1);
56357        let r = _mm_mask_testn_epi32_mask(0, a, b);
56358        assert_eq!(r, 0);
56359        let r = _mm_mask_testn_epi32_mask(0b11111111, a, b);
56360        let e: __mmask8 = 0b00001111;
56361        assert_eq!(r, e);
56362    }
56363
56364    #[simd_test(enable = "avx512f")]
56365    #[cfg_attr(miri, ignore)]
56366    unsafe fn test_mm512_stream_ps() {
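        // Non-temporal stores require a 64-byte-aligned destination, hence the aligned wrapper struct.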
56367        #[repr(align(64))]
56368        struct Memory {
56369            pub data: [f32; 16], // 64 bytes
56370        }
56371        let a = _mm512_set1_ps(7.0);
56372        let mut mem = Memory { data: [-1.0; 16] };
56373
56374        _mm512_stream_ps(&mut mem.data[0] as *mut f32, a);
56375        for i in 0..16 {
56376            assert_eq!(mem.data[i], get_m512(a, i));
56377        }
56378    }
56379
56380    #[simd_test(enable = "avx512f")]
56381    #[cfg_attr(miri, ignore)]
56382    unsafe fn test_mm512_stream_pd() {
56383        #[repr(align(64))]
56384        struct Memory {
56385            pub data: [f64; 8],
56386        }
56387        let a = _mm512_set1_pd(7.0);
56388        let mut mem = Memory { data: [-1.0; 8] };
56389
56390        _mm512_stream_pd(&mut mem.data[0] as *mut f64, a);
56391        for i in 0..8 {
56392            assert_eq!(mem.data[i], get_m512d(a, i));
56393        }
56394    }
56395
56396    #[simd_test(enable = "avx512f")]
56397    #[cfg_attr(miri, ignore)]
56398    unsafe fn test_mm512_stream_si512() {
56399        #[repr(align(64))]
56400        struct Memory {
56401            pub data: [i64; 8],
56402        }
56403        let a = _mm512_set1_epi32(7);
56404        let mut mem = Memory { data: [-1; 8] };
56405
56406        _mm512_stream_si512(mem.data.as_mut_ptr().cast(), a);
56407        for i in 0..8 {
56408            assert_eq!(mem.data[i], get_m512i(a, i));
56409        }
56410    }
56411
56412    #[simd_test(enable = "avx512f")]
56413    unsafe fn test_mm512_stream_load_si512() {
56414        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
56415        let r = _mm512_stream_load_si512(core::ptr::addr_of!(a) as *const _);
56416        assert_eq_m512i(a, r);
56417    }
56418
56419    #[simd_test(enable = "avx512f")]
56420    unsafe fn test_mm512_reduce_add_epi32() {
56421        let a = _mm512_set1_epi32(1);
56422        let e: i32 = _mm512_reduce_add_epi32(a);
56423        assert_eq!(16, e);
56424    }
56425
56426    #[simd_test(enable = "avx512f")]
56427    unsafe fn test_mm512_mask_reduce_add_epi32() {
56428        let a = _mm512_set1_epi32(1);
56429        let e: i32 = _mm512_mask_reduce_add_epi32(0b11111111_00000000, a);
56430        assert_eq!(8, e);
56431    }
56432
56433    #[simd_test(enable = "avx512f")]
56434    unsafe fn test_mm512_reduce_add_ps() {
56435        let a = _mm512_set1_ps(1.);
56436        let e: f32 = _mm512_reduce_add_ps(a);
56437        assert_eq!(16., e);
56438    }
56439
56440    #[simd_test(enable = "avx512f")]
56441    unsafe fn test_mm512_mask_reduce_add_ps() {
56442        let a = _mm512_set1_ps(1.);
56443        let e: f32 = _mm512_mask_reduce_add_ps(0b11111111_00000000, a);
56444        assert_eq!(8., e);
56445    }
56446
56447    #[simd_test(enable = "avx512f")]
56448    unsafe fn test_mm512_reduce_mul_epi32() {
56449        let a = _mm512_set1_epi32(2);
56450        let e: i32 = _mm512_reduce_mul_epi32(a);
56451        assert_eq!(65536, e);
56452    }
56453
56454    #[simd_test(enable = "avx512f")]
56455    unsafe fn test_mm512_mask_reduce_mul_epi32() {
56456        let a = _mm512_set1_epi32(2);
56457        let e: i32 = _mm512_mask_reduce_mul_epi32(0b11111111_00000000, a);
56458        assert_eq!(256, e);
56459    }
56460
56461    #[simd_test(enable = "avx512f")]
56462    unsafe fn test_mm512_reduce_mul_ps() {
56463        let a = _mm512_set1_ps(2.);
56464        let e: f32 = _mm512_reduce_mul_ps(a);
56465        assert_eq!(65536., e);
56466    }
56467
56468    #[simd_test(enable = "avx512f")]
56469    unsafe fn test_mm512_mask_reduce_mul_ps() {
56470        let a = _mm512_set1_ps(2.);
56471        let e: f32 = _mm512_mask_reduce_mul_ps(0b11111111_00000000, a);
56472        assert_eq!(256., e);
56473    }
56474
56475    #[simd_test(enable = "avx512f")]
56476    unsafe fn test_mm512_reduce_max_epi32() {
56477        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56478        let e: i32 = _mm512_reduce_max_epi32(a);
56479        assert_eq!(15, e);
56480    }
56481
56482    #[simd_test(enable = "avx512f")]
56483    unsafe fn test_mm512_mask_reduce_max_epi32() {
56484        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
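        // Mask bits 8..15 select the upper lanes, which hold 0..7 (`_mm512_set_epi32` lists lane 15 first), so the masked maximum is 7.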
56485        let e: i32 = _mm512_mask_reduce_max_epi32(0b11111111_00000000, a);
56486        assert_eq!(7, e);
56487    }
56488
56489    #[simd_test(enable = "avx512f")]
56490    unsafe fn test_mm512_reduce_max_epu32() {
56491        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56492        let e: u32 = _mm512_reduce_max_epu32(a);
56493        assert_eq!(15, e);
56494    }
56495
56496    #[simd_test(enable = "avx512f")]
56497    unsafe fn test_mm512_mask_reduce_max_epu32() {
56498        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56499        let e: u32 = _mm512_mask_reduce_max_epu32(0b11111111_00000000, a);
56500        assert_eq!(7, e);
56501    }
56502
56503    #[simd_test(enable = "avx512f")]
56504    unsafe fn test_mm512_reduce_max_ps() {
56505        let a = _mm512_set_ps(
56506            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56507        );
56508        let e: f32 = _mm512_reduce_max_ps(a);
56509        assert_eq!(15., e);
56510    }
56511
56512    #[simd_test(enable = "avx512f")]
56513    unsafe fn test_mm512_mask_reduce_max_ps() {
56514        let a = _mm512_set_ps(
56515            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56516        );
56517        let e: f32 = _mm512_mask_reduce_max_ps(0b11111111_00000000, a);
56518        assert_eq!(7., e);
56519    }
56520
56521    #[simd_test(enable = "avx512f")]
56522    unsafe fn test_mm512_reduce_min_epi32() {
56523        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56524        let e: i32 = _mm512_reduce_min_epi32(a);
56525        assert_eq!(0, e);
56526    }
56527
56528    #[simd_test(enable = "avx512f")]
56529    unsafe fn test_mm512_mask_reduce_min_epi32() {
56530        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56531        let e: i32 = _mm512_mask_reduce_min_epi32(0b11111111_00000000, a);
56532        assert_eq!(0, e);
56533    }
56534
56535    #[simd_test(enable = "avx512f")]
56536    unsafe fn test_mm512_reduce_min_epu32() {
56537        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56538        let e: u32 = _mm512_reduce_min_epu32(a);
56539        assert_eq!(0, e);
56540    }
56541
56542    #[simd_test(enable = "avx512f")]
56543    unsafe fn test_mm512_mask_reduce_min_epu32() {
56544        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56545        let e: u32 = _mm512_mask_reduce_min_epu32(0b11111111_00000000, a);
56546        assert_eq!(0, e);
56547    }
56548
56549    #[simd_test(enable = "avx512f")]
56550    unsafe fn test_mm512_reduce_min_ps() {
56551        let a = _mm512_set_ps(
56552            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56553        );
56554        let e: f32 = _mm512_reduce_min_ps(a);
56555        assert_eq!(0., e);
56556    }
56557
56558    #[simd_test(enable = "avx512f")]
56559    unsafe fn test_mm512_mask_reduce_min_ps() {
56560        let a = _mm512_set_ps(
56561            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56562        );
56563        let e: f32 = _mm512_mask_reduce_min_ps(0b11111111_00000000, a);
56564        assert_eq!(0., e);
56565    }
56566
56567    #[simd_test(enable = "avx512f")]
56568    unsafe fn test_mm512_reduce_and_epi32() {
56569        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
56570        let e: i32 = _mm512_reduce_and_epi32(a);
56571        assert_eq!(0, e);
56572    }
56573
56574    #[simd_test(enable = "avx512f")]
56575    unsafe fn test_mm512_mask_reduce_and_epi32() {
56576        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
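        // Only the upper eight lanes (all holding 1) are active, so the AND reduction yields 1; inactive lanes are ignored.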
56577        let e: i32 = _mm512_mask_reduce_and_epi32(0b11111111_00000000, a);
56578        assert_eq!(1, e);
56579    }
56580
56581    #[simd_test(enable = "avx512f")]
56582    unsafe fn test_mm512_reduce_or_epi32() {
56583        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
56584        let e: i32 = _mm512_reduce_or_epi32(a);
56585        assert_eq!(3, e);
56586    }
56587
56588    #[simd_test(enable = "avx512f")]
56589    unsafe fn test_mm512_mask_reduce_or_epi32() {
56590        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
56591        let e: i32 = _mm512_mask_reduce_or_epi32(0b11111111_00000000, a);
56592        assert_eq!(1, e);
56593    }
56594
56595    #[simd_test(enable = "avx512f")]
56596    unsafe fn test_mm512_mask_compress_epi32() {
56597        let src = _mm512_set1_epi32(200);
56598        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56599        let r = _mm512_mask_compress_epi32(src, 0, a);
56600        assert_eq_m512i(r, src);
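        // Lanes whose mask bit is set are packed contiguously into the low lanes; the remaining lanes are copied from `src`.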
56601        let r = _mm512_mask_compress_epi32(src, 0b01010101_01010101, a);
56602        let e = _mm512_set_epi32(
56603            200, 200, 200, 200, 200, 200, 200, 200, 1, 3, 5, 7, 9, 11, 13, 15,
56604        );
56605        assert_eq_m512i(r, e);
56606    }
56607
56608    #[simd_test(enable = "avx512f")]
56609    unsafe fn test_mm512_maskz_compress_epi32() {
56610        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56611        let r = _mm512_maskz_compress_epi32(0, a);
56612        assert_eq_m512i(r, _mm512_setzero_si512());
56613        let r = _mm512_maskz_compress_epi32(0b01010101_01010101, a);
56614        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 5, 7, 9, 11, 13, 15);
56615        assert_eq_m512i(r, e);
56616    }
56617
56618    #[simd_test(enable = "avx512f,avx512vl")]
56619    unsafe fn test_mm256_mask_compress_epi32() {
56620        let src = _mm256_set1_epi32(200);
56621        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
56622        let r = _mm256_mask_compress_epi32(src, 0, a);
56623        assert_eq_m256i(r, src);
56624        let r = _mm256_mask_compress_epi32(src, 0b01010101, a);
56625        let e = _mm256_set_epi32(200, 200, 200, 200, 1, 3, 5, 7);
56626        assert_eq_m256i(r, e);
56627    }
56628
56629    #[simd_test(enable = "avx512f,avx512vl")]
56630    unsafe fn test_mm256_maskz_compress_epi32() {
56631        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
56632        let r = _mm256_maskz_compress_epi32(0, a);
56633        assert_eq_m256i(r, _mm256_setzero_si256());
56634        let r = _mm256_maskz_compress_epi32(0b01010101, a);
56635        let e = _mm256_set_epi32(0, 0, 0, 0, 1, 3, 5, 7);
56636        assert_eq_m256i(r, e);
56637    }
56638
56639    #[simd_test(enable = "avx512f,avx512vl")]
56640    unsafe fn test_mm_mask_compress_epi32() {
56641        let src = _mm_set1_epi32(200);
56642        let a = _mm_set_epi32(0, 1, 2, 3);
56643        let r = _mm_mask_compress_epi32(src, 0, a);
56644        assert_eq_m128i(r, src);
56645        let r = _mm_mask_compress_epi32(src, 0b00000101, a);
56646        let e = _mm_set_epi32(200, 200, 1, 3);
56647        assert_eq_m128i(r, e);
56648    }
56649
56650    #[simd_test(enable = "avx512f,avx512vl")]
56651    unsafe fn test_mm_maskz_compress_epi32() {
56652        let a = _mm_set_epi32(0, 1, 2, 3);
56653        let r = _mm_maskz_compress_epi32(0, a);
56654        assert_eq_m128i(r, _mm_setzero_si128());
56655        let r = _mm_maskz_compress_epi32(0b00000101, a);
56656        let e = _mm_set_epi32(0, 0, 1, 3);
56657        assert_eq_m128i(r, e);
56658    }
56659
56660    #[simd_test(enable = "avx512f")]
56661    unsafe fn test_mm512_mask_compress_ps() {
56662        let src = _mm512_set1_ps(200.);
56663        let a = _mm512_set_ps(
56664            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56665        );
56666        let r = _mm512_mask_compress_ps(src, 0, a);
56667        assert_eq_m512(r, src);
56668        let r = _mm512_mask_compress_ps(src, 0b01010101_01010101, a);
56669        let e = _mm512_set_ps(
56670            200., 200., 200., 200., 200., 200., 200., 200., 1., 3., 5., 7., 9., 11., 13., 15.,
56671        );
56672        assert_eq_m512(r, e);
56673    }
56674
56675    #[simd_test(enable = "avx512f")]
56676    unsafe fn test_mm512_maskz_compress_ps() {
56677        let a = _mm512_set_ps(
56678            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56679        );
56680        let r = _mm512_maskz_compress_ps(0, a);
56681        assert_eq_m512(r, _mm512_setzero_ps());
56682        let r = _mm512_maskz_compress_ps(0b01010101_01010101, a);
56683        let e = _mm512_set_ps(
56684            0., 0., 0., 0., 0., 0., 0., 0., 1., 3., 5., 7., 9., 11., 13., 15.,
56685        );
56686        assert_eq_m512(r, e);
56687    }
56688
56689    #[simd_test(enable = "avx512f,avx512vl")]
56690    unsafe fn test_mm256_mask_compress_ps() {
56691        let src = _mm256_set1_ps(200.);
56692        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
56693        let r = _mm256_mask_compress_ps(src, 0, a);
56694        assert_eq_m256(r, src);
56695        let r = _mm256_mask_compress_ps(src, 0b01010101, a);
56696        let e = _mm256_set_ps(200., 200., 200., 200., 1., 3., 5., 7.);
56697        assert_eq_m256(r, e);
56698    }
56699
56700    #[simd_test(enable = "avx512f,avx512vl")]
56701    unsafe fn test_mm256_maskz_compress_ps() {
56702        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
56703        let r = _mm256_maskz_compress_ps(0, a);
56704        assert_eq_m256(r, _mm256_setzero_ps());
56705        let r = _mm256_maskz_compress_ps(0b01010101, a);
56706        let e = _mm256_set_ps(0., 0., 0., 0., 1., 3., 5., 7.);
56707        assert_eq_m256(r, e);
56708    }
56709
56710    #[simd_test(enable = "avx512f,avx512vl")]
56711    unsafe fn test_mm_mask_compress_ps() {
56712        let src = _mm_set1_ps(200.);
56713        let a = _mm_set_ps(0., 1., 2., 3.);
56714        let r = _mm_mask_compress_ps(src, 0, a);
56715        assert_eq_m128(r, src);
56716        let r = _mm_mask_compress_ps(src, 0b00000101, a);
56717        let e = _mm_set_ps(200., 200., 1., 3.);
56718        assert_eq_m128(r, e);
56719    }
56720
56721    #[simd_test(enable = "avx512f,avx512vl")]
56722    unsafe fn test_mm_maskz_compress_ps() {
56723        let a = _mm_set_ps(0., 1., 2., 3.);
56724        let r = _mm_maskz_compress_ps(0, a);
56725        assert_eq_m128(r, _mm_setzero_ps());
56726        let r = _mm_maskz_compress_ps(0b00000101, a);
56727        let e = _mm_set_ps(0., 0., 1., 3.);
56728        assert_eq_m128(r, e);
56729    }
56730
56731    #[simd_test(enable = "avx512f")]
56732    unsafe fn test_mm512_mask_compressstoreu_epi32() {
56733        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
56734        let mut r = [0_i32; 16];
56735        _mm512_mask_compressstoreu_epi32(r.as_mut_ptr(), 0, a);
56736        assert_eq!(&r, &[0_i32; 16]);
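        // Selected lanes are stored contiguously from the start of the buffer; the tail of the buffer is left untouched.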
56737        _mm512_mask_compressstoreu_epi32(r.as_mut_ptr(), 0b1111000011001010, a);
56738        assert_eq!(&r, &[2, 4, 7, 8, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]);
56739    }
56740
56741    #[simd_test(enable = "avx512f,avx512vl")]
56742    unsafe fn test_mm256_mask_compressstoreu_epi32() {
56743        let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
56744        let mut r = [0_i32; 8];
56745        _mm256_mask_compressstoreu_epi32(r.as_mut_ptr(), 0, a);
56746        assert_eq!(&r, &[0_i32; 8]);
56747        _mm256_mask_compressstoreu_epi32(r.as_mut_ptr(), 0b11001010, a);
56748        assert_eq!(&r, &[2, 4, 7, 8, 0, 0, 0, 0]);
56749    }
56750
56751    #[simd_test(enable = "avx512f,avx512vl")]
56752    unsafe fn test_mm_mask_compressstoreu_epi32() {
56753        let a = _mm_setr_epi32(1, 2, 3, 4);
56754        let mut r = [0_i32; 4];
56755        _mm_mask_compressstoreu_epi32(r.as_mut_ptr(), 0, a);
56756        assert_eq!(&r, &[0_i32; 4]);
56757        _mm_mask_compressstoreu_epi32(r.as_mut_ptr(), 0b1011, a);
56758        assert_eq!(&r, &[1, 2, 4, 0]);
56759    }
56760
56761    #[simd_test(enable = "avx512f")]
56762    unsafe fn test_mm512_mask_compressstoreu_epi64() {
56763        let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
56764        let mut r = [0_i64; 8];
56765        _mm512_mask_compressstoreu_epi64(r.as_mut_ptr(), 0, a);
56766        assert_eq!(&r, &[0_i64; 8]);
56767        _mm512_mask_compressstoreu_epi64(r.as_mut_ptr(), 0b11001010, a);
56768        assert_eq!(&r, &[2, 4, 7, 8, 0, 0, 0, 0]);
56769    }
56770
56771    #[simd_test(enable = "avx512f,avx512vl")]
56772    unsafe fn test_mm256_mask_compressstoreu_epi64() {
56773        let a = _mm256_setr_epi64x(1, 2, 3, 4);
56774        let mut r = [0_i64; 4];
56775        _mm256_mask_compressstoreu_epi64(r.as_mut_ptr(), 0, a);
56776        assert_eq!(&r, &[0_i64; 4]);
56777        _mm256_mask_compressstoreu_epi64(r.as_mut_ptr(), 0b1011, a);
56778        assert_eq!(&r, &[1, 2, 4, 0]);
56779    }
56780
56781    #[simd_test(enable = "avx512f,avx512vl")]
56782    unsafe fn test_mm_mask_compressstoreu_epi64() {
56783        let a = _mm_setr_epi64x(1, 2);
56784        let mut r = [0_i64; 2];
56785        _mm_mask_compressstoreu_epi64(r.as_mut_ptr(), 0, a);
56786        assert_eq!(&r, &[0_i64; 2]);
56787        _mm_mask_compressstoreu_epi64(r.as_mut_ptr(), 0b10, a);
56788        assert_eq!(&r, &[2, 0]);
56789    }
56790
56791    #[simd_test(enable = "avx512f")]
56792    unsafe fn test_mm512_mask_compressstoreu_ps() {
56793        let a = _mm512_setr_ps(
56794            1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32, 9_f32, 10_f32, 11_f32, 12_f32,
56795            13_f32, 14_f32, 15_f32, 16_f32,
56796        );
56797        let mut r = [0_f32; 16];
56798        _mm512_mask_compressstoreu_ps(r.as_mut_ptr(), 0, a);
56799        assert_eq!(&r, &[0_f32; 16]);
56800        _mm512_mask_compressstoreu_ps(r.as_mut_ptr(), 0b1111000011001010, a);
56801        assert_eq!(
56802            &r,
56803            &[
56804                2_f32, 4_f32, 7_f32, 8_f32, 13_f32, 14_f32, 15_f32, 16_f32, 0_f32, 0_f32, 0_f32,
56805                0_f32, 0_f32, 0_f32, 0_f32, 0_f32
56806            ]
56807        );
56808    }
56809
56810    #[simd_test(enable = "avx512f,avx512vl")]
56811    unsafe fn test_mm256_mask_compressstoreu_ps() {
56812        let a = _mm256_setr_ps(1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32);
56813        let mut r = [0_f32; 8];
56814        _mm256_mask_compressstoreu_ps(r.as_mut_ptr(), 0, a);
56815        assert_eq!(&r, &[0_f32; 8]);
56816        _mm256_mask_compressstoreu_ps(r.as_mut_ptr(), 0b11001010, a);
56817        assert_eq!(
56818            &r,
56819            &[2_f32, 4_f32, 7_f32, 8_f32, 0_f32, 0_f32, 0_f32, 0_f32]
56820        );
56821    }
56822
56823    #[simd_test(enable = "avx512f,avx512vl")]
56824    unsafe fn test_mm_mask_compressstoreu_ps() {
56825        let a = _mm_setr_ps(1_f32, 2_f32, 3_f32, 4_f32);
56826        let mut r = [0.; 4];
56827        _mm_mask_compressstoreu_ps(r.as_mut_ptr(), 0, a);
56828        assert_eq!(&r, &[0.; 4]);
56829        _mm_mask_compressstoreu_ps(r.as_mut_ptr(), 0b1011, a);
56830        assert_eq!(&r, &[1_f32, 2_f32, 4_f32, 0_f32]);
56831    }
56832
56833    #[simd_test(enable = "avx512f")]
56834    unsafe fn test_mm512_mask_compressstoreu_pd() {
56835        let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
56836        let mut r = [0.; 8];
56837        _mm512_mask_compressstoreu_pd(r.as_mut_ptr(), 0, a);
56838        assert_eq!(&r, &[0.; 8]);
56839        _mm512_mask_compressstoreu_pd(r.as_mut_ptr(), 0b11001010, a);
56840        assert_eq!(&r, &[2., 4., 7., 8., 0., 0., 0., 0.]);
56841    }
56842
56843    #[simd_test(enable = "avx512f,avx512vl")]
56844    unsafe fn test_mm256_mask_compressstoreu_pd() {
56845        let a = _mm256_setr_pd(1., 2., 3., 4.);
56846        let mut r = [0.; 4];
56847        _mm256_mask_compressstoreu_pd(r.as_mut_ptr(), 0, a);
56848        assert_eq!(&r, &[0.; 4]);
56849        _mm256_mask_compressstoreu_pd(r.as_mut_ptr(), 0b1011, a);
56850        assert_eq!(&r, &[1., 2., 4., 0.]);
56851    }
56852
56853    #[simd_test(enable = "avx512f,avx512vl")]
56854    unsafe fn test_mm_mask_compressstoreu_pd() {
56855        let a = _mm_setr_pd(1., 2.);
56856        let mut r = [0.; 2];
56857        _mm_mask_compressstoreu_pd(r.as_mut_ptr(), 0, a);
56858        assert_eq!(&r, &[0.; 2]);
56859        _mm_mask_compressstoreu_pd(r.as_mut_ptr(), 0b10, a);
56860        assert_eq!(&r, &[2., 0.]);
56861    }
56862
56863    #[simd_test(enable = "avx512f")]
56864    unsafe fn test_mm512_mask_expand_epi32() {
56865        let src = _mm512_set1_epi32(200);
56866        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56867        let r = _mm512_mask_expand_epi32(src, 0, a);
56868        assert_eq_m512i(r, src);
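        // Consecutive low lanes of `a` are scattered to the lanes whose mask bit is set; all other lanes keep `src`.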
56869        let r = _mm512_mask_expand_epi32(src, 0b01010101_01010101, a);
56870        let e = _mm512_set_epi32(
56871            200, 8, 200, 9, 200, 10, 200, 11, 200, 12, 200, 13, 200, 14, 200, 15,
56872        );
56873        assert_eq_m512i(r, e);
56874    }
56875
56876    #[simd_test(enable = "avx512f")]
56877    unsafe fn test_mm512_maskz_expand_epi32() {
56878        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56879        let r = _mm512_maskz_expand_epi32(0, a);
56880        assert_eq_m512i(r, _mm512_setzero_si512());
56881        let r = _mm512_maskz_expand_epi32(0b01010101_01010101, a);
56882        let e = _mm512_set_epi32(0, 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15);
56883        assert_eq_m512i(r, e);
56884    }
56885
56886    #[simd_test(enable = "avx512f,avx512vl")]
56887    unsafe fn test_mm256_mask_expand_epi32() {
56888        let src = _mm256_set1_epi32(200);
56889        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
56890        let r = _mm256_mask_expand_epi32(src, 0, a);
56891        assert_eq_m256i(r, src);
56892        let r = _mm256_mask_expand_epi32(src, 0b01010101, a);
56893        let e = _mm256_set_epi32(200, 4, 200, 5, 200, 6, 200, 7);
56894        assert_eq_m256i(r, e);
56895    }
56896
56897    #[simd_test(enable = "avx512f,avx512vl")]
56898    unsafe fn test_mm256_maskz_expand_epi32() {
56899        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
56900        let r = _mm256_maskz_expand_epi32(0, a);
56901        assert_eq_m256i(r, _mm256_setzero_si256());
56902        let r = _mm256_maskz_expand_epi32(0b01010101, a);
56903        let e = _mm256_set_epi32(0, 4, 0, 5, 0, 6, 0, 7);
56904        assert_eq_m256i(r, e);
56905    }
56906
56907    #[simd_test(enable = "avx512f,avx512vl")]
56908    unsafe fn test_mm_mask_expand_epi32() {
56909        let src = _mm_set1_epi32(200);
56910        let a = _mm_set_epi32(0, 1, 2, 3);
56911        let r = _mm_mask_expand_epi32(src, 0, a);
56912        assert_eq_m128i(r, src);
56913        let r = _mm_mask_expand_epi32(src, 0b00000101, a);
56914        let e = _mm_set_epi32(200, 2, 200, 3);
56915        assert_eq_m128i(r, e);
56916    }
56917
56918    #[simd_test(enable = "avx512f,avx512vl")]
56919    unsafe fn test_mm_maskz_expand_epi32() {
56920        let a = _mm_set_epi32(0, 1, 2, 3);
56921        let r = _mm_maskz_expand_epi32(0, a);
56922        assert_eq_m128i(r, _mm_setzero_si128());
56923        let r = _mm_maskz_expand_epi32(0b00000101, a);
56924        let e = _mm_set_epi32(0, 2, 0, 3);
56925        assert_eq_m128i(r, e);
56926    }
56927
56928    #[simd_test(enable = "avx512f")]
56929    unsafe fn test_mm512_mask_expand_ps() {
56930        let src = _mm512_set1_ps(200.);
56931        let a = _mm512_set_ps(
56932            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56933        );
56934        let r = _mm512_mask_expand_ps(src, 0, a);
56935        assert_eq_m512(r, src);
56936        let r = _mm512_mask_expand_ps(src, 0b01010101_01010101, a);
56937        let e = _mm512_set_ps(
56938            200., 8., 200., 9., 200., 10., 200., 11., 200., 12., 200., 13., 200., 14., 200., 15.,
56939        );
56940        assert_eq_m512(r, e);
56941    }
56942
56943    #[simd_test(enable = "avx512f")]
56944    unsafe fn test_mm512_maskz_expand_ps() {
56945        let a = _mm512_set_ps(
56946            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56947        );
56948        let r = _mm512_maskz_expand_ps(0, a);
56949        assert_eq_m512(r, _mm512_setzero_ps());
56950        let r = _mm512_maskz_expand_ps(0b01010101_01010101, a);
56951        let e = _mm512_set_ps(
56952            0., 8., 0., 9., 0., 10., 0., 11., 0., 12., 0., 13., 0., 14., 0., 15.,
56953        );
56954        assert_eq_m512(r, e);
56955    }
56956
56957    #[simd_test(enable = "avx512f,avx512vl")]
56958    unsafe fn test_mm256_mask_expand_ps() {
56959        let src = _mm256_set1_ps(200.);
56960        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
56961        let r = _mm256_mask_expand_ps(src, 0, a);
56962        assert_eq_m256(r, src);
56963        let r = _mm256_mask_expand_ps(src, 0b01010101, a);
56964        let e = _mm256_set_ps(200., 4., 200., 5., 200., 6., 200., 7.);
56965        assert_eq_m256(r, e);
56966    }
56967
56968    #[simd_test(enable = "avx512f,avx512vl")]
56969    unsafe fn test_mm256_maskz_expand_ps() {
56970        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
56971        let r = _mm256_maskz_expand_ps(0, a);
56972        assert_eq_m256(r, _mm256_setzero_ps());
56973        let r = _mm256_maskz_expand_ps(0b01010101, a);
56974        let e = _mm256_set_ps(0., 4., 0., 5., 0., 6., 0., 7.);
56975        assert_eq_m256(r, e);
56976    }
56977
56978    #[simd_test(enable = "avx512f,avx512vl")]
56979    unsafe fn test_mm_mask_expand_ps() {
56980        let src = _mm_set1_ps(200.);
56981        let a = _mm_set_ps(0., 1., 2., 3.);
56982        let r = _mm_mask_expand_ps(src, 0, a);
56983        assert_eq_m128(r, src);
56984        let r = _mm_mask_expand_ps(src, 0b00000101, a);
56985        let e = _mm_set_ps(200., 2., 200., 3.);
56986        assert_eq_m128(r, e);
56987    }
56988
56989    #[simd_test(enable = "avx512f,avx512vl")]
56990    unsafe fn test_mm_maskz_expand_ps() {
56991        let a = _mm_set_ps(0., 1., 2., 3.);
56992        let r = _mm_maskz_expand_ps(0, a);
56993        assert_eq_m128(r, _mm_setzero_ps());
56994        let r = _mm_maskz_expand_ps(0b00000101, a);
56995        let e = _mm_set_ps(0., 2., 0., 3.);
56996        assert_eq_m128(r, e);
56997    }
56998
56999    #[simd_test(enable = "avx512f")]
57000    unsafe fn test_mm512_loadu_epi32() {
57001        let a = &[4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50];
57002        let p = a.as_ptr();
57003        let r = _mm512_loadu_epi32(black_box(p));
57004        let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
57005        assert_eq_m512i(r, e);
57006    }
57007
57008    #[simd_test(enable = "avx512f,avx512vl")]
57009    unsafe fn test_mm256_loadu_epi32() {
57010        let a = &[4, 3, 2, 5, 8, 9, 64, 50];
57011        let p = a.as_ptr();
57012        let r = _mm256_loadu_epi32(black_box(p));
57013        let e = _mm256_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50);
57014        assert_eq_m256i(r, e);
57015    }
57016
57017    #[simd_test(enable = "avx512f,avx512vl")]
57018    unsafe fn test_mm_loadu_epi32() {
57019        let a = &[4, 3, 2, 5];
57020        let p = a.as_ptr();
57021        let r = _mm_loadu_epi32(black_box(p));
57022        let e = _mm_setr_epi32(4, 3, 2, 5);
57023        assert_eq_m128i(r, e);
57024    }
57025
57026    #[simd_test(enable = "avx512f")]
57027    unsafe fn test_mm512_mask_cvtepi32_storeu_epi16() {
57028        let a = _mm512_set1_epi32(9);
57029        let mut r = _mm256_undefined_si256();
57030        _mm512_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111_11111111, a);
57031        let e = _mm256_set1_epi16(9);
57032        assert_eq_m256i(r, e);
57033    }
57034
57035    #[simd_test(enable = "avx512f,avx512vl")]
57036    unsafe fn test_mm256_mask_cvtepi32_storeu_epi16() {
57037        let a = _mm256_set1_epi32(9);
57038        let mut r = _mm_undefined_si128();
57039        _mm256_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
57040        let e = _mm_set1_epi16(9);
57041        assert_eq_m128i(r, e);
57042    }
57043
57044    #[simd_test(enable = "avx512f,avx512vl")]
57045    unsafe fn test_mm_mask_cvtepi32_storeu_epi16() {
57046        let a = _mm_set1_epi32(9);
57047        let mut r = _mm_set1_epi8(0);
57048        _mm_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
57049        let e = _mm_set_epi16(0, 0, 0, 0, 9, 9, 9, 9);
57050        assert_eq_m128i(r, e);
57051    }
57052
57053    #[simd_test(enable = "avx512f")]
57054    unsafe fn test_mm512_mask_cvtsepi32_storeu_epi16() {
57055        let a = _mm512_set1_epi32(i32::MAX);
57056        let mut r = _mm256_undefined_si256();
57057        _mm512_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111_11111111, a);
57058        let e = _mm256_set1_epi16(i16::MAX);
57059        assert_eq_m256i(r, e);
57060    }
57061
57062    #[simd_test(enable = "avx512f,avx512vl")]
57063    unsafe fn test_mm256_mask_cvtsepi32_storeu_epi16() {
57064        let a = _mm256_set1_epi32(i32::MAX);
57065        let mut r = _mm_undefined_si128();
57066        _mm256_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
57067        let e = _mm_set1_epi16(i16::MAX);
57068        assert_eq_m128i(r, e);
57069    }
57070
57071    #[simd_test(enable = "avx512f,avx512vl")]
57072    unsafe fn test_mm_mask_cvtsepi32_storeu_epi16() {
57073        let a = _mm_set1_epi32(i32::MAX);
57074        let mut r = _mm_set1_epi8(0);
57075        _mm_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
57076        let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
57077        assert_eq_m128i(r, e);
57078    }
57079
57080    #[simd_test(enable = "avx512f")]
57081    unsafe fn test_mm512_mask_cvtusepi32_storeu_epi16() {
57082        let a = _mm512_set1_epi32(i32::MAX);
57083        let mut r = _mm256_undefined_si256();
57084        _mm512_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111_11111111, a);
57085        let e = _mm256_set1_epi16(u16::MAX as i16);
57086        assert_eq_m256i(r, e);
57087    }
57088
57089    #[simd_test(enable = "avx512f,avx512vl")]
57090    unsafe fn test_mm256_mask_cvtusepi32_storeu_epi16() {
57091        let a = _mm256_set1_epi32(i32::MAX);
57092        let mut r = _mm_undefined_si128();
57093        _mm256_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
57094        let e = _mm_set1_epi16(u16::MAX as i16);
57095        assert_eq_m128i(r, e);
57096    }
57097
57098    #[simd_test(enable = "avx512f,avx512vl")]
57099    unsafe fn test_mm_mask_cvtusepi32_storeu_epi16() {
57100        let a = _mm_set1_epi32(i32::MAX);
57101        let mut r = _mm_set1_epi8(0);
57102        _mm_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
57103        let e = _mm_set_epi16(
57104            0,
57105            0,
57106            0,
57107            0,
57108            u16::MAX as i16,
57109            u16::MAX as i16,
57110            u16::MAX as i16,
57111            u16::MAX as i16,
57112        );
57113        assert_eq_m128i(r, e);
57114    }
57115
57116    #[simd_test(enable = "avx512f")]
57117    unsafe fn test_mm512_mask_cvtepi32_storeu_epi8() {
57118        let a = _mm512_set1_epi32(9);
57119        let mut r = _mm_undefined_si128();
57120        _mm512_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
57121        let e = _mm_set1_epi8(9);
57122        assert_eq_m128i(r, e);
57123    }
57124
57125    #[simd_test(enable = "avx512f,avx512vl")]
57126    unsafe fn test_mm256_mask_cvtepi32_storeu_epi8() {
57127        let a = _mm256_set1_epi32(9);
57128        let mut r = _mm_set1_epi8(0);
57129        _mm256_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
57130        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9);
57131        assert_eq_m128i(r, e);
57132    }
57133
57134    #[simd_test(enable = "avx512f,avx512vl")]
57135    unsafe fn test_mm_mask_cvtepi32_storeu_epi8() {
57136        let a = _mm_set1_epi32(9);
57137        let mut r = _mm_set1_epi8(0);
57138        _mm_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
57139        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9);
57140        assert_eq_m128i(r, e);
57141    }
57142
57143    #[simd_test(enable = "avx512f")]
57144    unsafe fn test_mm512_mask_cvtsepi32_storeu_epi8() {
57145        let a = _mm512_set1_epi32(i32::MAX);
57146        let mut r = _mm_undefined_si128();
57147        _mm512_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
57148        let e = _mm_set1_epi8(i8::MAX);
57149        assert_eq_m128i(r, e);
57150    }
57151
57152    #[simd_test(enable = "avx512f,avx512vl")]
57153    unsafe fn test_mm256_mask_cvtsepi32_storeu_epi8() {
57154        let a = _mm256_set1_epi32(i32::MAX);
57155        let mut r = _mm_set1_epi8(0);
57156        _mm256_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
57157        #[rustfmt::skip]
57158        let e = _mm_set_epi8(
57159            0, 0, 0, 0,
57160            0, 0, 0, 0,
57161            i8::MAX, i8::MAX, i8::MAX, i8::MAX,
57162            i8::MAX, i8::MAX, i8::MAX, i8::MAX,
57163        );
57164        assert_eq_m128i(r, e);
57165    }
57166
57167    #[simd_test(enable = "avx512f,avx512vl")]
57168    unsafe fn test_mm_mask_cvtsepi32_storeu_epi8() {
57169        let a = _mm_set1_epi32(i32::MAX);
57170        let mut r = _mm_set1_epi8(0);
57171        _mm_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
57172        #[rustfmt::skip]
57173        let e = _mm_set_epi8(
57174            0, 0, 0, 0,
57175            0, 0, 0, 0,
57176            0, 0, 0, 0,
57177            i8::MAX, i8::MAX, i8::MAX, i8::MAX,
57178        );
57179        assert_eq_m128i(r, e);
57180    }
57181
57182    #[simd_test(enable = "avx512f")]
57183    unsafe fn test_mm512_mask_cvtusepi32_storeu_epi8() {
57184        let a = _mm512_set1_epi32(i32::MAX);
57185        let mut r = _mm_undefined_si128();
57186        _mm512_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
57187        let e = _mm_set1_epi8(u8::MAX as i8);
57188        assert_eq_m128i(r, e);
57189    }
57190
57191    #[simd_test(enable = "avx512f,avx512vl")]
57192    unsafe fn test_mm256_mask_cvtusepi32_storeu_epi8() {
57193        let a = _mm256_set1_epi32(i32::MAX);
57194        let mut r = _mm_set1_epi8(0);
57195        _mm256_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
57196        #[rustfmt::skip]
57197        let e = _mm_set_epi8(
57198            0, 0, 0, 0,
57199            0, 0, 0, 0,
57200            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
57201            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
57202        );
57203        assert_eq_m128i(r, e);
57204    }
57205
57206    #[simd_test(enable = "avx512f,avx512vl")]
57207    unsafe fn test_mm_mask_cvtusepi32_storeu_epi8() {
57208        let a = _mm_set1_epi32(i32::MAX);
57209        let mut r = _mm_set1_epi8(0);
57210        _mm_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
57211        #[rustfmt::skip]
57212        let e = _mm_set_epi8(
57213            0, 0, 0, 0,
57214            0, 0, 0, 0,
57215            0, 0, 0, 0,
57216            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
57217        );
57218        assert_eq_m128i(r, e);
57219    }
57220
57221    #[simd_test(enable = "avx512f")]
57222    unsafe fn test_mm512_storeu_epi32() {
57223        let a = _mm512_set1_epi32(9);
57224        let mut r = _mm512_undefined_epi32();
57225        _mm512_storeu_epi32(&mut r as *mut _ as *mut i32, a);
57226        assert_eq_m512i(r, a);
57227    }
57228
57229    #[simd_test(enable = "avx512f,avx512vl")]
57230    unsafe fn test_mm256_storeu_epi32() {
57231        let a = _mm256_set1_epi32(9);
57232        let mut r = _mm256_undefined_si256();
57233        _mm256_storeu_epi32(&mut r as *mut _ as *mut i32, a);
57234        assert_eq_m256i(r, a);
57235    }
57236
57237    #[simd_test(enable = "avx512f,avx512vl")]
57238    unsafe fn test_mm_storeu_epi32() {
57239        let a = _mm_set1_epi32(9);
57240        let mut r = _mm_undefined_si128();
57241        _mm_storeu_epi32(&mut r as *mut _ as *mut i32, a);
57242        assert_eq_m128i(r, a);
57243    }
57244
57245    #[simd_test(enable = "avx512f")]
57246    unsafe fn test_mm512_loadu_si512() {
57247        let a = &[4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50];
57248        let p = a.as_ptr().cast();
57249        let r = _mm512_loadu_si512(black_box(p));
57250        let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
57251        assert_eq_m512i(r, e);
57252    }
57253
57254    #[simd_test(enable = "avx512f")]
57255    unsafe fn test_mm512_storeu_si512() {
57256        let a = _mm512_set1_epi32(9);
57257        let mut r = _mm512_undefined_epi32();
57258        _mm512_storeu_si512(&mut r as *mut _, a);
57259        assert_eq_m512i(r, a);
57260    }
57261
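    // The aligned-load tests below wrap their data in a #[repr(align(64))] struct because
    // the _mm512_load_* intrinsics require a 64-byte-aligned pointer; a plain array only
    // guarantees the alignment of its element type.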
57262    #[simd_test(enable = "avx512f")]
57263    unsafe fn test_mm512_load_si512() {
57264        #[repr(align(64))]
57265        struct Align {
57266            data: [i32; 16], // 64 bytes
57267        }
57268        let a = Align {
57269            data: [4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50],
57270        };
57271        let p = (a.data).as_ptr().cast();
57272        let r = _mm512_load_si512(black_box(p));
57273        let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
57274        assert_eq_m512i(r, e);
57275    }
57276
57277    #[simd_test(enable = "avx512f")]
57278    unsafe fn test_mm512_store_si512() {
57279        let a = _mm512_set1_epi32(9);
57280        let mut r = _mm512_undefined_epi32();
57281        _mm512_store_si512(&mut r as *mut _, a);
57282        assert_eq_m512i(r, a);
57283    }
57284
57285    #[simd_test(enable = "avx512f")]
57286    unsafe fn test_mm512_load_epi32() {
57287        #[repr(align(64))]
57288        struct Align {
57289            data: [i32; 16], // 64 bytes
57290        }
57291        let a = Align {
57292            data: [4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50],
57293        };
57294        let p = (a.data).as_ptr();
57295        let r = _mm512_load_epi32(black_box(p));
57296        let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
57297        assert_eq_m512i(r, e);
57298    }
57299
57300    #[simd_test(enable = "avx512f,avx512vl")]
57301    unsafe fn test_mm256_load_epi32() {
57302        #[repr(align(64))]
57303        struct Align {
57304            data: [i32; 8],
57305        }
57306        let a = Align {
57307            data: [4, 3, 2, 5, 8, 9, 64, 50],
57308        };
57309        let p = (a.data).as_ptr();
57310        let r = _mm256_load_epi32(black_box(p));
57311        let e = _mm256_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50);
57312        assert_eq_m256i(r, e);
57313    }
57314
57315    #[simd_test(enable = "avx512f,avx512vl")]
57316    unsafe fn test_mm_load_epi32() {
57317        #[repr(align(64))]
57318        struct Align {
57319            data: [i32; 4],
57320        }
57321        let a = Align { data: [4, 3, 2, 5] };
57322        let p = (a.data).as_ptr();
57323        let r = _mm_load_epi32(black_box(p));
57324        let e = _mm_setr_epi32(4, 3, 2, 5);
57325        assert_eq_m128i(r, e);
57326    }
57327
57328    #[simd_test(enable = "avx512f")]
57329    unsafe fn test_mm512_store_epi32() {
57330        let a = _mm512_set1_epi32(9);
57331        let mut r = _mm512_undefined_epi32();
57332        _mm512_store_epi32(&mut r as *mut _ as *mut i32, a);
57333        assert_eq_m512i(r, a);
57334    }
57335
57336    #[simd_test(enable = "avx512f,avx512vl")]
57337    unsafe fn test_mm256_store_epi32() {
57338        let a = _mm256_set1_epi32(9);
57339        let mut r = _mm256_undefined_si256();
57340        _mm256_store_epi32(&mut r as *mut _ as *mut i32, a);
57341        assert_eq_m256i(r, a);
57342    }
57343
57344    #[simd_test(enable = "avx512f,avx512vl")]
57345    unsafe fn test_mm_store_epi32() {
57346        let a = _mm_set1_epi32(9);
57347        let mut r = _mm_undefined_si128();
57348        _mm_store_epi32(&mut r as *mut _ as *mut i32, a);
57349        assert_eq_m128i(r, a);
57350    }
57351
57352    #[simd_test(enable = "avx512f")]
57353    unsafe fn test_mm512_load_ps() {
57354        #[repr(align(64))]
57355        struct Align {
57356            data: [f32; 16], // 64 bytes
57357        }
57358        let a = Align {
57359            data: [
57360                4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
57361            ],
57362        };
57363        let p = (a.data).as_ptr();
57364        let r = _mm512_load_ps(black_box(p));
57365        let e = _mm512_setr_ps(
57366            4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
57367        );
57368        assert_eq_m512(r, e);
57369    }
57370
57371    #[simd_test(enable = "avx512f")]
57372    unsafe fn test_mm512_store_ps() {
57373        let a = _mm512_set1_ps(9.);
57374        let mut r = _mm512_undefined_ps();
57375        _mm512_store_ps(&mut r as *mut _ as *mut f32, a);
57376        assert_eq_m512(r, a);
57377    }
57378
57379    #[simd_test(enable = "avx512f")]
57380    unsafe fn test_mm512_mask_set1_epi32() {
57381        let src = _mm512_set1_epi32(2);
57382        let a: i32 = 11;
57383        let r = _mm512_mask_set1_epi32(src, 0, a);
57384        assert_eq_m512i(r, src);
57385        let r = _mm512_mask_set1_epi32(src, 0b11111111_11111111, a);
57386        let e = _mm512_set1_epi32(11);
57387        assert_eq_m512i(r, e);
57388    }
57389
57390    #[simd_test(enable = "avx512f")]
57391    unsafe fn test_mm512_maskz_set1_epi32() {
57392        let a: i32 = 11;
57393        let r = _mm512_maskz_set1_epi32(0, a);
57394        assert_eq_m512i(r, _mm512_setzero_si512());
57395        let r = _mm512_maskz_set1_epi32(0b11111111_11111111, a);
57396        let e = _mm512_set1_epi32(11);
57397        assert_eq_m512i(r, e);
57398    }
57399
57400    #[simd_test(enable = "avx512f,avx512vl")]
57401    unsafe fn test_mm256_mask_set1_epi32() {
57402        let src = _mm256_set1_epi32(2);
57403        let a: i32 = 11;
57404        let r = _mm256_mask_set1_epi32(src, 0, a);
57405        assert_eq_m256i(r, src);
57406        let r = _mm256_mask_set1_epi32(src, 0b11111111, a);
57407        let e = _mm256_set1_epi32(11);
57408        assert_eq_m256i(r, e);
57409    }
57410
57411    #[simd_test(enable = "avx512f,avx512vl")]
57412    unsafe fn test_mm256_maskz_set1_epi32() {
57413        let a: i32 = 11;
57414        let r = _mm256_maskz_set1_epi32(0, a);
57415        assert_eq_m256i(r, _mm256_setzero_si256());
57416        let r = _mm256_maskz_set1_epi32(0b11111111, a);
57417        let e = _mm256_set1_epi32(11);
57418        assert_eq_m256i(r, e);
57419    }
57420
57421    #[simd_test(enable = "avx512f,avx512vl")]
57422    unsafe fn test_mm_mask_set1_epi32() {
57423        let src = _mm_set1_epi32(2);
57424        let a: i32 = 11;
57425        let r = _mm_mask_set1_epi32(src, 0, a);
57426        assert_eq_m128i(r, src);
57427        let r = _mm_mask_set1_epi32(src, 0b00001111, a);
57428        let e = _mm_set1_epi32(11);
57429        assert_eq_m128i(r, e);
57430    }
57431
57432    #[simd_test(enable = "avx512f,avx512vl")]
57433    unsafe fn test_mm_maskz_set1_epi32() {
57434        let a: i32 = 11;
57435        let r = _mm_maskz_set1_epi32(0, a);
57436        assert_eq_m128i(r, _mm_setzero_si128());
57437        let r = _mm_maskz_set1_epi32(0b00001111, a);
57438        let e = _mm_set1_epi32(11);
57439        assert_eq_m128i(r, e);
57440    }
57441
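    // The scalar (_ss/_sd) masked tests below all follow the same pattern: only bit 0 of the
    // mask is consulted, the upper elements are passed through from the first vector operand,
    // and element 0 comes from the operation when the bit is set, otherwise from `src`
    // (mask variants) or zero (maskz variants).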
57442    #[simd_test(enable = "avx512f")]
57443    unsafe fn test_mm_mask_move_ss() {
57444        let src = _mm_set_ps(10., 11., 100., 110.);
57445        let a = _mm_set_ps(1., 2., 10., 20.);
57446        let b = _mm_set_ps(3., 4., 30., 40.);
57447        let r = _mm_mask_move_ss(src, 0, a, b);
57448        let e = _mm_set_ps(1., 2., 10., 110.);
57449        assert_eq_m128(r, e);
57450        let r = _mm_mask_move_ss(src, 0b11111111, a, b);
57451        let e = _mm_set_ps(1., 2., 10., 40.);
57452        assert_eq_m128(r, e);
57453    }
57454
57455    #[simd_test(enable = "avx512f")]
57456    unsafe fn test_mm_maskz_move_ss() {
57457        let a = _mm_set_ps(1., 2., 10., 20.);
57458        let b = _mm_set_ps(3., 4., 30., 40.);
57459        let r = _mm_maskz_move_ss(0, a, b);
57460        let e = _mm_set_ps(1., 2., 10., 0.);
57461        assert_eq_m128(r, e);
57462        let r = _mm_maskz_move_ss(0b11111111, a, b);
57463        let e = _mm_set_ps(1., 2., 10., 40.);
57464        assert_eq_m128(r, e);
57465    }
57466
57467    #[simd_test(enable = "avx512f")]
57468    unsafe fn test_mm_mask_move_sd() {
57469        let src = _mm_set_pd(10., 11.);
57470        let a = _mm_set_pd(1., 2.);
57471        let b = _mm_set_pd(3., 4.);
57472        let r = _mm_mask_move_sd(src, 0, a, b);
57473        let e = _mm_set_pd(1., 11.);
57474        assert_eq_m128d(r, e);
57475        let r = _mm_mask_move_sd(src, 0b11111111, a, b);
57476        let e = _mm_set_pd(1., 4.);
57477        assert_eq_m128d(r, e);
57478    }
57479
57480    #[simd_test(enable = "avx512f")]
57481    unsafe fn test_mm_maskz_move_sd() {
57482        let a = _mm_set_pd(1., 2.);
57483        let b = _mm_set_pd(3., 4.);
57484        let r = _mm_maskz_move_sd(0, a, b);
57485        let e = _mm_set_pd(1., 0.);
57486        assert_eq_m128d(r, e);
57487        let r = _mm_maskz_move_sd(0b11111111, a, b);
57488        let e = _mm_set_pd(1., 4.);
57489        assert_eq_m128d(r, e);
57490    }
57491
57492    #[simd_test(enable = "avx512f")]
57493    unsafe fn test_mm_mask_add_ss() {
57494        let src = _mm_set_ps(10., 11., 100., 110.);
57495        let a = _mm_set_ps(1., 2., 10., 20.);
57496        let b = _mm_set_ps(3., 4., 30., 40.);
57497        let r = _mm_mask_add_ss(src, 0, a, b);
57498        let e = _mm_set_ps(1., 2., 10., 110.);
57499        assert_eq_m128(r, e);
57500        let r = _mm_mask_add_ss(src, 0b11111111, a, b);
57501        let e = _mm_set_ps(1., 2., 10., 60.);
57502        assert_eq_m128(r, e);
57503    }
57504
57505    #[simd_test(enable = "avx512f")]
57506    unsafe fn test_mm_maskz_add_ss() {
57507        let a = _mm_set_ps(1., 2., 10., 20.);
57508        let b = _mm_set_ps(3., 4., 30., 40.);
57509        let r = _mm_maskz_add_ss(0, a, b);
57510        let e = _mm_set_ps(1., 2., 10., 0.);
57511        assert_eq_m128(r, e);
57512        let r = _mm_maskz_add_ss(0b11111111, a, b);
57513        let e = _mm_set_ps(1., 2., 10., 60.);
57514        assert_eq_m128(r, e);
57515    }
57516
57517    #[simd_test(enable = "avx512f")]
57518    unsafe fn test_mm_mask_add_sd() {
57519        let src = _mm_set_pd(10., 11.);
57520        let a = _mm_set_pd(1., 2.);
57521        let b = _mm_set_pd(3., 4.);
57522        let r = _mm_mask_add_sd(src, 0, a, b);
57523        let e = _mm_set_pd(1., 11.);
57524        assert_eq_m128d(r, e);
57525        let r = _mm_mask_add_sd(src, 0b11111111, a, b);
57526        let e = _mm_set_pd(1., 6.);
57527        assert_eq_m128d(r, e);
57528    }
57529
57530    #[simd_test(enable = "avx512f")]
57531    unsafe fn test_mm_maskz_add_sd() {
57532        let a = _mm_set_pd(1., 2.);
57533        let b = _mm_set_pd(3., 4.);
57534        let r = _mm_maskz_add_sd(0, a, b);
57535        let e = _mm_set_pd(1., 0.);
57536        assert_eq_m128d(r, e);
57537        let r = _mm_maskz_add_sd(0b11111111, a, b);
57538        let e = _mm_set_pd(1., 6.);
57539        assert_eq_m128d(r, e);
57540    }
57541
57542    #[simd_test(enable = "avx512f")]
57543    unsafe fn test_mm_mask_sub_ss() {
57544        let src = _mm_set_ps(10., 11., 100., 110.);
57545        let a = _mm_set_ps(1., 2., 10., 20.);
57546        let b = _mm_set_ps(3., 4., 30., 40.);
57547        let r = _mm_mask_sub_ss(src, 0, a, b);
57548        let e = _mm_set_ps(1., 2., 10., 110.);
57549        assert_eq_m128(r, e);
57550        let r = _mm_mask_sub_ss(src, 0b11111111, a, b);
57551        let e = _mm_set_ps(1., 2., 10., -20.);
57552        assert_eq_m128(r, e);
57553    }
57554
57555    #[simd_test(enable = "avx512f")]
57556    unsafe fn test_mm_maskz_sub_ss() {
57557        let a = _mm_set_ps(1., 2., 10., 20.);
57558        let b = _mm_set_ps(3., 4., 30., 40.);
57559        let r = _mm_maskz_sub_ss(0, a, b);
57560        let e = _mm_set_ps(1., 2., 10., 0.);
57561        assert_eq_m128(r, e);
57562        let r = _mm_maskz_sub_ss(0b11111111, a, b);
57563        let e = _mm_set_ps(1., 2., 10., -20.);
57564        assert_eq_m128(r, e);
57565    }
57566
57567    #[simd_test(enable = "avx512f")]
57568    unsafe fn test_mm_mask_sub_sd() {
57569        let src = _mm_set_pd(10., 11.);
57570        let a = _mm_set_pd(1., 2.);
57571        let b = _mm_set_pd(3., 4.);
57572        let r = _mm_mask_sub_sd(src, 0, a, b);
57573        let e = _mm_set_pd(1., 11.);
57574        assert_eq_m128d(r, e);
57575        let r = _mm_mask_sub_sd(src, 0b11111111, a, b);
57576        let e = _mm_set_pd(1., -2.);
57577        assert_eq_m128d(r, e);
57578    }
57579
57580    #[simd_test(enable = "avx512f")]
57581    unsafe fn test_mm_maskz_sub_sd() {
57582        let a = _mm_set_pd(1., 2.);
57583        let b = _mm_set_pd(3., 4.);
57584        let r = _mm_maskz_sub_sd(0, a, b);
57585        let e = _mm_set_pd(1., 0.);
57586        assert_eq_m128d(r, e);
57587        let r = _mm_maskz_sub_sd(0b11111111, a, b);
57588        let e = _mm_set_pd(1., -2.);
57589        assert_eq_m128d(r, e);
57590    }
57591
57592    #[simd_test(enable = "avx512f")]
57593    unsafe fn test_mm_mask_mul_ss() {
57594        let src = _mm_set_ps(10., 11., 100., 110.);
57595        let a = _mm_set_ps(1., 2., 10., 20.);
57596        let b = _mm_set_ps(3., 4., 30., 40.);
57597        let r = _mm_mask_mul_ss(src, 0, a, b);
57598        let e = _mm_set_ps(1., 2., 10., 110.);
57599        assert_eq_m128(r, e);
57600        let r = _mm_mask_mul_ss(src, 0b11111111, a, b);
57601        let e = _mm_set_ps(1., 2., 10., 800.);
57602        assert_eq_m128(r, e);
57603    }
57604
57605    #[simd_test(enable = "avx512f")]
57606    unsafe fn test_mm_maskz_mul_ss() {
57607        let a = _mm_set_ps(1., 2., 10., 20.);
57608        let b = _mm_set_ps(3., 4., 30., 40.);
57609        let r = _mm_maskz_mul_ss(0, a, b);
57610        let e = _mm_set_ps(1., 2., 10., 0.);
57611        assert_eq_m128(r, e);
57612        let r = _mm_maskz_mul_ss(0b11111111, a, b);
57613        let e = _mm_set_ps(1., 2., 10., 800.);
57614        assert_eq_m128(r, e);
57615    }
57616
57617    #[simd_test(enable = "avx512f")]
57618    unsafe fn test_mm_mask_mul_sd() {
57619        let src = _mm_set_pd(10., 11.);
57620        let a = _mm_set_pd(1., 2.);
57621        let b = _mm_set_pd(3., 4.);
57622        let r = _mm_mask_mul_sd(src, 0, a, b);
57623        let e = _mm_set_pd(1., 11.);
57624        assert_eq_m128d(r, e);
57625        let r = _mm_mask_mul_sd(src, 0b11111111, a, b);
57626        let e = _mm_set_pd(1., 8.);
57627        assert_eq_m128d(r, e);
57628    }
57629
57630    #[simd_test(enable = "avx512f")]
57631    unsafe fn test_mm_maskz_mul_sd() {
57632        let a = _mm_set_pd(1., 2.);
57633        let b = _mm_set_pd(3., 4.);
57634        let r = _mm_maskz_mul_sd(0, a, b);
57635        let e = _mm_set_pd(1., 0.);
57636        assert_eq_m128d(r, e);
57637        let r = _mm_maskz_mul_sd(0b11111111, a, b);
57638        let e = _mm_set_pd(1., 8.);
57639        assert_eq_m128d(r, e);
57640    }
57641
57642    #[simd_test(enable = "avx512f")]
57643    unsafe fn test_mm_mask_div_ss() {
57644        let src = _mm_set_ps(10., 11., 100., 110.);
57645        let a = _mm_set_ps(1., 2., 10., 20.);
57646        let b = _mm_set_ps(3., 4., 30., 40.);
57647        let r = _mm_mask_div_ss(src, 0, a, b);
57648        let e = _mm_set_ps(1., 2., 10., 110.);
57649        assert_eq_m128(r, e);
57650        let r = _mm_mask_div_ss(src, 0b11111111, a, b);
57651        let e = _mm_set_ps(1., 2., 10., 0.5);
57652        assert_eq_m128(r, e);
57653    }
57654
57655    #[simd_test(enable = "avx512f")]
57656    unsafe fn test_mm_maskz_div_ss() {
57657        let a = _mm_set_ps(1., 2., 10., 20.);
57658        let b = _mm_set_ps(3., 4., 30., 40.);
57659        let r = _mm_maskz_div_ss(0, a, b);
57660        let e = _mm_set_ps(1., 2., 10., 0.);
57661        assert_eq_m128(r, e);
57662        let r = _mm_maskz_div_ss(0b11111111, a, b);
57663        let e = _mm_set_ps(1., 2., 10., 0.5);
57664        assert_eq_m128(r, e);
57665    }
57666
57667    #[simd_test(enable = "avx512f")]
57668    unsafe fn test_mm_mask_div_sd() {
57669        let src = _mm_set_pd(10., 11.);
57670        let a = _mm_set_pd(1., 2.);
57671        let b = _mm_set_pd(3., 4.);
57672        let r = _mm_mask_div_sd(src, 0, a, b);
57673        let e = _mm_set_pd(1., 11.);
57674        assert_eq_m128d(r, e);
57675        let r = _mm_mask_div_sd(src, 0b11111111, a, b);
57676        let e = _mm_set_pd(1., 0.5);
57677        assert_eq_m128d(r, e);
57678    }
57679
57680    #[simd_test(enable = "avx512f")]
57681    unsafe fn test_mm_maskz_div_sd() {
57682        let a = _mm_set_pd(1., 2.);
57683        let b = _mm_set_pd(3., 4.);
57684        let r = _mm_maskz_div_sd(0, a, b);
57685        let e = _mm_set_pd(1., 0.);
57686        assert_eq_m128d(r, e);
57687        let r = _mm_maskz_div_sd(0b11111111, a, b);
57688        let e = _mm_set_pd(1., 0.5);
57689        assert_eq_m128d(r, e);
57690    }
57691
57692    #[simd_test(enable = "avx512f")]
57693    unsafe fn test_mm_mask_max_ss() {
57694        let a = _mm_set_ps(0., 1., 2., 3.);
57695        let b = _mm_set_ps(4., 5., 6., 7.);
57696        let r = _mm_mask_max_ss(a, 0, a, b);
57697        let e = _mm_set_ps(0., 1., 2., 3.);
57698        assert_eq_m128(r, e);
57699        let r = _mm_mask_max_ss(a, 0b11111111, a, b);
57700        let e = _mm_set_ps(0., 1., 2., 7.);
57701        assert_eq_m128(r, e);
57702    }
57703
57704    #[simd_test(enable = "avx512f")]
57705    unsafe fn test_mm_maskz_max_ss() {
57706        let a = _mm_set_ps(0., 1., 2., 3.);
57707        let b = _mm_set_ps(4., 5., 6., 7.);
57708        let r = _mm_maskz_max_ss(0, a, b);
57709        let e = _mm_set_ps(0., 1., 2., 0.);
57710        assert_eq_m128(r, e);
57711        let r = _mm_maskz_max_ss(0b11111111, a, b);
57712        let e = _mm_set_ps(0., 1., 2., 7.);
57713        assert_eq_m128(r, e);
57714    }
57715
57716    #[simd_test(enable = "avx512f")]
57717    unsafe fn test_mm_mask_max_sd() {
57718        let a = _mm_set_pd(0., 1.);
57719        let b = _mm_set_pd(2., 3.);
57720        let r = _mm_mask_max_sd(a, 0, a, b);
57721        let e = _mm_set_pd(0., 1.);
57722        assert_eq_m128d(r, e);
57723        let r = _mm_mask_max_sd(a, 0b11111111, a, b);
57724        let e = _mm_set_pd(0., 3.);
57725        assert_eq_m128d(r, e);
57726    }
57727
57728    #[simd_test(enable = "avx512f")]
57729    unsafe fn test_mm_maskz_max_sd() {
57730        let a = _mm_set_pd(0., 1.);
57731        let b = _mm_set_pd(2., 3.);
57732        let r = _mm_maskz_max_sd(0, a, b);
57733        let e = _mm_set_pd(0., 0.);
57734        assert_eq_m128d(r, e);
57735        let r = _mm_maskz_max_sd(0b11111111, a, b);
57736        let e = _mm_set_pd(0., 3.);
57737        assert_eq_m128d(r, e);
57738    }
57739
57740    #[simd_test(enable = "avx512f")]
57741    unsafe fn test_mm_mask_min_ss() {
57742        let a = _mm_set_ps(0., 1., 2., 3.);
57743        let b = _mm_set_ps(4., 5., 6., 7.);
57744        let r = _mm_mask_min_ss(a, 0, a, b);
57745        let e = _mm_set_ps(0., 1., 2., 3.);
57746        assert_eq_m128(r, e);
57747        let r = _mm_mask_min_ss(a, 0b11111111, a, b);
57748        let e = _mm_set_ps(0., 1., 2., 3.);
57749        assert_eq_m128(r, e);
57750    }
57751
57752    #[simd_test(enable = "avx512f")]
57753    unsafe fn test_mm_maskz_min_ss() {
57754        let a = _mm_set_ps(0., 1., 2., 3.);
57755        let b = _mm_set_ps(4., 5., 6., 7.);
57756        let r = _mm_maskz_min_ss(0, a, b);
57757        let e = _mm_set_ps(0., 1., 2., 0.);
57758        assert_eq_m128(r, e);
57759        let r = _mm_maskz_min_ss(0b11111111, a, b);
57760        let e = _mm_set_ps(0., 1., 2., 3.);
57761        assert_eq_m128(r, e);
57762    }
57763
57764    #[simd_test(enable = "avx512f")]
57765    unsafe fn test_mm_mask_min_sd() {
57766        let a = _mm_set_pd(0., 1.);
57767        let b = _mm_set_pd(2., 3.);
57768        let r = _mm_mask_min_sd(a, 0, a, b);
57769        let e = _mm_set_pd(0., 1.);
57770        assert_eq_m128d(r, e);
57771        let r = _mm_mask_min_sd(a, 0b11111111, a, b);
57772        let e = _mm_set_pd(0., 1.);
57773        assert_eq_m128d(r, e);
57774    }
57775
57776    #[simd_test(enable = "avx512f")]
57777    unsafe fn test_mm_maskz_min_sd() {
57778        let a = _mm_set_pd(0., 1.);
57779        let b = _mm_set_pd(2., 3.);
57780        let r = _mm_maskz_min_sd(0, a, b);
57781        let e = _mm_set_pd(0., 0.);
57782        assert_eq_m128d(r, e);
57783        let r = _mm_maskz_min_sd(0b11111111, a, b);
57784        let e = _mm_set_pd(0., 1.);
57785        assert_eq_m128d(r, e);
57786    }
57787
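    // In the sqrt tests, element 0 of `b` is 4.0 (note that _mm_set_ps lists lanes
    // high-to-low), so the masked result in element 0 is sqrt(4.0) = 2.0.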
57788    #[simd_test(enable = "avx512f")]
57789    unsafe fn test_mm_mask_sqrt_ss() {
57790        let src = _mm_set_ps(10., 11., 100., 110.);
57791        let a = _mm_set_ps(1., 2., 10., 20.);
57792        let b = _mm_set_ps(3., 4., 30., 4.);
57793        let r = _mm_mask_sqrt_ss(src, 0, a, b);
57794        let e = _mm_set_ps(1., 2., 10., 110.);
57795        assert_eq_m128(r, e);
57796        let r = _mm_mask_sqrt_ss(src, 0b11111111, a, b);
57797        let e = _mm_set_ps(1., 2., 10., 2.);
57798        assert_eq_m128(r, e);
57799    }
57800
57801    #[simd_test(enable = "avx512f")]
57802    unsafe fn test_mm_maskz_sqrt_ss() {
57803        let a = _mm_set_ps(1., 2., 10., 20.);
57804        let b = _mm_set_ps(3., 4., 30., 4.);
57805        let r = _mm_maskz_sqrt_ss(0, a, b);
57806        let e = _mm_set_ps(1., 2., 10., 0.);
57807        assert_eq_m128(r, e);
57808        let r = _mm_maskz_sqrt_ss(0b11111111, a, b);
57809        let e = _mm_set_ps(1., 2., 10., 2.);
57810        assert_eq_m128(r, e);
57811    }
57812
57813    #[simd_test(enable = "avx512f")]
57814    unsafe fn test_mm_mask_sqrt_sd() {
57815        let src = _mm_set_pd(10., 11.);
57816        let a = _mm_set_pd(1., 2.);
57817        let b = _mm_set_pd(3., 4.);
57818        let r = _mm_mask_sqrt_sd(src, 0, a, b);
57819        let e = _mm_set_pd(1., 11.);
57820        assert_eq_m128d(r, e);
57821        let r = _mm_mask_sqrt_sd(src, 0b11111111, a, b);
57822        let e = _mm_set_pd(1., 2.);
57823        assert_eq_m128d(r, e);
57824    }
57825
57826    #[simd_test(enable = "avx512f")]
57827    unsafe fn test_mm_maskz_sqrt_sd() {
57828        let a = _mm_set_pd(1., 2.);
57829        let b = _mm_set_pd(3., 4.);
57830        let r = _mm_maskz_sqrt_sd(0, a, b);
57831        let e = _mm_set_pd(1., 0.);
57832        assert_eq_m128d(r, e);
57833        let r = _mm_maskz_sqrt_sd(0b11111111, a, b);
57834        let e = _mm_set_pd(1., 2.);
57835        assert_eq_m128d(r, e);
57836    }
57837
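    // vrsqrt14 computes an approximate reciprocal square root with a maximum relative error
    // of 2^-14; for b[0] = 4.0 the approximation comes out exactly 1/sqrt(4.0) = 0.5, which
    // these tests rely on when comparing exactly.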
57838    #[simd_test(enable = "avx512f")]
57839    unsafe fn test_mm_rsqrt14_ss() {
57840        let a = _mm_set_ps(1., 2., 10., 20.);
57841        let b = _mm_set_ps(3., 4., 30., 4.);
57842        let r = _mm_rsqrt14_ss(a, b);
57843        let e = _mm_set_ps(1., 2., 10., 0.5);
57844        assert_eq_m128(r, e);
57845    }
57846
57847    #[simd_test(enable = "avx512f")]
57848    unsafe fn test_mm_mask_rsqrt14_ss() {
57849        let src = _mm_set_ps(10., 11., 100., 110.);
57850        let a = _mm_set_ps(1., 2., 10., 20.);
57851        let b = _mm_set_ps(3., 4., 30., 4.);
57852        let r = _mm_mask_rsqrt14_ss(src, 0, a, b);
57853        let e = _mm_set_ps(1., 2., 10., 110.);
57854        assert_eq_m128(r, e);
57855        let r = _mm_mask_rsqrt14_ss(src, 0b11111111, a, b);
57856        let e = _mm_set_ps(1., 2., 10., 0.5);
57857        assert_eq_m128(r, e);
57858    }
57859
57860    #[simd_test(enable = "avx512f")]
57861    unsafe fn test_mm_maskz_rsqrt14_ss() {
57862        let a = _mm_set_ps(1., 2., 10., 20.);
57863        let b = _mm_set_ps(3., 4., 30., 4.);
57864        let r = _mm_maskz_rsqrt14_ss(0, a, b);
57865        let e = _mm_set_ps(1., 2., 10., 0.);
57866        assert_eq_m128(r, e);
57867        let r = _mm_maskz_rsqrt14_ss(0b11111111, a, b);
57868        let e = _mm_set_ps(1., 2., 10., 0.5);
57869        assert_eq_m128(r, e);
57870    }
57871
57872    #[simd_test(enable = "avx512f")]
57873    unsafe fn test_mm_rsqrt14_sd() {
57874        let a = _mm_set_pd(1., 2.);
57875        let b = _mm_set_pd(3., 4.);
57876        let r = _mm_rsqrt14_sd(a, b);
57877        let e = _mm_set_pd(1., 0.5);
57878        assert_eq_m128d(r, e);
57879    }
57880
57881    #[simd_test(enable = "avx512f")]
57882    unsafe fn test_mm_mask_rsqrt14_sd() {
57883        let src = _mm_set_pd(10., 11.);
57884        let a = _mm_set_pd(1., 2.);
57885        let b = _mm_set_pd(3., 4.);
57886        let r = _mm_mask_rsqrt14_sd(src, 0, a, b);
57887        let e = _mm_set_pd(1., 11.);
57888        assert_eq_m128d(r, e);
57889        let r = _mm_mask_rsqrt14_sd(src, 0b11111111, a, b);
57890        let e = _mm_set_pd(1., 0.5);
57891        assert_eq_m128d(r, e);
57892    }
57893
57894    #[simd_test(enable = "avx512f")]
57895    unsafe fn test_mm_maskz_rsqrt14_sd() {
57896        let a = _mm_set_pd(1., 2.);
57897        let b = _mm_set_pd(3., 4.);
57898        let r = _mm_maskz_rsqrt14_sd(0, a, b);
57899        let e = _mm_set_pd(1., 0.);
57900        assert_eq_m128d(r, e);
57901        let r = _mm_maskz_rsqrt14_sd(0b11111111, a, b);
57902        let e = _mm_set_pd(1., 0.5);
57903        assert_eq_m128d(r, e);
57904    }
57905
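    // vrcp14 computes an approximate reciprocal with a maximum relative error of 2^-14;
    // for b[0] = 4.0 the result is exactly 0.25, which is what these tests compare against.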
57906    #[simd_test(enable = "avx512f")]
57907    unsafe fn test_mm_rcp14_ss() {
57908        let a = _mm_set_ps(1., 2., 10., 20.);
57909        let b = _mm_set_ps(3., 4., 30., 4.);
57910        let r = _mm_rcp14_ss(a, b);
57911        let e = _mm_set_ps(1., 2., 10., 0.25);
57912        assert_eq_m128(r, e);
57913    }
57914
57915    #[simd_test(enable = "avx512f")]
57916    unsafe fn test_mm_mask_rcp14_ss() {
57917        let src = _mm_set_ps(10., 11., 100., 110.);
57918        let a = _mm_set_ps(1., 2., 10., 20.);
57919        let b = _mm_set_ps(3., 4., 30., 4.);
57920        let r = _mm_mask_rcp14_ss(src, 0, a, b);
57921        let e = _mm_set_ps(1., 2., 10., 110.);
57922        assert_eq_m128(r, e);
57923        let r = _mm_mask_rcp14_ss(src, 0b11111111, a, b);
57924        let e = _mm_set_ps(1., 2., 10., 0.25);
57925        assert_eq_m128(r, e);
57926    }
57927
57928    #[simd_test(enable = "avx512f")]
57929    unsafe fn test_mm_maskz_rcp14_ss() {
57930        let a = _mm_set_ps(1., 2., 10., 20.);
57931        let b = _mm_set_ps(3., 4., 30., 4.);
57932        let r = _mm_maskz_rcp14_ss(0, a, b);
57933        let e = _mm_set_ps(1., 2., 10., 0.);
57934        assert_eq_m128(r, e);
57935        let r = _mm_maskz_rcp14_ss(0b11111111, a, b);
57936        let e = _mm_set_ps(1., 2., 10., 0.25);
57937        assert_eq_m128(r, e);
57938    }
57939
57940    #[simd_test(enable = "avx512f")]
57941    unsafe fn test_mm_rcp14_sd() {
57942        let a = _mm_set_pd(1., 2.);
57943        let b = _mm_set_pd(3., 4.);
57944        let r = _mm_rcp14_sd(a, b);
57945        let e = _mm_set_pd(1., 0.25);
57946        assert_eq_m128d(r, e);
57947    }
57948
57949    #[simd_test(enable = "avx512f")]
57950    unsafe fn test_mm_mask_rcp14_sd() {
57951        let src = _mm_set_pd(10., 11.);
57952        let a = _mm_set_pd(1., 2.);
57953        let b = _mm_set_pd(3., 4.);
57954        let r = _mm_mask_rcp14_sd(src, 0, a, b);
57955        let e = _mm_set_pd(1., 11.);
57956        assert_eq_m128d(r, e);
57957        let r = _mm_mask_rcp14_sd(src, 0b11111111, a, b);
57958        let e = _mm_set_pd(1., 0.25);
57959        assert_eq_m128d(r, e);
57960    }
57961
57962    #[simd_test(enable = "avx512f")]
57963    unsafe fn test_mm_maskz_rcp14_sd() {
57964        let a = _mm_set_pd(1., 2.);
57965        let b = _mm_set_pd(3., 4.);
57966        let r = _mm_maskz_rcp14_sd(0, a, b);
57967        let e = _mm_set_pd(1., 0.);
57968        assert_eq_m128d(r, e);
57969        let r = _mm_maskz_rcp14_sd(0b11111111, a, b);
57970        let e = _mm_set_pd(1., 0.25);
57971        assert_eq_m128d(r, e);
57972    }
57973
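    // vgetexp extracts the unbiased exponent of element 0 of `b` as a float, i.e.
    // floor(log2(|b[0]|)); for b[0] = 3.0 that is 1.0.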
57974    #[simd_test(enable = "avx512f")]
57975    unsafe fn test_mm_getexp_ss() {
57976        let a = _mm_set1_ps(2.);
57977        let b = _mm_set1_ps(3.);
57978        let r = _mm_getexp_ss(a, b);
57979        let e = _mm_set_ps(2., 2., 2., 1.);
57980        assert_eq_m128(r, e);
57981    }
57982
57983    #[simd_test(enable = "avx512f")]
57984    unsafe fn test_mm_mask_getexp_ss() {
57985        let a = _mm_set1_ps(2.);
57986        let b = _mm_set1_ps(3.);
57987        let r = _mm_mask_getexp_ss(a, 0, a, b);
57988        let e = _mm_set_ps(2., 2., 2., 2.);
57989        assert_eq_m128(r, e);
57990        let r = _mm_mask_getexp_ss(a, 0b11111111, a, b);
57991        let e = _mm_set_ps(2., 2., 2., 1.);
57992        assert_eq_m128(r, e);
57993    }
57994
57995    #[simd_test(enable = "avx512f")]
57996    unsafe fn test_mm_maskz_getexp_ss() {
57997        let a = _mm_set1_ps(2.);
57998        let b = _mm_set1_ps(3.);
57999        let r = _mm_maskz_getexp_ss(0, a, b);
58000        let e = _mm_set_ps(2., 2., 2., 0.);
58001        assert_eq_m128(r, e);
58002        let r = _mm_maskz_getexp_ss(0b11111111, a, b);
58003        let e = _mm_set_ps(2., 2., 2., 1.);
58004        assert_eq_m128(r, e);
58005    }
58006
58007    #[simd_test(enable = "avx512f")]
58008    unsafe fn test_mm_getexp_sd() {
58009        let a = _mm_set1_pd(2.);
58010        let b = _mm_set1_pd(3.);
58011        let r = _mm_getexp_sd(a, b);
58012        let e = _mm_set_pd(2., 1.);
58013        assert_eq_m128d(r, e);
58014    }
58015
58016    #[simd_test(enable = "avx512f")]
58017    unsafe fn test_mm_mask_getexp_sd() {
58018        let a = _mm_set1_pd(2.);
58019        let b = _mm_set1_pd(3.);
58020        let r = _mm_mask_getexp_sd(a, 0, a, b);
58021        let e = _mm_set_pd(2., 2.);
58022        assert_eq_m128d(r, e);
58023        let r = _mm_mask_getexp_sd(a, 0b11111111, a, b);
58024        let e = _mm_set_pd(2., 1.);
58025        assert_eq_m128d(r, e);
58026    }
58027
58028    #[simd_test(enable = "avx512f")]
58029    unsafe fn test_mm_maskz_getexp_sd() {
58030        let a = _mm_set1_pd(2.);
58031        let b = _mm_set1_pd(3.);
58032        let r = _mm_maskz_getexp_sd(0, a, b);
58033        let e = _mm_set_pd(2., 0.);
58034        assert_eq_m128d(r, e);
58035        let r = _mm_maskz_getexp_sd(0b11111111, a, b);
58036        let e = _mm_set_pd(2., 1.);
58037        assert_eq_m128d(r, e);
58038    }
58039
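    // vgetmant with _MM_MANT_NORM_1_2 returns the mantissa of b[0] normalized into [1, 2),
    // with the sign taken from the source under _MM_MANT_SIGN_SRC: 10.0 = 1.25 * 2^3,
    // hence the expected 1.25.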
58040    #[simd_test(enable = "avx512f")]
58041    unsafe fn test_mm_getmant_ss() {
58042        let a = _mm_set1_ps(20.);
58043        let b = _mm_set1_ps(10.);
58044        let r = _mm_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, b);
58045        let e = _mm_set_ps(20., 20., 20., 1.25);
58046        assert_eq_m128(r, e);
58047    }
58048
58049    #[simd_test(enable = "avx512f")]
58050    unsafe fn test_mm_mask_getmant_ss() {
58051        let a = _mm_set1_ps(20.);
58052        let b = _mm_set1_ps(10.);
58053        let r = _mm_mask_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a, b);
58054        let e = _mm_set_ps(20., 20., 20., 20.);
58055        assert_eq_m128(r, e);
58056        let r = _mm_mask_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11111111, a, b);
58057        let e = _mm_set_ps(20., 20., 20., 1.25);
58058        assert_eq_m128(r, e);
58059    }
58060
58061    #[simd_test(enable = "avx512f")]
58062    unsafe fn test_mm_maskz_getmant_ss() {
58063        let a = _mm_set1_ps(20.);
58064        let b = _mm_set1_ps(10.);
58065        let r = _mm_maskz_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a, b);
58066        let e = _mm_set_ps(20., 20., 20., 0.);
58067        assert_eq_m128(r, e);
58068        let r = _mm_maskz_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111, a, b);
58069        let e = _mm_set_ps(20., 20., 20., 1.25);
58070        assert_eq_m128(r, e);
58071    }
58072
58073    #[simd_test(enable = "avx512f")]
58074    unsafe fn test_mm_getmant_sd() {
58075        let a = _mm_set1_pd(20.);
58076        let b = _mm_set1_pd(10.);
58077        let r = _mm_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, b);
58078        let e = _mm_set_pd(20., 1.25);
58079        assert_eq_m128d(r, e);
58080    }
58081
58082    #[simd_test(enable = "avx512f")]
58083    unsafe fn test_mm_mask_getmant_sd() {
58084        let a = _mm_set1_pd(20.);
58085        let b = _mm_set1_pd(10.);
58086        let r = _mm_mask_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a, b);
58087        let e = _mm_set_pd(20., 20.);
58088        assert_eq_m128d(r, e);
58089        let r = _mm_mask_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11111111, a, b);
58090        let e = _mm_set_pd(20., 1.25);
58091        assert_eq_m128d(r, e);
58092    }
58093
58094    #[simd_test(enable = "avx512f")]
58095    unsafe fn test_mm_maskz_getmant_sd() {
58096        let a = _mm_set1_pd(20.);
58097        let b = _mm_set1_pd(10.);
58098        let r = _mm_maskz_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a, b);
58099        let e = _mm_set_pd(20., 0.);
58100        assert_eq_m128d(r, e);
58101        let r = _mm_maskz_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111, a, b);
58102        let e = _mm_set_pd(20., 1.25);
58103        assert_eq_m128d(r, e);
58104    }
58105
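    // vrndscale with IMM8 = 0 keeps zero fraction bits and uses round-to-nearest-even,
    // so b[0] = 1.1 rounds to 1.0.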
58106    #[simd_test(enable = "avx512f")]
58107    unsafe fn test_mm_roundscale_ss() {
58108        let a = _mm_set1_ps(2.2);
58109        let b = _mm_set1_ps(1.1);
58110        let r = _mm_roundscale_ss::<0>(a, b);
58111        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
58112        assert_eq_m128(r, e);
58113    }
58114
58115    #[simd_test(enable = "avx512f")]
58116    unsafe fn test_mm_mask_roundscale_ss() {
58117        let a = _mm_set1_ps(2.2);
58118        let b = _mm_set1_ps(1.1);
58119        let r = _mm_mask_roundscale_ss::<0>(a, 0, a, b);
58120        let e = _mm_set_ps(2.2, 2.2, 2.2, 2.2);
58121        assert_eq_m128(r, e);
58122        let r = _mm_mask_roundscale_ss::<0>(a, 0b11111111, a, b);
58123        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
58124        assert_eq_m128(r, e);
58125    }
58126
58127    #[simd_test(enable = "avx512f")]
58128    unsafe fn test_mm_maskz_roundscale_ss() {
58129        let a = _mm_set1_ps(2.2);
58130        let b = _mm_set1_ps(1.1);
58131        let r = _mm_maskz_roundscale_ss::<0>(0, a, b);
58132        let e = _mm_set_ps(2.2, 2.2, 2.2, 0.0);
58133        assert_eq_m128(r, e);
58134        let r = _mm_maskz_roundscale_ss::<0>(0b11111111, a, b);
58135        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
58136        assert_eq_m128(r, e);
58137    }
58138
58139    #[simd_test(enable = "avx512f")]
58140    unsafe fn test_mm_roundscale_sd() {
58141        let a = _mm_set1_pd(2.2);
58142        let b = _mm_set1_pd(1.1);
58143        let r = _mm_roundscale_sd::<0>(a, b);
58144        let e = _mm_set_pd(2.2, 1.0);
58145        assert_eq_m128d(r, e);
58146    }
58147
58148    #[simd_test(enable = "avx512f")]
58149    unsafe fn test_mm_mask_roundscale_sd() {
58150        let a = _mm_set1_pd(2.2);
58151        let b = _mm_set1_pd(1.1);
58152        let r = _mm_mask_roundscale_sd::<0>(a, 0, a, b);
58153        let e = _mm_set_pd(2.2, 2.2);
58154        assert_eq_m128d(r, e);
58155        let r = _mm_mask_roundscale_sd::<0>(a, 0b11111111, a, b);
58156        let e = _mm_set_pd(2.2, 1.0);
58157        assert_eq_m128d(r, e);
58158    }
58159
58160    #[simd_test(enable = "avx512f")]
58161    unsafe fn test_mm_maskz_roundscale_sd() {
58162        let a = _mm_set1_pd(2.2);
58163        let b = _mm_set1_pd(1.1);
58164        let r = _mm_maskz_roundscale_sd::<0>(0, a, b);
58165        let e = _mm_set_pd(2.2, 0.0);
58166        assert_eq_m128d(r, e);
58167        let r = _mm_maskz_roundscale_sd::<0>(0b11111111, a, b);
58168        let e = _mm_set_pd(2.2, 1.0);
58169        assert_eq_m128d(r, e);
58170    }
58171
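    // vscalef computes a[0] * 2^floor(b[0]); with a[0] = 1.0 and b[0] = 3.0 the result is 8.0.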
58172    #[simd_test(enable = "avx512f")]
58173    unsafe fn test_mm_scalef_ss() {
58174        let a = _mm_set1_ps(1.);
58175        let b = _mm_set1_ps(3.);
58176        let r = _mm_scalef_ss(a, b);
58177        let e = _mm_set_ps(1., 1., 1., 8.);
58178        assert_eq_m128(r, e);
58179    }
58180
58181    #[simd_test(enable = "avx512f")]
58182    unsafe fn test_mm_mask_scalef_ss() {
58183        let a = _mm_set1_ps(1.);
58184        let b = _mm_set1_ps(3.);
58185        let r = _mm_mask_scalef_ss(a, 0, a, b);
58186        let e = _mm_set_ps(1., 1., 1., 1.);
58187        assert_eq_m128(r, e);
58188        let r = _mm_mask_scalef_ss(a, 0b11111111, a, b);
58189        let e = _mm_set_ps(1., 1., 1., 8.);
58190        assert_eq_m128(r, e);
58191    }
58192
58193    #[simd_test(enable = "avx512f")]
58194    unsafe fn test_mm_maskz_scalef_ss() {
58195        let a = _mm_set1_ps(1.);
58196        let b = _mm_set1_ps(3.);
58197        let r = _mm_maskz_scalef_ss(0, a, b);
58198        let e = _mm_set_ps(1., 1., 1., 0.);
58199        assert_eq_m128(r, e);
58200        let r = _mm_maskz_scalef_ss(0b11111111, a, b);
58201        let e = _mm_set_ps(1., 1., 1., 8.);
58202        assert_eq_m128(r, e);
58203    }
58204
58205    #[simd_test(enable = "avx512f")]
58206    unsafe fn test_mm_scalef_sd() {
58207        let a = _mm_set1_pd(1.);
58208        let b = _mm_set1_pd(3.);
58209        let r = _mm_scalef_sd(a, b);
58210        let e = _mm_set_pd(1., 8.);
58211        assert_eq_m128d(r, e);
58212    }
58213
58214    #[simd_test(enable = "avx512f")]
58215    unsafe fn test_mm_mask_scalef_sd() {
58216        let a = _mm_set1_pd(1.);
58217        let b = _mm_set1_pd(3.);
58218        let r = _mm_mask_scalef_sd(a, 0, a, b);
58219        let e = _mm_set_pd(1., 1.);
58220        assert_eq_m128d(r, e);
58221        let r = _mm_mask_scalef_sd(a, 0b11111111, a, b);
58222        let e = _mm_set_pd(1., 8.);
58223        assert_eq_m128d(r, e);
58224    }
58225
58226    #[simd_test(enable = "avx512f")]
58227    unsafe fn test_mm_maskz_scalef_sd() {
58228        let a = _mm_set1_pd(1.);
58229        let b = _mm_set1_pd(3.);
58230        let r = _mm_maskz_scalef_sd(0, a, b);
58231        let e = _mm_set_pd(1., 0.);
58232        assert_eq_m128d(r, e);
58233        let r = _mm_maskz_scalef_sd(0b11111111, a, b);
58234        let e = _mm_set_pd(1., 8.);
58235        assert_eq_m128d(r, e);
58236    }
58237
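    // The scalar FMA tests use a = 1, b = 2, c = 3 throughout, so element 0 of the expected
    // results is: fmadd = a*b + c = 5, fmsub = a*b - c = -1, fnmadd = -(a*b) + c = 1, and
    // fnmsub = -(a*b) - c = -5.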
58238    #[simd_test(enable = "avx512f")]
58239    unsafe fn test_mm_mask_fmadd_ss() {
58240        let a = _mm_set1_ps(1.);
58241        let b = _mm_set1_ps(2.);
58242        let c = _mm_set1_ps(3.);
58243        let r = _mm_mask_fmadd_ss(a, 0, b, c);
58244        assert_eq_m128(r, a);
58245        let r = _mm_mask_fmadd_ss(a, 0b11111111, b, c);
58246        let e = _mm_set_ps(1., 1., 1., 5.);
58247        assert_eq_m128(r, e);
58248    }
58249
58250    #[simd_test(enable = "avx512f")]
58251    unsafe fn test_mm_maskz_fmadd_ss() {
58252        let a = _mm_set1_ps(1.);
58253        let b = _mm_set1_ps(2.);
58254        let c = _mm_set1_ps(3.);
58255        let r = _mm_maskz_fmadd_ss(0, a, b, c);
58256        let e = _mm_set_ps(1., 1., 1., 0.);
58257        assert_eq_m128(r, e);
58258        let r = _mm_maskz_fmadd_ss(0b11111111, a, b, c);
58259        let e = _mm_set_ps(1., 1., 1., 5.);
58260        assert_eq_m128(r, e);
58261    }
58262
58263    #[simd_test(enable = "avx512f")]
58264    unsafe fn test_mm_mask3_fmadd_ss() {
58265        let a = _mm_set1_ps(1.);
58266        let b = _mm_set1_ps(2.);
58267        let c = _mm_set1_ps(3.);
58268        let r = _mm_mask3_fmadd_ss(a, b, c, 0);
58269        assert_eq_m128(r, c);
58270        let r = _mm_mask3_fmadd_ss(a, b, c, 0b11111111);
58271        let e = _mm_set_ps(3., 3., 3., 5.);
58272        assert_eq_m128(r, e);
58273    }
58274
58275    #[simd_test(enable = "avx512f")]
58276    unsafe fn test_mm_mask_fmadd_sd() {
58277        let a = _mm_set1_pd(1.);
58278        let b = _mm_set1_pd(2.);
58279        let c = _mm_set1_pd(3.);
58280        let r = _mm_mask_fmadd_sd(a, 0, b, c);
58281        assert_eq_m128d(r, a);
58282        let r = _mm_mask_fmadd_sd(a, 0b11111111, b, c);
58283        let e = _mm_set_pd(1., 5.);
58284        assert_eq_m128d(r, e);
58285    }
58286
58287    #[simd_test(enable = "avx512f")]
58288    unsafe fn test_mm_maskz_fmadd_sd() {
58289        let a = _mm_set1_pd(1.);
58290        let b = _mm_set1_pd(2.);
58291        let c = _mm_set1_pd(3.);
58292        let r = _mm_maskz_fmadd_sd(0, a, b, c);
58293        let e = _mm_set_pd(1., 0.);
58294        assert_eq_m128d(r, e);
58295        let r = _mm_maskz_fmadd_sd(0b11111111, a, b, c);
58296        let e = _mm_set_pd(1., 5.);
58297        assert_eq_m128d(r, e);
58298    }
58299
58300    #[simd_test(enable = "avx512f")]
58301    unsafe fn test_mm_mask3_fmadd_sd() {
58302        let a = _mm_set1_pd(1.);
58303        let b = _mm_set1_pd(2.);
58304        let c = _mm_set1_pd(3.);
58305        let r = _mm_mask3_fmadd_sd(a, b, c, 0);
58306        assert_eq_m128d(r, c);
58307        let r = _mm_mask3_fmadd_sd(a, b, c, 0b11111111);
58308        let e = _mm_set_pd(3., 5.);
58309        assert_eq_m128d(r, e);
58310    }
58311
58312    #[simd_test(enable = "avx512f")]
58313    unsafe fn test_mm_mask_fmsub_ss() {
58314        let a = _mm_set1_ps(1.);
58315        let b = _mm_set1_ps(2.);
58316        let c = _mm_set1_ps(3.);
58317        let r = _mm_mask_fmsub_ss(a, 0, b, c);
58318        assert_eq_m128(r, a);
58319        let r = _mm_mask_fmsub_ss(a, 0b11111111, b, c);
58320        let e = _mm_set_ps(1., 1., 1., -1.);
58321        assert_eq_m128(r, e);
58322    }
58323
58324    #[simd_test(enable = "avx512f")]
58325    unsafe fn test_mm_maskz_fmsub_ss() {
58326        let a = _mm_set1_ps(1.);
58327        let b = _mm_set1_ps(2.);
58328        let c = _mm_set1_ps(3.);
58329        let r = _mm_maskz_fmsub_ss(0, a, b, c);
58330        let e = _mm_set_ps(1., 1., 1., 0.);
58331        assert_eq_m128(r, e);
58332        let r = _mm_maskz_fmsub_ss(0b11111111, a, b, c);
58333        let e = _mm_set_ps(1., 1., 1., -1.);
58334        assert_eq_m128(r, e);
58335    }
58336
58337    #[simd_test(enable = "avx512f")]
58338    unsafe fn test_mm_mask3_fmsub_ss() {
58339        let a = _mm_set1_ps(1.);
58340        let b = _mm_set1_ps(2.);
58341        let c = _mm_set1_ps(3.);
58342        let r = _mm_mask3_fmsub_ss(a, b, c, 0);
58343        assert_eq_m128(r, c);
58344        let r = _mm_mask3_fmsub_ss(a, b, c, 0b11111111);
58345        let e = _mm_set_ps(3., 3., 3., -1.);
58346        assert_eq_m128(r, e);
58347    }
58348
58349    #[simd_test(enable = "avx512f")]
58350    unsafe fn test_mm_mask_fmsub_sd() {
58351        let a = _mm_set1_pd(1.);
58352        let b = _mm_set1_pd(2.);
58353        let c = _mm_set1_pd(3.);
58354        let r = _mm_mask_fmsub_sd(a, 0, b, c);
58355        assert_eq_m128d(r, a);
58356        let r = _mm_mask_fmsub_sd(a, 0b11111111, b, c);
58357        let e = _mm_set_pd(1., -1.);
58358        assert_eq_m128d(r, e);
58359    }
58360
58361    #[simd_test(enable = "avx512f")]
58362    unsafe fn test_mm_maskz_fmsub_sd() {
58363        let a = _mm_set1_pd(1.);
58364        let b = _mm_set1_pd(2.);
58365        let c = _mm_set1_pd(3.);
58366        let r = _mm_maskz_fmsub_sd(0, a, b, c);
58367        let e = _mm_set_pd(1., 0.);
58368        assert_eq_m128d(r, e);
58369        let r = _mm_maskz_fmsub_sd(0b11111111, a, b, c);
58370        let e = _mm_set_pd(1., -1.);
58371        assert_eq_m128d(r, e);
58372    }
58373
58374    #[simd_test(enable = "avx512f")]
58375    unsafe fn test_mm_mask3_fmsub_sd() {
58376        let a = _mm_set1_pd(1.);
58377        let b = _mm_set1_pd(2.);
58378        let c = _mm_set1_pd(3.);
58379        let r = _mm_mask3_fmsub_sd(a, b, c, 0);
58380        assert_eq_m128d(r, c);
58381        let r = _mm_mask3_fmsub_sd(a, b, c, 0b11111111);
58382        let e = _mm_set_pd(3., -1.);
58383        assert_eq_m128d(r, e);
58384    }
58385
58386    #[simd_test(enable = "avx512f")]
58387    unsafe fn test_mm_mask_fnmadd_ss() {
58388        let a = _mm_set1_ps(1.);
58389        let b = _mm_set1_ps(2.);
58390        let c = _mm_set1_ps(3.);
58391        let r = _mm_mask_fnmadd_ss(a, 0, b, c);
58392        assert_eq_m128(r, a);
58393        let r = _mm_mask_fnmadd_ss(a, 0b11111111, b, c);
58394        let e = _mm_set_ps(1., 1., 1., 1.);
58395        assert_eq_m128(r, e);
58396    }
58397
58398    #[simd_test(enable = "avx512f")]
58399    unsafe fn test_mm_maskz_fnmadd_ss() {
58400        let a = _mm_set1_ps(1.);
58401        let b = _mm_set1_ps(2.);
58402        let c = _mm_set1_ps(3.);
58403        let r = _mm_maskz_fnmadd_ss(0, a, b, c);
58404        let e = _mm_set_ps(1., 1., 1., 0.);
58405        assert_eq_m128(r, e);
58406        let r = _mm_maskz_fnmadd_ss(0b11111111, a, b, c);
58407        let e = _mm_set_ps(1., 1., 1., 1.);
58408        assert_eq_m128(r, e);
58409    }
58410
58411    #[simd_test(enable = "avx512f")]
58412    unsafe fn test_mm_mask3_fnmadd_ss() {
58413        let a = _mm_set1_ps(1.);
58414        let b = _mm_set1_ps(2.);
58415        let c = _mm_set1_ps(3.);
58416        let r = _mm_mask3_fnmadd_ss(a, b, c, 0);
58417        assert_eq_m128(r, c);
58418        let r = _mm_mask3_fnmadd_ss(a, b, c, 0b11111111);
58419        let e = _mm_set_ps(3., 3., 3., 1.);
58420        assert_eq_m128(r, e);
58421    }
58422
58423    #[simd_test(enable = "avx512f")]
58424    unsafe fn test_mm_mask_fnmadd_sd() {
58425        let a = _mm_set1_pd(1.);
58426        let b = _mm_set1_pd(2.);
58427        let c = _mm_set1_pd(3.);
58428        let r = _mm_mask_fnmadd_sd(a, 0, b, c);
58429        assert_eq_m128d(r, a);
58430        let r = _mm_mask_fnmadd_sd(a, 0b11111111, b, c);
58431        let e = _mm_set_pd(1., 1.);
58432        assert_eq_m128d(r, e);
58433    }
58434
58435    #[simd_test(enable = "avx512f")]
58436    unsafe fn test_mm_maskz_fnmadd_sd() {
58437        let a = _mm_set1_pd(1.);
58438        let b = _mm_set1_pd(2.);
58439        let c = _mm_set1_pd(3.);
58440        let r = _mm_maskz_fnmadd_sd(0, a, b, c);
58441        let e = _mm_set_pd(1., 0.);
58442        assert_eq_m128d(r, e);
58443        let r = _mm_maskz_fnmadd_sd(0b11111111, a, b, c);
58444        let e = _mm_set_pd(1., 1.);
58445        assert_eq_m128d(r, e);
58446    }
58447
58448    #[simd_test(enable = "avx512f")]
58449    unsafe fn test_mm_mask3_fnmadd_sd() {
58450        let a = _mm_set1_pd(1.);
58451        let b = _mm_set1_pd(2.);
58452        let c = _mm_set1_pd(3.);
58453        let r = _mm_mask3_fnmadd_sd(a, b, c, 0);
58454        assert_eq_m128d(r, c);
58455        let r = _mm_mask3_fnmadd_sd(a, b, c, 0b11111111);
58456        let e = _mm_set_pd(3., 1.);
58457        assert_eq_m128d(r, e);
58458    }
58459
58460    #[simd_test(enable = "avx512f")]
58461    unsafe fn test_mm_mask_fnmsub_ss() {
58462        let a = _mm_set1_ps(1.);
58463        let b = _mm_set1_ps(2.);
58464        let c = _mm_set1_ps(3.);
58465        let r = _mm_mask_fnmsub_ss(a, 0, b, c);
58466        assert_eq_m128(r, a);
58467        let r = _mm_mask_fnmsub_ss(a, 0b11111111, b, c);
58468        let e = _mm_set_ps(1., 1., 1., -5.);
58469        assert_eq_m128(r, e);
58470    }
58471
58472    #[simd_test(enable = "avx512f")]
58473    unsafe fn test_mm_maskz_fnmsub_ss() {
58474        let a = _mm_set1_ps(1.);
58475        let b = _mm_set1_ps(2.);
58476        let c = _mm_set1_ps(3.);
58477        let r = _mm_maskz_fnmsub_ss(0, a, b, c);
58478        let e = _mm_set_ps(1., 1., 1., 0.);
58479        assert_eq_m128(r, e);
58480        let r = _mm_maskz_fnmsub_ss(0b11111111, a, b, c);
58481        let e = _mm_set_ps(1., 1., 1., -5.);
58482        assert_eq_m128(r, e);
58483    }
58484
58485    #[simd_test(enable = "avx512f")]
58486    unsafe fn test_mm_mask3_fnmsub_ss() {
58487        let a = _mm_set1_ps(1.);
58488        let b = _mm_set1_ps(2.);
58489        let c = _mm_set1_ps(3.);
58490        let r = _mm_mask3_fnmsub_ss(a, b, c, 0);
58491        assert_eq_m128(r, c);
58492        let r = _mm_mask3_fnmsub_ss(a, b, c, 0b11111111);
58493        let e = _mm_set_ps(3., 3., 3., -5.);
58494        assert_eq_m128(r, e);
58495    }
58496
58497    #[simd_test(enable = "avx512f")]
58498    unsafe fn test_mm_mask_fnmsub_sd() {
58499        let a = _mm_set1_pd(1.);
58500        let b = _mm_set1_pd(2.);
58501        let c = _mm_set1_pd(3.);
58502        let r = _mm_mask_fnmsub_sd(a, 0, b, c);
58503        assert_eq_m128d(r, a);
58504        let r = _mm_mask_fnmsub_sd(a, 0b11111111, b, c);
58505        let e = _mm_set_pd(1., -5.);
58506        assert_eq_m128d(r, e);
58507    }
58508
58509    #[simd_test(enable = "avx512f")]
58510    unsafe fn test_mm_maskz_fnmsub_sd() {
58511        let a = _mm_set1_pd(1.);
58512        let b = _mm_set1_pd(2.);
58513        let c = _mm_set1_pd(3.);
58514        let r = _mm_maskz_fnmsub_sd(0, a, b, c);
58515        let e = _mm_set_pd(1., 0.);
58516        assert_eq_m128d(r, e);
58517        let r = _mm_maskz_fnmsub_sd(0b11111111, a, b, c);
58518        let e = _mm_set_pd(1., -5.);
58519        assert_eq_m128d(r, e);
58520    }
58521
58522    #[simd_test(enable = "avx512f")]
58523    unsafe fn test_mm_mask3_fnmsub_sd() {
58524        let a = _mm_set1_pd(1.);
58525        let b = _mm_set1_pd(2.);
58526        let c = _mm_set1_pd(3.);
58527        let r = _mm_mask3_fnmsub_sd(a, b, c, 0);
58528        assert_eq_m128d(r, c);
58529        let r = _mm_mask3_fnmsub_sd(a, b, c, 0b11111111);
58530        let e = _mm_set_pd(3., -5.);
58531        assert_eq_m128d(r, e);
58532    }
58533
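    // The *_round_* variants take the rounding control as a const generic; these tests pair
    // _MM_FROUND_TO_ZERO (truncate) with _MM_FROUND_NO_EXC (suppress exceptions). The inputs
    // used here produce exact results, so the expected values match the non-rounding tests
    // above.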
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_add_round_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_ps(1., 2., 10., 60.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_add_round_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_mask_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
            src, 0b11111111, a, b,
        );
        let e = _mm_set_ps(1., 2., 10., 60.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_add_round_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_maskz_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r =
            _mm_maskz_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 60.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_add_round_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_pd(1., 6.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_add_round_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
            src, 0b11111111, a, b,
        );
        let e = _mm_set_pd(1., 6.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_add_round_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r =
            _mm_maskz_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
        let e = _mm_set_pd(1., 6.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_sub_round_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_ps(1., 2., 10., -20.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_sub_round_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_mask_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
            src, 0b11111111, a, b,
        );
        let e = _mm_set_ps(1., 2., 10., -20.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_sub_round_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_maskz_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r =
            _mm_maskz_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., -20.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_sub_round_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_pd(1., -2.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_sub_round_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
            src, 0b11111111, a, b,
        );
        let e = _mm_set_pd(1., -2.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_sub_round_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r =
            _mm_maskz_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
        let e = _mm_set_pd(1., -2.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mul_round_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_ps(1., 2., 10., 800.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_mul_round_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_mask_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
            src, 0b11111111, a, b,
        );
        let e = _mm_set_ps(1., 2., 10., 800.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_mul_round_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_maskz_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r =
            _mm_maskz_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 800.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mul_round_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_pd(1., 8.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_mul_round_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
            src, 0b11111111, a, b,
        );
        let e = _mm_set_pd(1., 8.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_mul_round_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r =
            _mm_maskz_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
        let e = _mm_set_pd(1., 8.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_div_round_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_ps(1., 2., 10., 0.5);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_div_round_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_mask_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
            src, 0b11111111, a, b,
        );
        let e = _mm_set_ps(1., 2., 10., 0.5);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_div_round_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_maskz_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r =
            _mm_maskz_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.5);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_div_round_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_pd(1., 0.5);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_div_round_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
            src, 0b11111111, a, b,
        );
        let e = _mm_set_pd(1., 0.5);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_div_round_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r =
            _mm_maskz_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
        let e = _mm_set_pd(1., 0.5);
        assert_eq_m128d(r, e);
    }

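    // The max/min/getexp/getmant/roundscale tests below pass `_MM_FROUND_CUR_DIRECTION`,
    // i.e. they use whatever rounding mode is currently set in MXCSR rather than an
    // explicitly embedded one.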
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_max_round_ss() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(4., 5., 6., 7.);
        let r = _mm_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
        let e = _mm_set_ps(0., 1., 2., 7.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_max_round_ss() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(4., 5., 6., 7.);
        let r = _mm_mask_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
        let r = _mm_mask_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
        let e = _mm_set_ps(0., 1., 2., 7.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_max_round_ss() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(4., 5., 6., 7.);
        let r = _mm_maskz_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
        let e = _mm_set_ps(0., 1., 2., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
        let e = _mm_set_ps(0., 1., 2., 7.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_max_round_sd() {
        let a = _mm_set_pd(0., 1.);
        let b = _mm_set_pd(2., 3.);
        let r = _mm_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
        let e = _mm_set_pd(0., 3.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_max_round_sd() {
        let a = _mm_set_pd(0., 1.);
        let b = _mm_set_pd(2., 3.);
        let r = _mm_mask_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
        let e = _mm_set_pd(0., 1.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
        let e = _mm_set_pd(0., 3.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_max_round_sd() {
        let a = _mm_set_pd(0., 1.);
        let b = _mm_set_pd(2., 3.);
        let r = _mm_maskz_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
        let e = _mm_set_pd(0., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
        let e = _mm_set_pd(0., 3.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_min_round_ss() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(4., 5., 6., 7.);
        let r = _mm_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_min_round_ss() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(4., 5., 6., 7.);
        let r = _mm_mask_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
        let r = _mm_mask_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_min_round_ss() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(4., 5., 6., 7.);
        let r = _mm_maskz_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
        let e = _mm_set_ps(0., 1., 2., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_min_round_sd() {
        let a = _mm_set_pd(0., 1.);
        let b = _mm_set_pd(2., 3.);
        let r = _mm_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
        let e = _mm_set_pd(0., 1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_min_round_sd() {
        let a = _mm_set_pd(0., 1.);
        let b = _mm_set_pd(2., 3.);
        let r = _mm_mask_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
        let e = _mm_set_pd(0., 1.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
        let e = _mm_set_pd(0., 1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_min_round_sd() {
        let a = _mm_set_pd(0., 1.);
        let b = _mm_set_pd(2., 3.);
        let r = _mm_maskz_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
        let e = _mm_set_pd(0., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
        let e = _mm_set_pd(0., 1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_sqrt_round_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 4.);
        let r = _mm_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_ps(1., 2., 10., 2.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_sqrt_round_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 4.);
        let r = _mm_mask_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
            src, 0b11111111, a, b,
        );
        let e = _mm_set_ps(1., 2., 10., 2.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_sqrt_round_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 4.);
        let r = _mm_maskz_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r =
            _mm_maskz_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 2.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_sqrt_round_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_pd(1., 2.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_sqrt_round_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
            src, 0b11111111, a, b,
        );
        let e = _mm_set_pd(1., 2.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_sqrt_round_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r =
            _mm_maskz_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
        let e = _mm_set_pd(1., 2.);
        assert_eq_m128d(r, e);
    }

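    // getexp returns floor(log2(|x|)) of the lowest element of `b`: for b = 3.0 the
    // expected exponent is 1.0.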
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_getexp_round_ss() {
        let a = _mm_set1_ps(2.);
        let b = _mm_set1_ps(3.);
        let r = _mm_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
        let e = _mm_set_ps(2., 2., 2., 1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_getexp_round_ss() {
        let a = _mm_set1_ps(2.);
        let b = _mm_set1_ps(3.);
        let r = _mm_mask_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
        let e = _mm_set_ps(2., 2., 2., 2.);
        assert_eq_m128(r, e);
        let r = _mm_mask_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
        let e = _mm_set_ps(2., 2., 2., 1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_getexp_round_ss() {
        let a = _mm_set1_ps(2.);
        let b = _mm_set1_ps(3.);
        let r = _mm_maskz_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
        let e = _mm_set_ps(2., 2., 2., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
        let e = _mm_set_ps(2., 2., 2., 1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_getexp_round_sd() {
        let a = _mm_set1_pd(2.);
        let b = _mm_set1_pd(3.);
        let r = _mm_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
        let e = _mm_set_pd(2., 1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_getexp_round_sd() {
        let a = _mm_set1_pd(2.);
        let b = _mm_set1_pd(3.);
        let r = _mm_mask_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
        let e = _mm_set_pd(2., 2.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
        let e = _mm_set_pd(2., 1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_getexp_round_sd() {
        let a = _mm_set1_pd(2.);
        let b = _mm_set1_pd(3.);
        let r = _mm_maskz_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
        let e = _mm_set_pd(2., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
        let e = _mm_set_pd(2., 1.);
        assert_eq_m128d(r, e);
    }

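    // getmant normalises the mantissa of the lowest element of `b` into the interval
    // selected by `_MM_MANT_NORM_1_2`: 10.0 = 1.25 * 2^3, so the expected mantissa is 1.25.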
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_getmant_round_ss() {
        let a = _mm_set1_ps(20.);
        let b = _mm_set1_ps(10.);
        let r =
            _mm_getmant_round_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC, _MM_FROUND_CUR_DIRECTION>(
                a, b,
            );
        let e = _mm_set_ps(20., 20., 20., 1.25);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_getmant_round_ss() {
        let a = _mm_set1_ps(20.);
        let b = _mm_set1_ps(10.);
        let r = _mm_mask_getmant_round_ss::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(a, 0, a, b);
        let e = _mm_set_ps(20., 20., 20., 20.);
        assert_eq_m128(r, e);
        let r = _mm_mask_getmant_round_ss::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(a, 0b11111111, a, b);
        let e = _mm_set_ps(20., 20., 20., 1.25);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_getmant_round_ss() {
        let a = _mm_set1_ps(20.);
        let b = _mm_set1_ps(10.);
        let r = _mm_maskz_getmant_round_ss::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(0, a, b);
        let e = _mm_set_ps(20., 20., 20., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_getmant_round_ss::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(0b11111111, a, b);
        let e = _mm_set_ps(20., 20., 20., 1.25);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_getmant_round_sd() {
        let a = _mm_set1_pd(20.);
        let b = _mm_set1_pd(10.);
        let r =
            _mm_getmant_round_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC, _MM_FROUND_CUR_DIRECTION>(
                a, b,
            );
        let e = _mm_set_pd(20., 1.25);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_getmant_round_sd() {
        let a = _mm_set1_pd(20.);
        let b = _mm_set1_pd(10.);
        let r = _mm_mask_getmant_round_sd::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(a, 0, a, b);
        let e = _mm_set_pd(20., 20.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_getmant_round_sd::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(a, 0b11111111, a, b);
        let e = _mm_set_pd(20., 1.25);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_getmant_round_sd() {
        let a = _mm_set1_pd(20.);
        let b = _mm_set1_pd(10.);
        let r = _mm_maskz_getmant_round_sd::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(0, a, b);
        let e = _mm_set_pd(20., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_getmant_round_sd::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(0b11111111, a, b);
        let e = _mm_set_pd(20., 1.25);
        assert_eq_m128d(r, e);
    }

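    // roundscale with IMM = 0 keeps no fraction bits, i.e. it rounds the lowest element
    // of `b` to an integer: 1.1 rounds to 1.0.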
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_roundscale_round_ss() {
        let a = _mm_set1_ps(2.2);
        let b = _mm_set1_ps(1.1);
        let r = _mm_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_roundscale_round_ss() {
        let a = _mm_set1_ps(2.2);
        let b = _mm_set1_ps(1.1);
        let r = _mm_mask_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
        let e = _mm_set_ps(2.2, 2.2, 2.2, 2.2);
        assert_eq_m128(r, e);
        let r = _mm_mask_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_roundscale_round_ss() {
        let a = _mm_set1_ps(2.2);
        let b = _mm_set1_ps(1.1);
        let r = _mm_maskz_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(0, a, b);
        let e = _mm_set_ps(2.2, 2.2, 2.2, 0.0);
        assert_eq_m128(r, e);
        let r = _mm_maskz_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_roundscale_round_sd() {
        let a = _mm_set1_pd(2.2);
        let b = _mm_set1_pd(1.1);
        let r = _mm_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
        let e = _mm_set_pd(2.2, 1.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_roundscale_round_sd() {
        let a = _mm_set1_pd(2.2);
        let b = _mm_set1_pd(1.1);
        let r = _mm_mask_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
        let e = _mm_set_pd(2.2, 2.2);
        assert_eq_m128d(r, e);
        let r = _mm_mask_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
        let e = _mm_set_pd(2.2, 1.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_roundscale_round_sd() {
        let a = _mm_set1_pd(2.2);
        let b = _mm_set1_pd(1.1);
        let r = _mm_maskz_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(0, a, b);
        let e = _mm_set_pd(2.2, 0.0);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
        let e = _mm_set_pd(2.2, 1.0);
        assert_eq_m128d(r, e);
    }

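    // scalef computes a * 2^floor(b) on the lowest elements: 1.0 * 2^3 = 8.0.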
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_scalef_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(3.);
        let r = _mm_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_ps(1., 1., 1., 8.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_scalef_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(3.);
        let r = _mm_mask_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, a, b,
        );
        let e = _mm_set_ps(1., 1., 1., 1.);
        assert_eq_m128(r, e);
        let r = _mm_mask_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, a, b,
        );
        let e = _mm_set_ps(1., 1., 1., 8.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_scalef_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(3.);
        let r =
            _mm_maskz_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_set_ps(1., 1., 1., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b,
        );
        let e = _mm_set_ps(1., 1., 1., 8.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_scalef_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(3.);
        let r = _mm_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_pd(1., 8.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_scalef_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(3.);
        let r = _mm_mask_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, a, b,
        );
        let e = _mm_set_pd(1., 1.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, a, b,
        );
        let e = _mm_set_pd(1., 8.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_scalef_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(3.);
        let r =
            _mm_maskz_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b,
        );
        let e = _mm_set_pd(1., 8.);
        assert_eq_m128d(r, e);
    }

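    // The fused multiply-add family computes, in the lowest lane: fmadd = a*b + c,
    // fmsub = a*b - c, fnmadd = -(a*b) + c and fnmsub = -(a*b) - c. For the masked
    // variants, `mask_` keeps `a` and `mask3_` keeps `c` when the mask bit is clear.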
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_fmadd_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm_set_ps(1., 1., 1., 5.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_fmadd_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m128(r, a);
        let r = _mm_mask_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., 5.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_fmadd_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_maskz_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., 5.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask3_fmadd_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask3_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m128(r, c);
        let r = _mm_mask3_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0b11111111,
        );
        let e = _mm_set_ps(3., 3., 3., 5.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_fmadd_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm_set_pd(1., 5.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_fmadd_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m128d(r, a);
        let r = _mm_mask_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, b, c,
        );
        let e = _mm_set_pd(1., 5.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_fmadd_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_maskz_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b, c,
        );
        let e = _mm_set_pd(1., 5.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask3_fmadd_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask3_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m128d(r, c);
        let r = _mm_mask3_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0b11111111,
        );
        let e = _mm_set_pd(3., 5.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_fmsub_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm_set_ps(1., 1., 1., -1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_fmsub_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m128(r, a);
        let r = _mm_mask_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., -1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_fmsub_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_maskz_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., -1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask3_fmsub_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask3_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m128(r, c);
        let r = _mm_mask3_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0b11111111,
        );
        let e = _mm_set_ps(3., 3., 3., -1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_fmsub_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm_set_pd(1., -1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_fmsub_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m128d(r, a);
        let r = _mm_mask_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, b, c,
        );
        let e = _mm_set_pd(1., -1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_fmsub_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_maskz_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b, c,
        );
        let e = _mm_set_pd(1., -1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask3_fmsub_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask3_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m128d(r, c);
        let r = _mm_mask3_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0b11111111,
        );
        let e = _mm_set_pd(3., -1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_fnmadd_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm_set_ps(1., 1., 1., 1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_fnmadd_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m128(r, a);
        let r = _mm_mask_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., 1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_fnmadd_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_maskz_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., 1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask3_fnmadd_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask3_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m128(r, c);
        let r = _mm_mask3_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0b11111111,
        );
        let e = _mm_set_ps(3., 3., 3., 1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_fnmadd_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm_set_pd(1., 1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_fnmadd_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m128d(r, a);
        let r = _mm_mask_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, b, c,
        );
        let e = _mm_set_pd(1., 1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_fnmadd_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_maskz_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b, c,
        );
        let e = _mm_set_pd(1., 1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask3_fnmadd_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask3_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m128d(r, c);
        let r = _mm_mask3_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0b11111111,
        );
        let e = _mm_set_pd(3., 1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_fnmsub_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm_set_ps(1., 1., 1., -5.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_fnmsub_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m128(r, a);
        let r = _mm_mask_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., -5.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_fnmsub_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_maskz_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., -5.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask3_fnmsub_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask3_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m128(r, c);
        let r = _mm_mask3_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0b11111111,
        );
        let e = _mm_set_ps(3., 3., 3., -5.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_fnmsub_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm_set_pd(1., -5.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_fnmsub_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m128d(r, a);
        let r = _mm_mask_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, b, c,
        );
        let e = _mm_set_pd(1., -5.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_fnmsub_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_maskz_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b, c,
        );
        let e = _mm_set_pd(1., -5.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask3_fnmsub_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask3_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m128d(r, c);
        let r = _mm_mask3_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0b11111111,
        );
        let e = _mm_set_pd(3., -5.);
        assert_eq_m128d(r, e);
    }

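    // fixupimm patches special values in the lowest lane according to the token table
    // passed in `c` (all ones here); with a NaN input the tests below expect the fixed-up
    // result to be -0.0.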
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_fixupimm_ss() {
        let a = _mm_set_ps(0., 0., 0., f32::NAN);
        let b = _mm_set1_ps(f32::MAX);
        let c = _mm_set1_epi32(i32::MAX);
        let r = _mm_fixupimm_ss::<5>(a, b, c);
        let e = _mm_set_ps(0., 0., 0., -0.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_fixupimm_ss() {
        let a = _mm_set_ps(0., 0., 0., f32::NAN);
        let b = _mm_set1_ps(f32::MAX);
        let c = _mm_set1_epi32(i32::MAX);
        let r = _mm_mask_fixupimm_ss::<5>(a, 0b11111111, b, c);
        let e = _mm_set_ps(0., 0., 0., -0.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_fixupimm_ss() {
        let a = _mm_set_ps(0., 0., 0., f32::NAN);
        let b = _mm_set1_ps(f32::MAX);
        let c = _mm_set1_epi32(i32::MAX);
        let r = _mm_maskz_fixupimm_ss::<5>(0b00000000, a, b, c);
        let e = _mm_set_ps(0., 0., 0., 0.0);
        assert_eq_m128(r, e);
        let r = _mm_maskz_fixupimm_ss::<5>(0b11111111, a, b, c);
        let e = _mm_set_ps(0., 0., 0., -0.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_fixupimm_sd() {
        let a = _mm_set_pd(0., f64::NAN);
        let b = _mm_set1_pd(f64::MAX);
        let c = _mm_set1_epi64x(i32::MAX as i64);
        let r = _mm_fixupimm_sd::<5>(a, b, c);
        let e = _mm_set_pd(0., -0.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_fixupimm_sd() {
        let a = _mm_set_pd(0., f64::NAN);
        let b = _mm_set1_pd(f64::MAX);
        let c = _mm_set1_epi64x(i32::MAX as i64);
        let r = _mm_mask_fixupimm_sd::<5>(a, 0b11111111, b, c);
        let e = _mm_set_pd(0., -0.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_fixupimm_sd() {
        let a = _mm_set_pd(0., f64::NAN);
        let b = _mm_set1_pd(f64::MAX);
        let c = _mm_set1_epi64x(i32::MAX as i64);
        let r = _mm_maskz_fixupimm_sd::<5>(0b00000000, a, b, c);
        let e = _mm_set_pd(0., 0.0);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_fixupimm_sd::<5>(0b11111111, a, b, c);
        let e = _mm_set_pd(0., -0.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_fixupimm_round_ss() {
        let a = _mm_set_ps(1., 0., 0., f32::NAN);
        let b = _mm_set1_ps(f32::MAX);
        let c = _mm_set1_epi32(i32::MAX);
        let r = _mm_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c);
        let e = _mm_set_ps(1., 0., 0., -0.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_fixupimm_round_ss() {
        let a = _mm_set_ps(0., 0., 0., f32::NAN);
        let b = _mm_set1_ps(f32::MAX);
        let c = _mm_set1_epi32(i32::MAX);
        let r = _mm_mask_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, b, c);
        let e = _mm_set_ps(0., 0., 0., -0.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_fixupimm_round_ss() {
        let a = _mm_set_ps(0., 0., 0., f32::NAN);
        let b = _mm_set1_ps(f32::MAX);
        let c = _mm_set1_epi32(i32::MAX);
        let r = _mm_maskz_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(0b00000000, a, b, c);
        let e = _mm_set_ps(0., 0., 0., 0.0);
        assert_eq_m128(r, e);
        let r = _mm_maskz_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b, c);
        let e = _mm_set_ps(0., 0., 0., -0.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_fixupimm_round_sd() {
        let a = _mm_set_pd(0., f64::NAN);
        let b = _mm_set1_pd(f64::MAX);
        let c = _mm_set1_epi64x(i32::MAX as i64);
        let r = _mm_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c);
        let e = _mm_set_pd(0., -0.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_fixupimm_round_sd() {
        let a = _mm_set_pd(0., f64::NAN);
        let b = _mm_set1_pd(f64::MAX);
        let c = _mm_set1_epi64x(i32::MAX as i64);
        let r = _mm_mask_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, b, c);
        let e = _mm_set_pd(0., -0.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_fixupimm_round_sd() {
        let a = _mm_set_pd(0., f64::NAN);
        let b = _mm_set1_pd(f64::MAX);
        let c = _mm_set1_epi64x(i32::MAX as i64);
        let r = _mm_maskz_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(0b00000000, a, b, c);
        let e = _mm_set_pd(0., 0.0);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b, c);
        let e = _mm_set_pd(0., -0.0);
        assert_eq_m128d(r, e);
    }

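    // The masked scalar conversions convert the lowest element of `b` and write it to
    // lane 0 when bit 0 of the mask is set; otherwise lane 0 comes from `src` (or is
    // zeroed for `maskz_`). The upper lane(s) are always copied from `a`.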
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_cvtss_sd() {
        let a = _mm_set_pd(6., -7.5);
        let b = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_mask_cvtss_sd(a, 0, a, b);
        assert_eq_m128d(r, a);
        let r = _mm_mask_cvtss_sd(a, 0b11111111, a, b);
        let e = _mm_set_pd(6., -1.5);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_cvtss_sd() {
        let a = _mm_set_pd(6., -7.5);
        let b = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_maskz_cvtss_sd(0, a, b);
        let e = _mm_set_pd(6., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_cvtss_sd(0b11111111, a, b);
        let e = _mm_set_pd(6., -1.5);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_cvtsd_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b = _mm_set_pd(6., -7.5);
        let r = _mm_mask_cvtsd_ss(a, 0, a, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_cvtsd_ss(a, 0b11111111, a, b);
        let e = _mm_set_ps(0., -0.5, 1., -7.5);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_cvtsd_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b = _mm_set_pd(6., -7.5);
        let r = _mm_maskz_cvtsd_ss(0, a, b);
        let e = _mm_set_ps(0., -0.5, 1., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_cvtsd_ss(0b11111111, a, b);
        let e = _mm_set_ps(0., -0.5, 1., -7.5);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvt_roundss_sd() {
        let a = _mm_set_pd(6., -7.5);
        let b = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
        let e = _mm_set_pd(6., -1.5);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_cvt_roundss_sd() {
        let a = _mm_set_pd(6., -7.5);
        let b = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_mask_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
        assert_eq_m128d(r, a);
        let r = _mm_mask_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
        let e = _mm_set_pd(6., -1.5);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_cvt_roundss_sd() {
        let a = _mm_set_pd(6., -7.5);
        let b = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_maskz_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
        let e = _mm_set_pd(6., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
        let e = _mm_set_pd(6., -1.5);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvt_roundsd_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b = _mm_set_pd(6., -7.5);
        let r = _mm_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_ps(0., -0.5, 1., -7.5);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_cvt_roundsd_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b = _mm_set_pd(6., -7.5);
        let r = _mm_mask_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, 0, a, b);
60048        assert_eq_m128(r, a);
60049        let r = _mm_mask_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
60050            a, 0b11111111, a, b,
60051        );
60052        let e = _mm_set_ps(0., -0.5, 1., -7.5);
60053        assert_eq_m128(r, e);
60054    }
60055
60056    #[simd_test(enable = "avx512f")]
60057    unsafe fn test_mm_maskz_cvt_roundsd_ss() {
60058        let a = _mm_set_ps(0., -0.5, 1., -1.5);
60059        let b = _mm_set_pd(6., -7.5);
60060        let r = _mm_maskz_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
60061        let e = _mm_set_ps(0., -0.5, 1., 0.);
60062        assert_eq_m128(r, e);
60063        let r = _mm_maskz_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
60064            0b11111111, a, b,
60065        );
60066        let e = _mm_set_ps(0., -0.5, 1., -7.5);
60067        assert_eq_m128(r, e);
60068    }
60069
60070    #[simd_test(enable = "avx512f")]
60071    unsafe fn test_mm_cvt_roundss_si32() {
60072        let a = _mm_set_ps(0., -0.5, 1., -1.5);
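        // _MM_FROUND_TO_ZERO truncates toward zero, so -1.5 converts to -1.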
60073        let r = _mm_cvt_roundss_si32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
60074        let e: i32 = -1;
60075        assert_eq!(r, e);
60076    }
60077
60078    #[simd_test(enable = "avx512f")]
60079    unsafe fn test_mm_cvt_roundss_i32() {
60080        let a = _mm_set_ps(0., -0.5, 1., -1.5);
60081        let r = _mm_cvt_roundss_i32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
60082        let e: i32 = -1;
60083        assert_eq!(r, e);
60084    }
60085
60086    #[simd_test(enable = "avx512f")]
60087    unsafe fn test_mm_cvt_roundss_u32() {
60088        let a = _mm_set_ps(0., -0.5, 1., -1.5);
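        // -1.5 is out of range for an unsigned conversion, so the unsigned integer
        // indefinite value (all ones, i.e. u32::MAX) is returned.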
60089        let r = _mm_cvt_roundss_u32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
60090        let e: u32 = u32::MAX;
60091        assert_eq!(r, e);
60092    }
60093
60094    #[simd_test(enable = "avx512f")]
60095    unsafe fn test_mm_cvtss_i32() {
60096        let a = _mm_set_ps(0., -0.5, 1., -1.5);
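        // _mm_cvtss_i32 uses the current MXCSR rounding mode (round-to-nearest-even
        // by default), so -1.5 rounds to -2.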
60097        let r = _mm_cvtss_i32(a);
60098        let e: i32 = -2;
60099        assert_eq!(r, e);
60100    }
60101
60102    #[simd_test(enable = "avx512f")]
60103    unsafe fn test_mm_cvtss_u32() {
60104        let a = _mm_set_ps(0., -0.5, 1., -1.5);
60105        let r = _mm_cvtss_u32(a);
60106        let e: u32 = u32::MAX;
60107        assert_eq!(r, e);
60108    }
60109
60110    #[simd_test(enable = "avx512f")]
60111    unsafe fn test_mm_cvt_roundsd_si32() {
60112        let a = _mm_set_pd(1., -1.5);
60113        let r = _mm_cvt_roundsd_si32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
60114        let e: i32 = -1;
60115        assert_eq!(r, e);
60116    }
60117
60118    #[simd_test(enable = "avx512f")]
60119    unsafe fn test_mm_cvt_roundsd_i32() {
60120        let a = _mm_set_pd(1., -1.5);
60121        let r = _mm_cvt_roundsd_i32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
60122        let e: i32 = -1;
60123        assert_eq!(r, e);
60124    }
60125
60126    #[simd_test(enable = "avx512f")]
60127    unsafe fn test_mm_cvt_roundsd_u32() {
60128        let a = _mm_set_pd(1., -1.5);
60129        let r = _mm_cvt_roundsd_u32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
60130        let e: u32 = u32::MAX;
60131        assert_eq!(r, e);
60132    }
60133
60134    #[simd_test(enable = "avx512f")]
60135    unsafe fn test_mm_cvtsd_i32() {
60136        let a = _mm_set_pd(1., -1.5);
60137        let r = _mm_cvtsd_i32(a);
60138        let e: i32 = -2;
60139        assert_eq!(r, e);
60140    }
60141
60142    #[simd_test(enable = "avx512f")]
60143    unsafe fn test_mm_cvtsd_u32() {
60144        let a = _mm_set_pd(1., -1.5);
60145        let r = _mm_cvtsd_u32(a);
60146        let e: u32 = u32::MAX;
60147        assert_eq!(r, e);
60148    }
60149
60150    #[simd_test(enable = "avx512f")]
60151    unsafe fn test_mm_cvt_roundi32_ss() {
60152        let a = _mm_set_ps(0., -0.5, 1., -1.5);
60153        let b: i32 = 9;
60154        let r = _mm_cvt_roundi32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
60155        let e = _mm_set_ps(0., -0.5, 1., 9.);
60156        assert_eq_m128(r, e);
60157    }
60158
60159    #[simd_test(enable = "avx512f")]
60160    unsafe fn test_mm_cvt_roundsi32_ss() {
60161        let a = _mm_set_ps(0., -0.5, 1., -1.5);
60162        let b: i32 = 9;
60163        let r = _mm_cvt_roundsi32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
60164        let e = _mm_set_ps(0., -0.5, 1., 9.);
60165        assert_eq_m128(r, e);
60166    }
60167
60168    #[simd_test(enable = "avx512f")]
60169    unsafe fn test_mm_cvt_roundu32_ss() {
60170        let a = _mm_set_ps(0., -0.5, 1., -1.5);
60171        let b: u32 = 9;
60172        let r = _mm_cvt_roundu32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
60173        let e = _mm_set_ps(0., -0.5, 1., 9.);
60174        assert_eq_m128(r, e);
60175    }
60176
60177    #[simd_test(enable = "avx512f")]
60178    unsafe fn test_mm_cvti32_ss() {
60179        let a = _mm_set_ps(0., -0.5, 1., -1.5);
60180        let b: i32 = 9;
60181        let r = _mm_cvti32_ss(a, b);
60182        let e = _mm_set_ps(0., -0.5, 1., 9.);
60183        assert_eq_m128(r, e);
60184    }
60185
60186    #[simd_test(enable = "avx512f")]
60187    unsafe fn test_mm_cvti32_sd() {
60188        let a = _mm_set_pd(1., -1.5);
60189        let b: i32 = 9;
60190        let r = _mm_cvti32_sd(a, b);
60191        let e = _mm_set_pd(1., 9.);
60192        assert_eq_m128d(r, e);
60193    }
60194
60195    #[simd_test(enable = "avx512f")]
60196    unsafe fn test_mm_cvtt_roundss_si32() {
60197        let a = _mm_set_ps(0., -0.5, 1., -1.5);
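        // The cvtt* forms always truncate regardless of the rounding mode, so -1.5 becomes -1.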
60198        let r = _mm_cvtt_roundss_si32::<_MM_FROUND_NO_EXC>(a);
60199        let e: i32 = -1;
60200        assert_eq!(r, e);
60201    }
60202
60203    #[simd_test(enable = "avx512f")]
60204    unsafe fn test_mm_cvtt_roundss_i32() {
60205        let a = _mm_set_ps(0., -0.5, 1., -1.5);
60206        let r = _mm_cvtt_roundss_i32::<_MM_FROUND_NO_EXC>(a);
60207        let e: i32 = -1;
60208        assert_eq!(r, e);
60209    }
60210
60211    #[simd_test(enable = "avx512f")]
60212    unsafe fn test_mm_cvtt_roundss_u32() {
60213        let a = _mm_set_ps(0., -0.5, 1., -1.5);
60214        let r = _mm_cvtt_roundss_u32::<_MM_FROUND_NO_EXC>(a);
60215        let e: u32 = u32::MAX;
60216        assert_eq!(r, e);
60217    }
60218
60219    #[simd_test(enable = "avx512f")]
60220    unsafe fn test_mm_cvttss_i32() {
60221        let a = _mm_set_ps(0., -0.5, 1., -1.5);
60222        let r = _mm_cvttss_i32(a);
60223        let e: i32 = -1;
60224        assert_eq!(r, e);
60225    }
60226
60227    #[simd_test(enable = "avx512f")]
60228    unsafe fn test_mm_cvttss_u32() {
60229        let a = _mm_set_ps(0., -0.5, 1., -1.5);
60230        let r = _mm_cvttss_u32(a);
60231        let e: u32 = u32::MAX;
60232        assert_eq!(r, e);
60233    }
60234
60235    #[simd_test(enable = "avx512f")]
60236    unsafe fn test_mm_cvtt_roundsd_si32() {
60237        let a = _mm_set_pd(1., -1.5);
60238        let r = _mm_cvtt_roundsd_si32::<_MM_FROUND_NO_EXC>(a);
60239        let e: i32 = -1;
60240        assert_eq!(r, e);
60241    }
60242
60243    #[simd_test(enable = "avx512f")]
60244    unsafe fn test_mm_cvtt_roundsd_i32() {
60245        let a = _mm_set_pd(1., -1.5);
60246        let r = _mm_cvtt_roundsd_i32::<_MM_FROUND_NO_EXC>(a);
60247        let e: i32 = -1;
60248        assert_eq!(r, e);
60249    }
60250
60251    #[simd_test(enable = "avx512f")]
60252    unsafe fn test_mm_cvtt_roundsd_u32() {
60253        let a = _mm_set_pd(1., -1.5);
60254        let r = _mm_cvtt_roundsd_u32::<_MM_FROUND_NO_EXC>(a);
60255        let e: u32 = u32::MAX;
60256        assert_eq!(r, e);
60257    }
60258
60259    #[simd_test(enable = "avx512f")]
60260    unsafe fn test_mm_cvttsd_i32() {
60261        let a = _mm_set_pd(1., -1.5);
60262        let r = _mm_cvttsd_i32(a);
60263        let e: i32 = -1;
60264        assert_eq!(r, e);
60265    }
60266
60267    #[simd_test(enable = "avx512f")]
60268    unsafe fn test_mm_cvttsd_u32() {
60269        let a = _mm_set_pd(1., -1.5);
60270        let r = _mm_cvttsd_u32(a);
60271        let e: u32 = u32::MAX;
60272        assert_eq!(r, e);
60273    }
60274
60275    #[simd_test(enable = "avx512f")]
60276    unsafe fn test_mm_cvtu32_ss() {
60277        let a = _mm_set_ps(0., -0.5, 1., -1.5);
60278        let b: u32 = 9;
60279        let r = _mm_cvtu32_ss(a, b);
60280        let e = _mm_set_ps(0., -0.5, 1., 9.);
60281        assert_eq_m128(r, e);
60282    }
60283
60284    #[simd_test(enable = "avx512f")]
60285    unsafe fn test_mm_cvtu32_sd() {
60286        let a = _mm_set_pd(1., -1.5);
60287        let b: u32 = 9;
60288        let r = _mm_cvtu32_sd(a, b);
60289        let e = _mm_set_pd(1., 9.);
60290        assert_eq_m128d(r, e);
60291    }
60292
60293    #[simd_test(enable = "avx512f")]
60294    unsafe fn test_mm_comi_round_ss() {
60295        let a = _mm_set1_ps(2.2);
60296        let b = _mm_set1_ps(1.1);
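        // Predicate 0 is _CMP_EQ_OQ; 2.2 == 1.1 is false, so the comparison returns 0.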
60297        let r = _mm_comi_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
60298        let e: i32 = 0;
60299        assert_eq!(r, e);
60300    }
60301
60302    #[simd_test(enable = "avx512f")]
60303    unsafe fn test_mm_comi_round_sd() {
60304        let a = _mm_set1_pd(2.2);
60305        let b = _mm_set1_pd(1.1);
60306        let r = _mm_comi_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
60307        let e: i32 = 0;
60308        assert_eq!(r, e);
60309    }
60310
60311    #[simd_test(enable = "avx512f")]
60312    unsafe fn test_mm512_cvtsi512_si32() {
60313        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
60314        let r = _mm512_cvtsi512_si32(a);
60315        let e: i32 = 1;
60316        assert_eq!(r, e);
60317    }
60318
60319    #[simd_test(enable = "avx512f")]
60320    unsafe fn test_mm512_cvtss_f32() {
60321        let a = _mm512_setr_ps(
60322            312.0134, 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
60323        );
60324        assert_eq!(_mm512_cvtss_f32(a), 312.0134);
60325    }
60326
60327    #[simd_test(enable = "avx512f")]
60328    unsafe fn test_mm512_cvtsd_f64() {
60329        let r = _mm512_cvtsd_f64(_mm512_setr_pd(-1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8));
60330        assert_eq!(r, -1.1);
60331    }
60332
60333    #[simd_test(enable = "avx512f")]
60334    unsafe fn test_mm512_shuffle_pd() {
60335        let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
60336        let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
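        // Each imm8 bit selects the lower (0) or upper (1) double for that output
        // position; outputs alternate between a and b within each 128-bit lane, so an
        // all-ones immediate picks (a_hi, b_hi) from every lane.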
60337        let r = _mm512_shuffle_pd::<0b11_11_11_11>(a, b);
60338        let e = _mm512_setr_pd(4., 3., 8., 7., 4., 3., 8., 7.);
60339        assert_eq_m512d(r, e);
60340    }
60341
60342    #[simd_test(enable = "avx512f")]
60343    unsafe fn test_mm512_mask_shuffle_pd() {
60344        let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
60345        let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
60346        let r = _mm512_mask_shuffle_pd::<0b11_11_11_11>(a, 0, a, b);
60347        assert_eq_m512d(r, a);
60348        let r = _mm512_mask_shuffle_pd::<0b11_11_11_11>(a, 0b11111111, a, b);
60349        let e = _mm512_setr_pd(4., 3., 8., 7., 4., 3., 8., 7.);
60350        assert_eq_m512d(r, e);
60351    }
60352
60353    #[simd_test(enable = "avx512f")]
60354    unsafe fn test_mm512_maskz_shuffle_pd() {
60355        let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
60356        let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
60357        let r = _mm512_maskz_shuffle_pd::<0b11_11_11_11>(0, a, b);
60358        assert_eq_m512d(r, _mm512_setzero_pd());
60359        let r = _mm512_maskz_shuffle_pd::<0b11_11_11_11>(0b00001111, a, b);
60360        let e = _mm512_setr_pd(4., 3., 8., 7., 0., 0., 0., 0.);
60361        assert_eq_m512d(r, e);
60362    }
60363
60364    #[simd_test(enable = "avx512f")]
60365    unsafe fn test_mm512_mask_expandloadu_epi32() {
60366        let src = _mm512_set1_epi32(42);
60367        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
60368        let p = a.as_ptr();
60369        let m = 0b11101000_11001010;
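        // Expand-load reads consecutive elements from p and places them, starting at the
        // lowest set mask bit, into the lanes whose mask bit is set; unselected lanes are
        // taken from src (or zeroed in the maskz variants below).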
60370        let r = _mm512_mask_expandloadu_epi32(src, m, black_box(p));
60371        let e = _mm512_set_epi32(8, 7, 6, 42, 5, 42, 42, 42, 4, 3, 42, 42, 2, 42, 1, 42);
60372        assert_eq_m512i(r, e);
60373    }
60374
60375    #[simd_test(enable = "avx512f")]
60376    unsafe fn test_mm512_maskz_expandloadu_epi32() {
60377        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
60378        let p = a.as_ptr();
60379        let m = 0b11101000_11001010;
60380        let r = _mm512_maskz_expandloadu_epi32(m, black_box(p));
60381        let e = _mm512_set_epi32(8, 7, 6, 0, 5, 0, 0, 0, 4, 3, 0, 0, 2, 0, 1, 0);
60382        assert_eq_m512i(r, e);
60383    }
60384
60385    #[simd_test(enable = "avx512f,avx512vl")]
60386    unsafe fn test_mm256_mask_expandloadu_epi32() {
60387        let src = _mm256_set1_epi32(42);
60388        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
60389        let p = a.as_ptr();
60390        let m = 0b11101000;
60391        let r = _mm256_mask_expandloadu_epi32(src, m, black_box(p));
60392        let e = _mm256_set_epi32(4, 3, 2, 42, 1, 42, 42, 42);
60393        assert_eq_m256i(r, e);
60394    }
60395
60396    #[simd_test(enable = "avx512f,avx512vl")]
60397    unsafe fn test_mm256_maskz_expandloadu_epi32() {
60398        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
60399        let p = a.as_ptr();
60400        let m = 0b11101000;
60401        let r = _mm256_maskz_expandloadu_epi32(m, black_box(p));
60402        let e = _mm256_set_epi32(4, 3, 2, 0, 1, 0, 0, 0);
60403        assert_eq_m256i(r, e);
60404    }
60405
60406    #[simd_test(enable = "avx512f,avx512vl")]
60407    unsafe fn test_mm_mask_expandloadu_epi32() {
60408        let src = _mm_set1_epi32(42);
60409        let a = &[1_i32, 2, 3, 4];
60410        let p = a.as_ptr();
60411        let m = 0b11111000;
60412        let r = _mm_mask_expandloadu_epi32(src, m, black_box(p));
60413        let e = _mm_set_epi32(1, 42, 42, 42);
60414        assert_eq_m128i(r, e);
60415    }
60416
60417    #[simd_test(enable = "avx512f,avx512vl")]
60418    unsafe fn test_mm_maskz_expandloadu_epi32() {
60419        let a = &[1_i32, 2, 3, 4];
60420        let p = a.as_ptr();
60421        let m = 0b11111000;
60422        let r = _mm_maskz_expandloadu_epi32(m, black_box(p));
60423        let e = _mm_set_epi32(1, 0, 0, 0);
60424        assert_eq_m128i(r, e);
60425    }
60426
60427    #[simd_test(enable = "avx512f")]
60428    unsafe fn test_mm512_mask_expandloadu_epi64() {
60429        let src = _mm512_set1_epi64(42);
60430        let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
60431        let p = a.as_ptr();
60432        let m = 0b11101000;
60433        let r = _mm512_mask_expandloadu_epi64(src, m, black_box(p));
60434        let e = _mm512_set_epi64(4, 3, 2, 42, 1, 42, 42, 42);
60435        assert_eq_m512i(r, e);
60436    }
60437
60438    #[simd_test(enable = "avx512f")]
60439    unsafe fn test_mm512_maskz_expandloadu_epi64() {
60440        let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
60441        let p = a.as_ptr();
60442        let m = 0b11101000;
60443        let r = _mm512_maskz_expandloadu_epi64(m, black_box(p));
60444        let e = _mm512_set_epi64(4, 3, 2, 0, 1, 0, 0, 0);
60445        assert_eq_m512i(r, e);
60446    }
60447
60448    #[simd_test(enable = "avx512f,avx512vl")]
60449    unsafe fn test_mm256_mask_expandloadu_epi64() {
60450        let src = _mm256_set1_epi64x(42);
60451        let a = &[1_i64, 2, 3, 4];
60452        let p = a.as_ptr();
60453        let m = 0b11101000;
60454        let r = _mm256_mask_expandloadu_epi64(src, m, black_box(p));
60455        let e = _mm256_set_epi64x(1, 42, 42, 42);
60456        assert_eq_m256i(r, e);
60457    }
60458
60459    #[simd_test(enable = "avx512f,avx512vl")]
60460    unsafe fn test_mm256_maskz_expandloadu_epi64() {
60461        let a = &[1_i64, 2, 3, 4];
60462        let p = a.as_ptr();
60463        let m = 0b11101000;
60464        let r = _mm256_maskz_expandloadu_epi64(m, black_box(p));
60465        let e = _mm256_set_epi64x(1, 0, 0, 0);
60466        assert_eq_m256i(r, e);
60467    }
60468
60469    #[simd_test(enable = "avx512f,avx512vl")]
60470    unsafe fn test_mm_mask_expandloadu_epi64() {
60471        let src = _mm_set1_epi64x(42);
60472        let a = &[1_i64, 2];
60473        let p = a.as_ptr();
60474        let m = 0b11101000;
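        // Only mask bits 0 and 1 apply to the two i64 lanes; both are clear here, so
        // every lane is copied from src and nothing is loaded.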
60475        let r = _mm_mask_expandloadu_epi64(src, m, black_box(p));
60476        let e = _mm_set_epi64x(42, 42);
60477        assert_eq_m128i(r, e);
60478    }
60479
60480    #[simd_test(enable = "avx512f,avx512vl")]
60481    unsafe fn test_mm_maskz_expandloadu_epi64() {
60482        let a = &[1_i64, 2];
60483        let p = a.as_ptr();
60484        let m = 0b11101000;
60485        let r = _mm_maskz_expandloadu_epi64(m, black_box(p));
60486        let e = _mm_set_epi64x(0, 0);
60487        assert_eq_m128i(r, e);
60488    }
60489
60490    #[simd_test(enable = "avx512f")]
60491    unsafe fn test_mm512_mask_expandloadu_ps() {
60492        let src = _mm512_set1_ps(42.);
60493        let a = &[
60494            1.0f32, 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
60495        ];
60496        let p = a.as_ptr();
60497        let m = 0b11101000_11001010;
60498        let r = _mm512_mask_expandloadu_ps(src, m, black_box(p));
60499        let e = _mm512_set_ps(
60500            8., 7., 6., 42., 5., 42., 42., 42., 4., 3., 42., 42., 2., 42., 1., 42.,
60501        );
60502        assert_eq_m512(r, e);
60503    }
60504
60505    #[simd_test(enable = "avx512f")]
60506    unsafe fn test_mm512_maskz_expandloadu_ps() {
60507        let a = &[
60508            1.0f32, 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
60509        ];
60510        let p = a.as_ptr();
60511        let m = 0b11101000_11001010;
60512        let r = _mm512_maskz_expandloadu_ps(m, black_box(p));
60513        let e = _mm512_set_ps(
60514            8., 7., 6., 0., 5., 0., 0., 0., 4., 3., 0., 0., 2., 0., 1., 0.,
60515        );
60516        assert_eq_m512(r, e);
60517    }
60518
60519    #[simd_test(enable = "avx512f,avx512vl")]
60520    unsafe fn test_mm256_mask_expandloadu_ps() {
60521        let src = _mm256_set1_ps(42.);
60522        let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.];
60523        let p = a.as_ptr();
60524        let m = 0b11101000;
60525        let r = _mm256_mask_expandloadu_ps(src, m, black_box(p));
60526        let e = _mm256_set_ps(4., 3., 2., 42., 1., 42., 42., 42.);
60527        assert_eq_m256(r, e);
60528    }
60529
60530    #[simd_test(enable = "avx512f,avx512vl")]
60531    unsafe fn test_mm256_maskz_expandloadu_ps() {
60532        let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.];
60533        let p = a.as_ptr();
60534        let m = 0b11101000;
60535        let r = _mm256_maskz_expandloadu_ps(m, black_box(p));
60536        let e = _mm256_set_ps(4., 3., 2., 0., 1., 0., 0., 0.);
60537        assert_eq_m256(r, e);
60538    }
60539
60540    #[simd_test(enable = "avx512f,avx512vl")]
60541    unsafe fn test_mm_mask_expandloadu_ps() {
60542        let src = _mm_set1_ps(42.);
60543        let a = &[1.0f32, 2., 3., 4.];
60544        let p = a.as_ptr();
60545        let m = 0b11101000;
60546        let r = _mm_mask_expandloadu_ps(src, m, black_box(p));
60547        let e = _mm_set_ps(1., 42., 42., 42.);
60548        assert_eq_m128(r, e);
60549    }
60550
60551    #[simd_test(enable = "avx512f,avx512vl")]
60552    unsafe fn test_mm_maskz_expandloadu_ps() {
60553        let a = &[1.0f32, 2., 3., 4.];
60554        let p = a.as_ptr();
60555        let m = 0b11101000;
60556        let r = _mm_maskz_expandloadu_ps(m, black_box(p));
60557        let e = _mm_set_ps(1., 0., 0., 0.);
60558        assert_eq_m128(r, e);
60559    }
60560
60561    #[simd_test(enable = "avx512f")]
60562    unsafe fn test_mm512_mask_expandloadu_pd() {
60563        let src = _mm512_set1_pd(42.);
60564        let a = &[1.0f64, 2., 3., 4., 5., 6., 7., 8.];
60565        let p = a.as_ptr();
60566        let m = 0b11101000;
60567        let r = _mm512_mask_expandloadu_pd(src, m, black_box(p));
60568        let e = _mm512_set_pd(4., 3., 2., 42., 1., 42., 42., 42.);
60569        assert_eq_m512d(r, e);
60570    }
60571
60572    #[simd_test(enable = "avx512f")]
60573    unsafe fn test_mm512_maskz_expandloadu_pd() {
60574        let a = &[1.0f64, 2., 3., 4., 5., 6., 7., 8.];
60575        let p = a.as_ptr();
60576        let m = 0b11101000;
60577        let r = _mm512_maskz_expandloadu_pd(m, black_box(p));
60578        let e = _mm512_set_pd(4., 3., 2., 0., 1., 0., 0., 0.);
60579        assert_eq_m512d(r, e);
60580    }
60581
60582    #[simd_test(enable = "avx512f,avx512vl")]
60583    unsafe fn test_mm256_mask_expandloadu_pd() {
60584        let src = _mm256_set1_pd(42.);
60585        let a = &[1.0f64, 2., 3., 4.];
60586        let p = a.as_ptr();
60587        let m = 0b11101000;
60588        let r = _mm256_mask_expandloadu_pd(src, m, black_box(p));
60589        let e = _mm256_set_pd(1., 42., 42., 42.);
60590        assert_eq_m256d(r, e);
60591    }
60592
60593    #[simd_test(enable = "avx512f,avx512vl")]
60594    unsafe fn test_mm256_maskz_expandloadu_pd() {
60595        let a = &[1.0f64, 2., 3., 4.];
60596        let p = a.as_ptr();
60597        let m = 0b11101000;
60598        let r = _mm256_maskz_expandloadu_pd(m, black_box(p));
60599        let e = _mm256_set_pd(1., 0., 0., 0.);
60600        assert_eq_m256d(r, e);
60601    }
60602
60603    #[simd_test(enable = "avx512f,avx512vl")]
60604    unsafe fn test_mm_mask_expandloadu_pd() {
60605        let src = _mm_set1_pd(42.);
60606        let a = &[1.0f64, 2.];
60607        let p = a.as_ptr();
60608        let m = 0b11101000;
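        // As in the 128-bit epi64 case, mask bits 0 and 1 are both clear, so both
        // lanes come from src.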
60609        let r = _mm_mask_expandloadu_pd(src, m, black_box(p));
60610        let e = _mm_set_pd(42., 42.);
60611        assert_eq_m128d(r, e);
60612    }
60613
60614    #[simd_test(enable = "avx512f,avx512vl")]
60615    unsafe fn test_mm_maskz_expandloadu_pd() {
60616        let a = &[1.0f64, 2.];
60617        let p = a.as_ptr();
60618        let m = 0b11101000;
60619        let r = _mm_maskz_expandloadu_pd(m, black_box(p));
60620        let e = _mm_set_pd(0., 0.);
60621        assert_eq_m128d(r, e);
60622    }
60623}