use crate::core_arch::{simd::*, x86::*};
use crate::intrinsics::simd::*;

#[cfg(test)]
use stdarch_test::assert_instr;

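/// Computes the absolute values of packed 32-bit integers in `a`.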
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_abs_epi32(a: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i32x8();
        let r = simd_select::<m32x8, _>(simd_lt(a, i32x8::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_abs_epi16(a: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i16x16();
        let r = simd_select::<m16x16, _>(simd_lt(a, i16x16::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_abs_epi8(a: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i8x32();
        let r = simd_select::<m8x32, _>(simd_lt(a, i8x32::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_add_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_add(a.as_i64x4(), b.as_i64x4())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_add_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_add(a.as_i32x8(), b.as_i32x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_add_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_add(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_add_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_add(a.as_i8x32(), b.as_i8x32())) }
}

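/// Adds packed 8-bit integers in `a` and `b` using saturation.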
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_adds_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_add(a.as_i8x32(), b.as_i8x32())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_adds_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_add(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_adds_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_add(a.as_u8x32(), b.as_u8x32())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_adds_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_add(a.as_u16x16(), b.as_u16x16())) }
}

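/// Concatenates pairs of 16-byte blocks in `a` and `b` into a 32-byte
/// temporary result, shifts the result right by `IMM8` bytes, and returns
/// the low 16 bytes of each 128-bit lane.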
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 7))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_alignr_epi8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);

    if IMM8 >= 32 {
        return _mm256_setzero_si256();
    }
    let (a, b) = if IMM8 > 16 {
        (_mm256_setzero_si256(), a)
    } else {
        (a, b)
    };
    unsafe {
        if IMM8 == 16 {
            return transmute(a);
        }
    }
    const fn mask(shift: u32, i: u32) -> u32 {
        let shift = shift % 16;
        let mod_i = i % 16;
        if mod_i < (16 - shift) {
            i + shift
        } else {
            i + 16 + shift
        }
    }

    unsafe {
        let r: i8x32 = simd_shuffle!(
            b.as_i8x32(),
            a.as_i8x32(),
            [
                mask(IMM8 as u32, 0),
                mask(IMM8 as u32, 1),
                mask(IMM8 as u32, 2),
                mask(IMM8 as u32, 3),
                mask(IMM8 as u32, 4),
                mask(IMM8 as u32, 5),
                mask(IMM8 as u32, 6),
                mask(IMM8 as u32, 7),
                mask(IMM8 as u32, 8),
                mask(IMM8 as u32, 9),
                mask(IMM8 as u32, 10),
                mask(IMM8 as u32, 11),
                mask(IMM8 as u32, 12),
                mask(IMM8 as u32, 13),
                mask(IMM8 as u32, 14),
                mask(IMM8 as u32, 15),
                mask(IMM8 as u32, 16),
                mask(IMM8 as u32, 17),
                mask(IMM8 as u32, 18),
                mask(IMM8 as u32, 19),
                mask(IMM8 as u32, 20),
                mask(IMM8 as u32, 21),
                mask(IMM8 as u32, 22),
                mask(IMM8 as u32, 23),
                mask(IMM8 as u32, 24),
                mask(IMM8 as u32, 25),
                mask(IMM8 as u32, 26),
                mask(IMM8 as u32, 27),
                mask(IMM8 as u32, 28),
                mask(IMM8 as u32, 29),
                mask(IMM8 as u32, 30),
                mask(IMM8 as u32, 31),
            ],
        );
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_and_si256(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_and(a.as_i64x4(), b.as_i64x4())) }
}

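/// Computes the bitwise NOT of 256 bits (representing integer data) in `a`
/// and then AND with `b`.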
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_andnot_si256(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let all_ones = _mm256_set1_epi8(-1);
        transmute(simd_and(
            simd_xor(a.as_i64x4(), all_ones.as_i64x4()),
            b.as_i64x4(),
        ))
    }
}

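/// Averages packed unsigned 16-bit integers in `a` and `b`, i.e. computes
/// `(a + b + 1) >> 1` for each element.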
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpavgw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_avg_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = simd_cast::<_, u32x16>(a.as_u16x16());
        let b = simd_cast::<_, u32x16>(b.as_u16x16());
        let r = simd_shr(simd_add(simd_add(a, b), u32x16::splat(1)), u32x16::splat(1));
        transmute(simd_cast::<_, u16x16>(r))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpavgb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_avg_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = simd_cast::<_, u16x32>(a.as_u8x32());
        let b = simd_cast::<_, u16x32>(b.as_u8x32());
        let r = simd_shr(simd_add(simd_add(a, b), u16x32::splat(1)), u16x32::splat(1));
        transmute(simd_cast::<_, u8x32>(r))
    }
}

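/// Blends packed 32-bit integers from `a` and `b` using control mask `IMM4`:
/// for each element, a set bit selects the element from `b` and a clear bit
/// keeps the element from `a`.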
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vblendps, IMM4 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_blend_epi32<const IMM4: i32>(a: __m128i, b: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM4, 4);
    unsafe {
        let a = a.as_i32x4();
        let b = b.as_i32x4();
        let r: i32x4 = simd_shuffle!(
            a,
            b,
            [
                [0, 4, 0, 4][IMM4 as usize & 0b11],
                [1, 1, 5, 5][IMM4 as usize & 0b11],
                [2, 6, 2, 6][(IMM4 as usize >> 2) & 0b11],
                [3, 3, 7, 7][(IMM4 as usize >> 2) & 0b11],
            ],
        );
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vblendps, IMM8 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_blend_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        let a = a.as_i32x8();
        let b = b.as_i32x8();
        let r: i32x8 = simd_shuffle!(
            a,
            b,
            [
                [0, 8, 0, 8][IMM8 as usize & 0b11],
                [1, 1, 9, 9][IMM8 as usize & 0b11],
                [2, 10, 2, 10][(IMM8 as usize >> 2) & 0b11],
                [3, 3, 11, 11][(IMM8 as usize >> 2) & 0b11],
                [4, 12, 4, 12][(IMM8 as usize >> 4) & 0b11],
                [5, 5, 13, 13][(IMM8 as usize >> 4) & 0b11],
                [6, 14, 6, 14][(IMM8 as usize >> 6) & 0b11],
                [7, 7, 15, 15][(IMM8 as usize >> 6) & 0b11],
            ],
        );
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpblendw, IMM8 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_blend_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        let a = a.as_i16x16();
        let b = b.as_i16x16();

        let r: i16x16 = simd_shuffle!(
            a,
            b,
            [
                [0, 16, 0, 16][IMM8 as usize & 0b11],
                [1, 1, 17, 17][IMM8 as usize & 0b11],
                [2, 18, 2, 18][(IMM8 as usize >> 2) & 0b11],
                [3, 3, 19, 19][(IMM8 as usize >> 2) & 0b11],
                [4, 20, 4, 20][(IMM8 as usize >> 4) & 0b11],
                [5, 5, 21, 21][(IMM8 as usize >> 4) & 0b11],
                [6, 22, 6, 22][(IMM8 as usize >> 6) & 0b11],
                [7, 7, 23, 23][(IMM8 as usize >> 6) & 0b11],
                [8, 24, 8, 24][IMM8 as usize & 0b11],
                [9, 9, 25, 25][IMM8 as usize & 0b11],
                [10, 26, 10, 26][(IMM8 as usize >> 2) & 0b11],
                [11, 11, 27, 27][(IMM8 as usize >> 2) & 0b11],
                [12, 28, 12, 28][(IMM8 as usize >> 4) & 0b11],
                [13, 13, 29, 29][(IMM8 as usize >> 4) & 0b11],
                [14, 30, 14, 30][(IMM8 as usize >> 6) & 0b11],
                [15, 15, 31, 31][(IMM8 as usize >> 6) & 0b11],
            ],
        );
        transmute(r)
    }
}

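/// Blends packed 8-bit integers from `a` and `b` using `mask`: each result
/// byte is taken from `b` when the most significant bit of the corresponding
/// mask byte is set, and from `a` otherwise.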
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpblendvb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_blendv_epi8(a: __m256i, b: __m256i, mask: __m256i) -> __m256i {
    unsafe {
        let mask: i8x32 = simd_lt(mask.as_i8x32(), i8x32::ZERO);
        transmute(simd_select(mask, b.as_i8x32(), a.as_i8x32()))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_broadcastb_epi8(a: __m128i) -> __m128i {
    unsafe {
        let ret = simd_shuffle!(a.as_i8x16(), i8x16::ZERO, [0_u32; 16]);
        transmute::<i8x16, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastb_epi8(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i8x16(), i8x16::ZERO, [0_u32; 32]);
        transmute::<i8x32, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_broadcastd_epi32(a: __m128i) -> __m128i {
    unsafe {
        let ret = simd_shuffle!(a.as_i32x4(), i32x4::ZERO, [0_u32; 4]);
        transmute::<i32x4, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastd_epi32(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i32x4(), i32x4::ZERO, [0_u32; 8]);
        transmute::<i32x8, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vmovddup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_broadcastq_epi64(a: __m128i) -> __m128i {
    unsafe {
        let ret = simd_shuffle!(a.as_i64x2(), a.as_i64x2(), [0_u32; 2]);
        transmute::<i64x2, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastq_epi64(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i64x2(), a.as_i64x2(), [0_u32; 4]);
        transmute::<i64x4, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vmovddup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_broadcastsd_pd(a: __m128d) -> __m128d {
    unsafe { simd_shuffle!(a, _mm_setzero_pd(), [0_u32; 2]) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastsd_pd(a: __m128d) -> __m256d {
    unsafe { simd_shuffle!(a, _mm_setzero_pd(), [0_u32; 4]) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[stable(feature = "simd_x86_updates", since = "1.82.0")]
pub fn _mm_broadcastsi128_si256(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 1, 0, 1]);
        transmute::<i64x4, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastsi128_si256(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 1, 0, 1]);
        transmute::<i64x4, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_broadcastss_ps(a: __m128) -> __m128 {
    unsafe { simd_shuffle!(a, _mm_setzero_ps(), [0_u32; 4]) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastss_ps(a: __m128) -> __m256 {
    unsafe { simd_shuffle!(a, _mm_setzero_ps(), [0_u32; 8]) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_broadcastw_epi16(a: __m128i) -> __m128i {
    unsafe {
        let ret = simd_shuffle!(a.as_i16x8(), i16x8::ZERO, [0_u32; 8]);
        transmute::<i16x8, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastw_epi16(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i16x8(), i16x8::ZERO, [0_u32; 16]);
        transmute::<i16x16, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpeq_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i64x4, _>(simd_eq(a.as_i64x4(), b.as_i64x4())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpeq_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i32x8, _>(simd_eq(a.as_i32x8(), b.as_i32x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpeq_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i16x16, _>(simd_eq(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpeq_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i8x32, _>(simd_eq(a.as_i8x32(), b.as_i8x32())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpgt_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i64x4, _>(simd_gt(a.as_i64x4(), b.as_i64x4())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpgt_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i32x8, _>(simd_gt(a.as_i32x8(), b.as_i32x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpgt_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i16x16, _>(simd_gt(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpgt_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i8x32, _>(simd_gt(a.as_i8x32(), b.as_i8x32())) }
}

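/// Sign-extends packed 16-bit integers in `a` to packed 32-bit integers.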
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi16_epi32(a: __m128i) -> __m256i {
    unsafe { transmute::<i32x8, _>(simd_cast(a.as_i16x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi16_epi64(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_i16x8();
        let v64: i16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute::<i64x4, _>(simd_cast(v64))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi32_epi64(a: __m128i) -> __m256i {
    unsafe { transmute::<i64x4, _>(simd_cast(a.as_i32x4())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi8_epi16(a: __m128i) -> __m256i {
    unsafe { transmute::<i16x16, _>(simd_cast(a.as_i8x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi8_epi32(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_i8x16();
        let v64: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
        transmute::<i32x8, _>(simd_cast(v64))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi8_epi64(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_i8x16();
        let v32: i8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute::<i64x4, _>(simd_cast(v32))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepu16_epi32(a: __m128i) -> __m256i {
    unsafe { transmute::<i32x8, _>(simd_cast(a.as_u16x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepu16_epi64(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_u16x8();
        let v64: u16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute::<i64x4, _>(simd_cast(v64))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepu32_epi64(a: __m128i) -> __m256i {
    unsafe { transmute::<i64x4, _>(simd_cast(a.as_u32x4())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepu8_epi16(a: __m128i) -> __m256i {
    unsafe { transmute::<i16x16, _>(simd_cast(a.as_u8x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepu8_epi32(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_u8x16();
        let v64: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
        transmute::<i32x8, _>(simd_cast(v64))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepu8_epi64(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_u8x16();
        let v32: u8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute::<i64x4, _>(simd_cast(v32))
    }
}

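/// Extracts 128 bits (of integer data) from `a` selected with `IMM1`.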
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vextractf128, IMM1 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_extracti128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
    static_assert_uimm_bits!(IMM1, 1);
    unsafe {
        let a = a.as_i64x4();
        let b = i64x4::ZERO;
        let dst: i64x2 = simd_shuffle!(a, b, [[0, 1], [2, 3]][IMM1 as usize]);
        transmute(dst)
    }
}

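/// Horizontally adds adjacent pairs of 16-bit integers in `a` and `b`.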
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hadd_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(phaddw(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hadd_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(phaddd(a.as_i32x8(), b.as_i32x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hadds_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(phaddsw(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hsub_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(phsubw(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hsub_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(phsubd(a.as_i32x8(), b.as_i32x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hsubs_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(phsubsw(a.as_i16x16(), b.as_i16x16())) }
}

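/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
///
/// A minimal usage sketch (illustrative values only; the caller must ensure
/// AVX2 is available and that every gathered address stays inside `data`):
///
/// ```ignore
/// let data = [10i32, 20, 30, 40, 50, 60, 70, 80];
/// let idx = _mm_setr_epi32(0, 2, 4, 6);
/// // Gathers data[0], data[2], data[4] and data[6] (SCALE = 4 bytes per i32).
/// let r = unsafe { _mm_i32gather_epi32::<4>(data.as_ptr(), idx) };
/// ```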
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_epi32<const SCALE: i32>(
    slice: *const i32,
    offsets: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero = i32x4::ZERO;
    let neg_one = _mm_set1_epi32(-1).as_i32x4();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = pgatherdd(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i32gather_epi32<const SCALE: i32>(
    src: __m128i,
    slice: *const i32,
    offsets: __m128i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x4();
    let mask = mask.as_i32x4();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = pgatherdd(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_epi32<const SCALE: i32>(
    slice: *const i32,
    offsets: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let zero = i32x8::ZERO;
    let neg_one = _mm256_set1_epi32(-1).as_i32x8();
    let offsets = offsets.as_i32x8();
    let slice = slice as *const i8;
    let r = vpgatherdd(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i32gather_epi32<const SCALE: i32>(
    src: __m256i,
    slice: *const i32,
    offsets: __m256i,
    mask: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x8();
    let mask = mask.as_i32x8();
    let offsets = offsets.as_i32x8();
    let slice = slice as *const i8;
    let r = vpgatherdd(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m128i) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_ps();
    let neg_one = _mm_set1_ps(-1.0);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    pgatherdps(zero, slice, offsets, neg_one, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i32gather_ps<const SCALE: i32>(
    src: __m128,
    slice: *const f32,
    offsets: __m128i,
    mask: __m128,
) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    pgatherdps(src, slice, offsets, mask, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m256i) -> __m256 {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm256_setzero_ps();
    let neg_one = _mm256_set1_ps(-1.0);
    let offsets = offsets.as_i32x8();
    let slice = slice as *const i8;
    vpgatherdps(zero, slice, offsets, neg_one, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i32gather_ps<const SCALE: i32>(
    src: __m256,
    slice: *const f32,
    offsets: __m256i,
    mask: __m256,
) -> __m256 {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i32x8();
    let slice = slice as *const i8;
    vpgatherdps(src, slice, offsets, mask, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_epi64<const SCALE: i32>(
    slice: *const i64,
    offsets: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero = i64x2::ZERO;
    let neg_one = _mm_set1_epi64x(-1).as_i64x2();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = pgatherdq(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i32gather_epi64<const SCALE: i32>(
    src: __m128i,
    slice: *const i64,
    offsets: __m128i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x2();
    let mask = mask.as_i64x2();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = pgatherdq(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_epi64<const SCALE: i32>(
    slice: *const i64,
    offsets: __m128i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let zero = i64x4::ZERO;
    let neg_one = _mm256_set1_epi64x(-1).as_i64x4();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = vpgatherdq(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i32gather_epi64<const SCALE: i32>(
    src: __m256i,
    slice: *const i64,
    offsets: __m128i,
    mask: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x4();
    let mask = mask.as_i64x4();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = vpgatherdq(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_pd<const SCALE: i32>(slice: *const f64, offsets: __m128i) -> __m128d {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_pd();
    let neg_one = _mm_set1_pd(-1.0);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    pgatherdpd(zero, slice, offsets, neg_one, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i32gather_pd<const SCALE: i32>(
    src: __m128d,
    slice: *const f64,
    offsets: __m128i,
    mask: __m128d,
) -> __m128d {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    pgatherdpd(src, slice, offsets, mask, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_pd<const SCALE: i32>(
    slice: *const f64,
    offsets: __m128i,
) -> __m256d {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm256_setzero_pd();
    let neg_one = _mm256_set1_pd(-1.0);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    vpgatherdpd(zero, slice, offsets, neg_one, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i32gather_pd<const SCALE: i32>(
    src: __m256d,
    slice: *const f64,
    offsets: __m128i,
    mask: __m256d,
) -> __m256d {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    vpgatherdpd(src, slice, offsets, mask, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_epi32<const SCALE: i32>(
    slice: *const i32,
    offsets: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero = i32x4::ZERO;
    let neg_one = _mm_set1_epi64x(-1).as_i32x4();
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    let r = pgatherqd(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i64gather_epi32<const SCALE: i32>(
    src: __m128i,
    slice: *const i32,
    offsets: __m128i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x4();
    let mask = mask.as_i32x4();
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    let r = pgatherqd(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_epi32<const SCALE: i32>(
    slice: *const i32,
    offsets: __m256i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero = i32x4::ZERO;
    let neg_one = _mm_set1_epi64x(-1).as_i32x4();
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    let r = vpgatherqd(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i64gather_epi32<const SCALE: i32>(
    src: __m128i,
    slice: *const i32,
    offsets: __m256i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x4();
    let mask = mask.as_i32x4();
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    let r = vpgatherqd(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m128i) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_ps();
    let neg_one = _mm_set1_ps(-1.0);
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    pgatherqps(zero, slice, offsets, neg_one, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i64gather_ps<const SCALE: i32>(
    src: __m128,
    slice: *const f32,
    offsets: __m128i,
    mask: __m128,
) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    pgatherqps(src, slice, offsets, mask, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m256i) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_ps();
    let neg_one = _mm_set1_ps(-1.0);
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    vpgatherqps(zero, slice, offsets, neg_one, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i64gather_ps<const SCALE: i32>(
    src: __m128,
    slice: *const f32,
    offsets: __m256i,
    mask: __m128,
) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    vpgatherqps(src, slice, offsets, mask, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_epi64<const SCALE: i32>(
    slice: *const i64,
    offsets: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero = i64x2::ZERO;
    let neg_one = _mm_set1_epi64x(-1).as_i64x2();
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x2();
    let r = pgatherqq(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i64gather_epi64<const SCALE: i32>(
    src: __m128i,
    slice: *const i64,
    offsets: __m128i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x2();
    let mask = mask.as_i64x2();
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    let r = pgatherqq(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_epi64<const SCALE: i32>(
    slice: *const i64,
    offsets: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let zero = i64x4::ZERO;
    let neg_one = _mm256_set1_epi64x(-1).as_i64x4();
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x4();
    let r = vpgatherqq(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i64gather_epi64<const SCALE: i32>(
    src: __m256i,
    slice: *const i64,
    offsets: __m256i,
    mask: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x4();
    let mask = mask.as_i64x4();
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    let r = vpgatherqq(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_pd<const SCALE: i32>(slice: *const f64, offsets: __m128i) -> __m128d {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_pd();
    let neg_one = _mm_set1_pd(-1.0);
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x2();
    pgatherqpd(zero, slice, offsets, neg_one, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i64gather_pd<const SCALE: i32>(
    src: __m128d,
    slice: *const f64,
    offsets: __m128i,
    mask: __m128d,
) -> __m128d {
    static_assert_imm8_scale!(SCALE);
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x2();
    pgatherqpd(src, slice, offsets, mask, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_pd<const SCALE: i32>(
    slice: *const f64,
    offsets: __m256i,
) -> __m256d {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm256_setzero_pd();
    let neg_one = _mm256_set1_pd(-1.0);
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x4();
    vpgatherqpd(zero, slice, offsets, neg_one, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i64gather_pd<const SCALE: i32>(
    src: __m256d,
    slice: *const f64,
    offsets: __m256i,
    mask: __m256d,
) -> __m256d {
    static_assert_imm8_scale!(SCALE);
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x4();
    vpgatherqpd(src, slice, offsets, mask, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vinsertf128, IMM1 = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_inserti128_si256<const IMM1: i32>(a: __m256i, b: __m128i) -> __m256i {
    static_assert_uimm_bits!(IMM1, 1);
    unsafe {
        let a = a.as_i64x4();
        let b = _mm256_castsi128_si256(b).as_i64x4();
        let dst: i64x4 = simd_shuffle!(a, b, [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize]);
        transmute(dst)
    }
}

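/// Multiplies packed signed 16-bit integers in `a` and `b`, producing
/// intermediate signed 32-bit integers, then horizontally adds adjacent
/// pairs of those intermediate results.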
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_madd_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(pmaddwd(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_maddubs_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(pmaddubsw(a.as_u8x32(), b.as_u8x32())) }
}

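/// Loads packed 32-bit integers from memory pointed to by `mem_addr` using
/// `mask` (elements are zeroed out when the highest bit is not set in the
/// corresponding element).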
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskload_epi32(mem_addr: *const i32, mask: __m128i) -> __m128i {
    transmute(maskloadd(mem_addr as *const i8, mask.as_i32x4()))
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskload_epi32(mem_addr: *const i32, mask: __m256i) -> __m256i {
    transmute(maskloadd256(mem_addr as *const i8, mask.as_i32x8()))
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskload_epi64(mem_addr: *const i64, mask: __m128i) -> __m128i {
    transmute(maskloadq(mem_addr as *const i8, mask.as_i64x2()))
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskload_epi64(mem_addr: *const i64, mask: __m256i) -> __m256i {
    transmute(maskloadq256(mem_addr as *const i8, mask.as_i64x4()))
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskstore_epi32(mem_addr: *mut i32, mask: __m128i, a: __m128i) {
    maskstored(mem_addr as *mut i8, mask.as_i32x4(), a.as_i32x4())
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskstore_epi32(mem_addr: *mut i32, mask: __m256i, a: __m256i) {
    maskstored256(mem_addr as *mut i8, mask.as_i32x8(), a.as_i32x8())
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskstore_epi64(mem_addr: *mut i64, mask: __m128i, a: __m128i) {
    maskstoreq(mem_addr as *mut i8, mask.as_i64x2(), a.as_i64x2())
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskstore_epi64(mem_addr: *mut i64, mask: __m256i, a: __m256i) {
    maskstoreq256(mem_addr as *mut i8, mask.as_i64x4(), a.as_i64x4())
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i16x16();
        let b = b.as_i16x16();
        transmute(simd_select::<i16x16, _>(simd_gt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i32x8();
        let b = b.as_i32x8();
        transmute(simd_select::<i32x8, _>(simd_gt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i8x32();
        let b = b.as_i8x32();
        transmute(simd_select::<i8x32, _>(simd_gt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_u16x16();
        let b = b.as_u16x16();
        transmute(simd_select::<i16x16, _>(simd_gt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxud))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epu32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_u32x8();
        let b = b.as_u32x8();
        transmute(simd_select::<i32x8, _>(simd_gt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_u8x32();
        let b = b.as_u8x32();
        transmute(simd_select::<i8x32, _>(simd_gt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i16x16();
        let b = b.as_i16x16();
        transmute(simd_select::<i16x16, _>(simd_lt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i32x8();
        let b = b.as_i32x8();
        transmute(simd_select::<i32x8, _>(simd_lt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i8x32();
        let b = b.as_i8x32();
        transmute(simd_select::<i8x32, _>(simd_lt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_u16x16();
        let b = b.as_u16x16();
        transmute(simd_select::<i16x16, _>(simd_lt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminud))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epu32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_u32x8();
        let b = b.as_u32x8();
        transmute(simd_select::<i32x8, _>(simd_lt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_u8x32();
        let b = b.as_u8x32();
        transmute(simd_select::<i8x32, _>(simd_lt(a, b), a, b))
    }
}

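/// Creates a 32-bit mask from the most significant bit of each 8-bit element
/// in `a`.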
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovmskb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_movemask_epi8(a: __m256i) -> i32 {
    unsafe {
        let z = i8x32::ZERO;
        let m: i8x32 = simd_lt(a.as_i8x32(), z);
        simd_bitmask::<_, u32>(m) as i32
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vmpsadbw, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mpsadbw_epu8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { transmute(mpsadbw(a.as_u8x32(), b.as_u8x32(), IMM8 as i8)) }
}

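/// Multiplies the low 32-bit integers from each packed 64-bit element in `a`
/// and `b`, and returns the signed 64-bit results.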
2065#[inline]
2072#[target_feature(enable = "avx2")]
2073#[cfg_attr(test, assert_instr(vpmuldq))]
2074#[stable(feature = "simd_x86", since = "1.27.0")]
2075pub fn _mm256_mul_epi32(a: __m256i, b: __m256i) -> __m256i {
2076 unsafe {
2077 let a = simd_cast::<_, i64x4>(simd_cast::<_, i32x4>(a.as_i64x4()));
2078 let b = simd_cast::<_, i64x4>(simd_cast::<_, i32x4>(b.as_i64x4()));
2079 transmute(simd_mul(a, b))
2080 }
2081}
2082
2083#[inline]
2090#[target_feature(enable = "avx2")]
2091#[cfg_attr(test, assert_instr(vpmuludq))]
2092#[stable(feature = "simd_x86", since = "1.27.0")]
2093pub fn _mm256_mul_epu32(a: __m256i, b: __m256i) -> __m256i {
2094 unsafe {
2095 let a = a.as_u64x4();
2096 let b = b.as_u64x4();
2097 let mask = u64x4::splat(u32::MAX.into());
2098 transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
2099 }
2100}
2101
2102#[inline]
2108#[target_feature(enable = "avx2")]
2109#[cfg_attr(test, assert_instr(vpmulhw))]
2110#[stable(feature = "simd_x86", since = "1.27.0")]
2111pub fn _mm256_mulhi_epi16(a: __m256i, b: __m256i) -> __m256i {
2112 unsafe {
2113 let a = simd_cast::<_, i32x16>(a.as_i16x16());
2114 let b = simd_cast::<_, i32x16>(b.as_i16x16());
2115 let r = simd_shr(simd_mul(a, b), i32x16::splat(16));
2116 transmute(simd_cast::<i32x16, i16x16>(r))
2117 }
2118}
2119
2120#[inline]
2126#[target_feature(enable = "avx2")]
2127#[cfg_attr(test, assert_instr(vpmulhuw))]
2128#[stable(feature = "simd_x86", since = "1.27.0")]
2129pub fn _mm256_mulhi_epu16(a: __m256i, b: __m256i) -> __m256i {
2130 unsafe {
2131 let a = simd_cast::<_, u32x16>(a.as_u16x16());
2132 let b = simd_cast::<_, u32x16>(b.as_u16x16());
2133 let r = simd_shr(simd_mul(a, b), u32x16::splat(16));
2134 transmute(simd_cast::<u32x16, u16x16>(r))
2135 }
2136}
2137
2138#[inline]
2144#[target_feature(enable = "avx2")]
2145#[cfg_attr(test, assert_instr(vpmullw))]
2146#[stable(feature = "simd_x86", since = "1.27.0")]
2147pub fn _mm256_mullo_epi16(a: __m256i, b: __m256i) -> __m256i {
2148 unsafe { transmute(simd_mul(a.as_i16x16(), b.as_i16x16())) }
2149}
2150
2151#[inline]
2157#[target_feature(enable = "avx2")]
2158#[cfg_attr(test, assert_instr(vpmulld))]
2159#[stable(feature = "simd_x86", since = "1.27.0")]
2160pub fn _mm256_mullo_epi32(a: __m256i, b: __m256i) -> __m256i {
2161 unsafe { transmute(simd_mul(a.as_i32x8(), b.as_i32x8())) }
2162}
2163
2164#[inline]
2171#[target_feature(enable = "avx2")]
2172#[cfg_attr(test, assert_instr(vpmulhrsw))]
2173#[stable(feature = "simd_x86", since = "1.27.0")]
2174pub fn _mm256_mulhrs_epi16(a: __m256i, b: __m256i) -> __m256i {
2175 unsafe { transmute(pmulhrsw(a.as_i16x16(), b.as_i16x16())) }
2176}
2177
2178#[inline]
2183#[target_feature(enable = "avx2")]
2184#[cfg_attr(test, assert_instr(vorps))]
2185#[stable(feature = "simd_x86", since = "1.27.0")]
2186pub fn _mm256_or_si256(a: __m256i, b: __m256i) -> __m256i {
2187 unsafe { transmute(simd_or(a.as_i32x8(), b.as_i32x8())) }
2188}
2189
2190#[inline]
2195#[target_feature(enable = "avx2")]
2196#[cfg_attr(test, assert_instr(vpacksswb))]
2197#[stable(feature = "simd_x86", since = "1.27.0")]
2198pub fn _mm256_packs_epi16(a: __m256i, b: __m256i) -> __m256i {
2199 unsafe { transmute(packsswb(a.as_i16x16(), b.as_i16x16())) }
2200}
2201
2202#[inline]
2207#[target_feature(enable = "avx2")]
2208#[cfg_attr(test, assert_instr(vpackssdw))]
2209#[stable(feature = "simd_x86", since = "1.27.0")]
2210pub fn _mm256_packs_epi32(a: __m256i, b: __m256i) -> __m256i {
2211 unsafe { transmute(packssdw(a.as_i32x8(), b.as_i32x8())) }
2212}
2213
2214#[inline]
2219#[target_feature(enable = "avx2")]
2220#[cfg_attr(test, assert_instr(vpackuswb))]
2221#[stable(feature = "simd_x86", since = "1.27.0")]
2222pub fn _mm256_packus_epi16(a: __m256i, b: __m256i) -> __m256i {
2223 unsafe { transmute(packuswb(a.as_i16x16(), b.as_i16x16())) }
2224}
2225
2226#[inline]
2231#[target_feature(enable = "avx2")]
2232#[cfg_attr(test, assert_instr(vpackusdw))]
2233#[stable(feature = "simd_x86", since = "1.27.0")]
2234pub fn _mm256_packus_epi32(a: __m256i, b: __m256i) -> __m256i {
2235 unsafe { transmute(packusdw(a.as_i32x8(), b.as_i32x8())) }
2236}
2237
2238#[inline]
2245#[target_feature(enable = "avx2")]
2246#[cfg_attr(test, assert_instr(vpermps))]
2247#[stable(feature = "simd_x86", since = "1.27.0")]
2248pub fn _mm256_permutevar8x32_epi32(a: __m256i, b: __m256i) -> __m256i {
2249 unsafe { transmute(permd(a.as_u32x8(), b.as_u32x8())) }
2250}
2251
2252#[inline]
2256#[target_feature(enable = "avx2")]
2257#[cfg_attr(test, assert_instr(vpermpd, IMM8 = 9))]
2258#[rustc_legacy_const_generics(1)]
2259#[stable(feature = "simd_x86", since = "1.27.0")]
2260pub fn _mm256_permute4x64_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
2261 static_assert_uimm_bits!(IMM8, 8);
2262 unsafe {
2263 let zero = i64x4::ZERO;
2264 let r: i64x4 = simd_shuffle!(
2265 a.as_i64x4(),
2266 zero,
2267 [
2268 IMM8 as u32 & 0b11,
2269 (IMM8 as u32 >> 2) & 0b11,
2270 (IMM8 as u32 >> 4) & 0b11,
2271 (IMM8 as u32 >> 6) & 0b11,
2272 ],
2273 );
2274 transmute(r)
2275 }
2276}
2277
2278#[inline]
2282#[target_feature(enable = "avx2")]
2283#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 9))]
2284#[rustc_legacy_const_generics(2)]
2285#[stable(feature = "simd_x86", since = "1.27.0")]
2286pub fn _mm256_permute2x128_si256<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
2287 static_assert_uimm_bits!(IMM8, 8);
2288 unsafe { transmute(vperm2i128(a.as_i64x4(), b.as_i64x4(), IMM8 as i8)) }
2289}
2290
2291#[inline]
2296#[target_feature(enable = "avx2")]
2297#[cfg_attr(test, assert_instr(vpermpd, IMM8 = 1))]
2298#[rustc_legacy_const_generics(1)]
2299#[stable(feature = "simd_x86", since = "1.27.0")]
2300pub fn _mm256_permute4x64_pd<const IMM8: i32>(a: __m256d) -> __m256d {
2301 static_assert_uimm_bits!(IMM8, 8);
2302 unsafe {
2303 simd_shuffle!(
2304 a,
2305 _mm256_undefined_pd(),
2306 [
2307 IMM8 as u32 & 0b11,
2308 (IMM8 as u32 >> 2) & 0b11,
2309 (IMM8 as u32 >> 4) & 0b11,
2310 (IMM8 as u32 >> 6) & 0b11,
2311 ],
2312 )
2313 }
2314}
2315
2316#[inline]
2321#[target_feature(enable = "avx2")]
2322#[cfg_attr(test, assert_instr(vpermps))]
2323#[stable(feature = "simd_x86", since = "1.27.0")]
2324pub fn _mm256_permutevar8x32_ps(a: __m256, idx: __m256i) -> __m256 {
2325 unsafe { permps(a, idx.as_i32x8()) }
2326}
2327
2328#[inline]
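/// Computes the absolute differences of packed unsigned 8-bit integers in `a` and `b`, then
/// horizontally sums each group of 8 differences into the low 16 bits of the corresponding 64-bit element.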
2335#[target_feature(enable = "avx2")]
2336#[cfg_attr(test, assert_instr(vpsadbw))]
2337#[stable(feature = "simd_x86", since = "1.27.0")]
2338pub fn _mm256_sad_epu8(a: __m256i, b: __m256i) -> __m256i {
2339 unsafe { transmute(psadbw(a.as_u8x32(), b.as_u8x32())) }
2340}
2341
2342#[inline]
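/// Shuffles bytes from `a` according to the control bytes in `b` within each 128-bit lane;
/// a control byte with its highest bit set zeroes the corresponding result byte.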
2373#[target_feature(enable = "avx2")]
2374#[cfg_attr(test, assert_instr(vpshufb))]
2375#[stable(feature = "simd_x86", since = "1.27.0")]
2376pub fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i {
2377 unsafe { transmute(pshufb(a.as_u8x32(), b.as_u8x32())) }
2378}
2379
2380#[inline]
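/// Shuffles 32-bit integers in each 128-bit lane of `a` using the control in `MASK`.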
2411#[target_feature(enable = "avx2")]
2412#[cfg_attr(test, assert_instr(vshufps, MASK = 9))]
2413#[rustc_legacy_const_generics(1)]
2414#[stable(feature = "simd_x86", since = "1.27.0")]
2415pub fn _mm256_shuffle_epi32<const MASK: i32>(a: __m256i) -> __m256i {
2416 static_assert_uimm_bits!(MASK, 8);
2417 unsafe {
2418 let r: i32x8 = simd_shuffle!(
2419 a.as_i32x8(),
2420 a.as_i32x8(),
2421 [
2422 MASK as u32 & 0b11,
2423 (MASK as u32 >> 2) & 0b11,
2424 (MASK as u32 >> 4) & 0b11,
2425 (MASK as u32 >> 6) & 0b11,
2426 (MASK as u32 & 0b11) + 4,
2427 ((MASK as u32 >> 2) & 0b11) + 4,
2428 ((MASK as u32 >> 4) & 0b11) + 4,
2429 ((MASK as u32 >> 6) & 0b11) + 4,
2430 ],
2431 );
2432 transmute(r)
2433 }
2434}
2435
2436#[inline]
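/// Shuffles 16-bit integers in the high 64 bits of each 128-bit lane of `a` using the control in `IMM8`;
/// the low 64 bits of each lane are copied unchanged.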
2442#[target_feature(enable = "avx2")]
2443#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 9))]
2444#[rustc_legacy_const_generics(1)]
2445#[stable(feature = "simd_x86", since = "1.27.0")]
2446pub fn _mm256_shufflehi_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
2447 static_assert_uimm_bits!(IMM8, 8);
2448 unsafe {
2449 let a = a.as_i16x16();
2450 let r: i16x16 = simd_shuffle!(
2451 a,
2452 a,
2453 [
2454 0,
2455 1,
2456 2,
2457 3,
2458 4 + (IMM8 as u32 & 0b11),
2459 4 + ((IMM8 as u32 >> 2) & 0b11),
2460 4 + ((IMM8 as u32 >> 4) & 0b11),
2461 4 + ((IMM8 as u32 >> 6) & 0b11),
2462 8,
2463 9,
2464 10,
2465 11,
2466 12 + (IMM8 as u32 & 0b11),
2467 12 + ((IMM8 as u32 >> 2) & 0b11),
2468 12 + ((IMM8 as u32 >> 4) & 0b11),
2469 12 + ((IMM8 as u32 >> 6) & 0b11),
2470 ],
2471 );
2472 transmute(r)
2473 }
2474}
2475
2476#[inline]
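/// Shuffles 16-bit integers in the low 64 bits of each 128-bit lane of `a` using the control in `IMM8`;
/// the high 64 bits of each lane are copied unchanged.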
2482#[target_feature(enable = "avx2")]
2483#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 9))]
2484#[rustc_legacy_const_generics(1)]
2485#[stable(feature = "simd_x86", since = "1.27.0")]
2486pub fn _mm256_shufflelo_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
2487 static_assert_uimm_bits!(IMM8, 8);
2488 unsafe {
2489 let a = a.as_i16x16();
2490 let r: i16x16 = simd_shuffle!(
2491 a,
2492 a,
2493 [
2494 0 + (IMM8 as u32 & 0b11),
2495 0 + ((IMM8 as u32 >> 2) & 0b11),
2496 0 + ((IMM8 as u32 >> 4) & 0b11),
2497 0 + ((IMM8 as u32 >> 6) & 0b11),
2498 4,
2499 5,
2500 6,
2501 7,
2502 8 + (IMM8 as u32 & 0b11),
2503 8 + ((IMM8 as u32 >> 2) & 0b11),
2504 8 + ((IMM8 as u32 >> 4) & 0b11),
2505 8 + ((IMM8 as u32 >> 6) & 0b11),
2506 12,
2507 13,
2508 14,
2509 15,
2510 ],
2511 );
2512 transmute(r)
2513 }
2514}
2515
2516#[inline]
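/// Negates packed 16-bit integers in `a` when the corresponding element in `b` is negative,
/// zeroes them when it is zero, and leaves them unchanged otherwise.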
2522#[target_feature(enable = "avx2")]
2523#[cfg_attr(test, assert_instr(vpsignw))]
2524#[stable(feature = "simd_x86", since = "1.27.0")]
2525pub fn _mm256_sign_epi16(a: __m256i, b: __m256i) -> __m256i {
2526 unsafe { transmute(psignw(a.as_i16x16(), b.as_i16x16())) }
2527}
2528
2529#[inline]
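/// Negates packed 32-bit integers in `a` when the corresponding element in `b` is negative,
/// zeroes them when it is zero, and leaves them unchanged otherwise.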
2535#[target_feature(enable = "avx2")]
2536#[cfg_attr(test, assert_instr(vpsignd))]
2537#[stable(feature = "simd_x86", since = "1.27.0")]
2538pub fn _mm256_sign_epi32(a: __m256i, b: __m256i) -> __m256i {
2539 unsafe { transmute(psignd(a.as_i32x8(), b.as_i32x8())) }
2540}
2541
2542#[inline]
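/// Negates packed 8-bit integers in `a` when the corresponding element in `b` is negative,
/// zeroes them when it is zero, and leaves them unchanged otherwise.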
2548#[target_feature(enable = "avx2")]
2549#[cfg_attr(test, assert_instr(vpsignb))]
2550#[stable(feature = "simd_x86", since = "1.27.0")]
2551pub fn _mm256_sign_epi8(a: __m256i, b: __m256i) -> __m256i {
2552 unsafe { transmute(psignb(a.as_i8x32(), b.as_i8x32())) }
2553}
2554
2555#[inline]
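/// Shifts packed 16-bit integers in `a` left by the amount held in the lower 64 bits of `count` while shifting in zeros.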
2560#[target_feature(enable = "avx2")]
2561#[cfg_attr(test, assert_instr(vpsllw))]
2562#[stable(feature = "simd_x86", since = "1.27.0")]
2563pub fn _mm256_sll_epi16(a: __m256i, count: __m128i) -> __m256i {
2564 unsafe { transmute(psllw(a.as_i16x16(), count.as_i16x8())) }
2565}
2566
2567#[inline]
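/// Shifts packed 32-bit integers in `a` left by the amount held in the lower 64 bits of `count` while shifting in zeros.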
2572#[target_feature(enable = "avx2")]
2573#[cfg_attr(test, assert_instr(vpslld))]
2574#[stable(feature = "simd_x86", since = "1.27.0")]
2575pub fn _mm256_sll_epi32(a: __m256i, count: __m128i) -> __m256i {
2576 unsafe { transmute(pslld(a.as_i32x8(), count.as_i32x4())) }
2577}
2578
2579#[inline]
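/// Shifts packed 64-bit integers in `a` left by the amount held in the lower 64 bits of `count` while shifting in zeros.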
2584#[target_feature(enable = "avx2")]
2585#[cfg_attr(test, assert_instr(vpsllq))]
2586#[stable(feature = "simd_x86", since = "1.27.0")]
2587pub fn _mm256_sll_epi64(a: __m256i, count: __m128i) -> __m256i {
2588 unsafe { transmute(psllq(a.as_i64x4(), count.as_i64x2())) }
2589}
2590
2591#[inline]
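/// Shifts packed 16-bit integers in `a` left by `IMM8` while shifting in zeros.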
2596#[target_feature(enable = "avx2")]
2597#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 7))]
2598#[rustc_legacy_const_generics(1)]
2599#[stable(feature = "simd_x86", since = "1.27.0")]
2600pub fn _mm256_slli_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
2601 static_assert_uimm_bits!(IMM8, 8);
2602 unsafe {
2603 if IMM8 >= 16 {
2604 _mm256_setzero_si256()
2605 } else {
2606 transmute(simd_shl(a.as_u16x16(), u16x16::splat(IMM8 as u16)))
2607 }
2608 }
2609}
2610
2611#[inline]
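/// Shifts packed 32-bit integers in `a` left by `IMM8` while shifting in zeros.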
2616#[target_feature(enable = "avx2")]
2617#[cfg_attr(test, assert_instr(vpslld, IMM8 = 7))]
2618#[rustc_legacy_const_generics(1)]
2619#[stable(feature = "simd_x86", since = "1.27.0")]
2620pub fn _mm256_slli_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
2621 static_assert_uimm_bits!(IMM8, 8);
2622 unsafe {
2623 if IMM8 >= 32 {
2624 _mm256_setzero_si256()
2625 } else {
2626 transmute(simd_shl(a.as_u32x8(), u32x8::splat(IMM8 as u32)))
2627 }
2628 }
2629}
2630
2631#[inline]
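/// Shifts packed 64-bit integers in `a` left by `IMM8` while shifting in zeros.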
2636#[target_feature(enable = "avx2")]
2637#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 7))]
2638#[rustc_legacy_const_generics(1)]
2639#[stable(feature = "simd_x86", since = "1.27.0")]
2640pub fn _mm256_slli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
2641 static_assert_uimm_bits!(IMM8, 8);
2642 unsafe {
2643 if IMM8 >= 64 {
2644 _mm256_setzero_si256()
2645 } else {
2646 transmute(simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64)))
2647 }
2648 }
2649}
2650
2651#[inline]
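/// Shifts each 128-bit lane of `a` left by `IMM8` bytes while shifting in zeros.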
2655#[target_feature(enable = "avx2")]
2656#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
2657#[rustc_legacy_const_generics(1)]
2658#[stable(feature = "simd_x86", since = "1.27.0")]
2659pub fn _mm256_slli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
2660 static_assert_uimm_bits!(IMM8, 8);
2661 _mm256_bslli_epi128::<IMM8>(a)
2662}
2663
2664#[inline]
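/// Shifts each 128-bit lane of `a` left by `IMM8` bytes while shifting in zeros.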
2668#[target_feature(enable = "avx2")]
2669#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
2670#[rustc_legacy_const_generics(1)]
2671#[stable(feature = "simd_x86", since = "1.27.0")]
2672pub fn _mm256_bslli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
2673 static_assert_uimm_bits!(IMM8, 8);
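    // Maps result byte `i` to a shuffle index: 0 picks a byte from the all-zeros vector
    // (the bytes shifted in), while `32 + (i - shift)` picks byte `i - shift` of `a`,
    // the second operand of the `simd_shuffle!` below.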
2674 const fn mask(shift: i32, i: u32) -> u32 {
2675 let shift = shift as u32 & 0xff;
2676 if shift > 15 || i % 16 < shift {
2677 0
2678 } else {
2679 32 + (i - shift)
2680 }
2681 }
2682 unsafe {
2683 let a = a.as_i8x32();
2684 let r: i8x32 = simd_shuffle!(
2685 i8x32::ZERO,
2686 a,
2687 [
2688 mask(IMM8, 0),
2689 mask(IMM8, 1),
2690 mask(IMM8, 2),
2691 mask(IMM8, 3),
2692 mask(IMM8, 4),
2693 mask(IMM8, 5),
2694 mask(IMM8, 6),
2695 mask(IMM8, 7),
2696 mask(IMM8, 8),
2697 mask(IMM8, 9),
2698 mask(IMM8, 10),
2699 mask(IMM8, 11),
2700 mask(IMM8, 12),
2701 mask(IMM8, 13),
2702 mask(IMM8, 14),
2703 mask(IMM8, 15),
2704 mask(IMM8, 16),
2705 mask(IMM8, 17),
2706 mask(IMM8, 18),
2707 mask(IMM8, 19),
2708 mask(IMM8, 20),
2709 mask(IMM8, 21),
2710 mask(IMM8, 22),
2711 mask(IMM8, 23),
2712 mask(IMM8, 24),
2713 mask(IMM8, 25),
2714 mask(IMM8, 26),
2715 mask(IMM8, 27),
2716 mask(IMM8, 28),
2717 mask(IMM8, 29),
2718 mask(IMM8, 30),
2719 mask(IMM8, 31),
2720 ],
2721 );
2722 transmute(r)
2723 }
2724}
2725
2726#[inline]
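/// Shifts packed 32-bit integers in `a` left by the amount specified by the corresponding element in `count` while shifting in zeros.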
2732#[target_feature(enable = "avx2")]
2733#[cfg_attr(test, assert_instr(vpsllvd))]
2734#[stable(feature = "simd_x86", since = "1.27.0")]
2735pub fn _mm_sllv_epi32(a: __m128i, count: __m128i) -> __m128i {
2736 unsafe { transmute(psllvd(a.as_i32x4(), count.as_i32x4())) }
2737}
2738
2739#[inline]
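/// Shifts packed 32-bit integers in `a` left by the amount specified by the corresponding element in `count` while shifting in zeros.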
2745#[target_feature(enable = "avx2")]
2746#[cfg_attr(test, assert_instr(vpsllvd))]
2747#[stable(feature = "simd_x86", since = "1.27.0")]
2748pub fn _mm256_sllv_epi32(a: __m256i, count: __m256i) -> __m256i {
2749 unsafe { transmute(psllvd256(a.as_i32x8(), count.as_i32x8())) }
2750}
2751
2752#[inline]
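/// Shifts packed 64-bit integers in `a` left by the amount specified by the corresponding element in `count` while shifting in zeros.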
2758#[target_feature(enable = "avx2")]
2759#[cfg_attr(test, assert_instr(vpsllvq))]
2760#[stable(feature = "simd_x86", since = "1.27.0")]
2761pub fn _mm_sllv_epi64(a: __m128i, count: __m128i) -> __m128i {
2762 unsafe { transmute(psllvq(a.as_i64x2(), count.as_i64x2())) }
2763}
2764
2765#[inline]
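/// Shifts packed 64-bit integers in `a` left by the amount specified by the corresponding element in `count` while shifting in zeros.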
2771#[target_feature(enable = "avx2")]
2772#[cfg_attr(test, assert_instr(vpsllvq))]
2773#[stable(feature = "simd_x86", since = "1.27.0")]
2774pub fn _mm256_sllv_epi64(a: __m256i, count: __m256i) -> __m256i {
2775 unsafe { transmute(psllvq256(a.as_i64x4(), count.as_i64x4())) }
2776}
2777
2778#[inline]
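/// Shifts packed 16-bit integers in `a` right by the amount held in the lower 64 bits of `count` while shifting in sign bits.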
2783#[target_feature(enable = "avx2")]
2784#[cfg_attr(test, assert_instr(vpsraw))]
2785#[stable(feature = "simd_x86", since = "1.27.0")]
2786pub fn _mm256_sra_epi16(a: __m256i, count: __m128i) -> __m256i {
2787 unsafe { transmute(psraw(a.as_i16x16(), count.as_i16x8())) }
2788}
2789
2790#[inline]
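/// Shifts packed 32-bit integers in `a` right by the amount held in the lower 64 bits of `count` while shifting in sign bits.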
2795#[target_feature(enable = "avx2")]
2796#[cfg_attr(test, assert_instr(vpsrad))]
2797#[stable(feature = "simd_x86", since = "1.27.0")]
2798pub fn _mm256_sra_epi32(a: __m256i, count: __m128i) -> __m256i {
2799 unsafe { transmute(psrad(a.as_i32x8(), count.as_i32x4())) }
2800}
2801
2802#[inline]
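/// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in sign bits.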
2807#[target_feature(enable = "avx2")]
2808#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 7))]
2809#[rustc_legacy_const_generics(1)]
2810#[stable(feature = "simd_x86", since = "1.27.0")]
2811pub fn _mm256_srai_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
2812 static_assert_uimm_bits!(IMM8, 8);
2813 unsafe { transmute(simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16))) }
2814}
2815
2816#[inline]
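/// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in sign bits.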
2821#[target_feature(enable = "avx2")]
2822#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 7))]
2823#[rustc_legacy_const_generics(1)]
2824#[stable(feature = "simd_x86", since = "1.27.0")]
2825pub fn _mm256_srai_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
2826 static_assert_uimm_bits!(IMM8, 8);
2827 unsafe { transmute(simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31)))) }
2828}
2829
2830#[inline]
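/// Shifts packed 32-bit integers in `a` right by the amount specified by the corresponding element in `count` while shifting in sign bits.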
2835#[target_feature(enable = "avx2")]
2836#[cfg_attr(test, assert_instr(vpsravd))]
2837#[stable(feature = "simd_x86", since = "1.27.0")]
2838pub fn _mm_srav_epi32(a: __m128i, count: __m128i) -> __m128i {
2839 unsafe { transmute(psravd(a.as_i32x4(), count.as_i32x4())) }
2840}
2841
2842#[inline]
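/// Shifts packed 32-bit integers in `a` right by the amount specified by the corresponding element in `count` while shifting in sign bits.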
2847#[target_feature(enable = "avx2")]
2848#[cfg_attr(test, assert_instr(vpsravd))]
2849#[stable(feature = "simd_x86", since = "1.27.0")]
2850pub fn _mm256_srav_epi32(a: __m256i, count: __m256i) -> __m256i {
2851 unsafe { transmute(psravd256(a.as_i32x8(), count.as_i32x8())) }
2852}
2853
2854#[inline]
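/// Shifts each 128-bit lane of `a` right by `IMM8` bytes while shifting in zeros.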
2858#[target_feature(enable = "avx2")]
2859#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 1))]
2860#[rustc_legacy_const_generics(1)]
2861#[stable(feature = "simd_x86", since = "1.27.0")]
2862pub fn _mm256_srli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
2863 static_assert_uimm_bits!(IMM8, 8);
2864 _mm256_bsrli_epi128::<IMM8>(a)
2865}
2866
2867#[inline]
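/// Shifts each 128-bit lane of `a` right by `IMM8` bytes while shifting in zeros.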
2871#[target_feature(enable = "avx2")]
2872#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 1))]
2873#[rustc_legacy_const_generics(1)]
2874#[stable(feature = "simd_x86", since = "1.27.0")]
2875pub fn _mm256_bsrli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
2876 static_assert_uimm_bits!(IMM8, 8);
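    // Maps result byte `i` to a shuffle index: 0 picks a byte from the all-zeros vector
    // (the bytes shifted in), while `32 + (i + shift)` picks byte `i + shift` of `a`,
    // the second operand of the `simd_shuffle!` below.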
2877 const fn mask(shift: i32, i: u32) -> u32 {
2878 let shift = shift as u32 & 0xff;
2879 if shift > 15 || (15 - (i % 16)) < shift {
2880 0
2881 } else {
2882 32 + (i + shift)
2883 }
2884 }
2885 unsafe {
2886 let a = a.as_i8x32();
2887 let r: i8x32 = simd_shuffle!(
2888 i8x32::ZERO,
2889 a,
2890 [
2891 mask(IMM8, 0),
2892 mask(IMM8, 1),
2893 mask(IMM8, 2),
2894 mask(IMM8, 3),
2895 mask(IMM8, 4),
2896 mask(IMM8, 5),
2897 mask(IMM8, 6),
2898 mask(IMM8, 7),
2899 mask(IMM8, 8),
2900 mask(IMM8, 9),
2901 mask(IMM8, 10),
2902 mask(IMM8, 11),
2903 mask(IMM8, 12),
2904 mask(IMM8, 13),
2905 mask(IMM8, 14),
2906 mask(IMM8, 15),
2907 mask(IMM8, 16),
2908 mask(IMM8, 17),
2909 mask(IMM8, 18),
2910 mask(IMM8, 19),
2911 mask(IMM8, 20),
2912 mask(IMM8, 21),
2913 mask(IMM8, 22),
2914 mask(IMM8, 23),
2915 mask(IMM8, 24),
2916 mask(IMM8, 25),
2917 mask(IMM8, 26),
2918 mask(IMM8, 27),
2919 mask(IMM8, 28),
2920 mask(IMM8, 29),
2921 mask(IMM8, 30),
2922 mask(IMM8, 31),
2923 ],
2924 );
2925 transmute(r)
2926 }
2927}
2928
2929#[inline]
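/// Shifts packed 16-bit integers in `a` right by the amount held in the lower 64 bits of `count` while shifting in zeros.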
2934#[target_feature(enable = "avx2")]
2935#[cfg_attr(test, assert_instr(vpsrlw))]
2936#[stable(feature = "simd_x86", since = "1.27.0")]
2937pub fn _mm256_srl_epi16(a: __m256i, count: __m128i) -> __m256i {
2938 unsafe { transmute(psrlw(a.as_i16x16(), count.as_i16x8())) }
2939}
2940
2941#[inline]
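/// Shifts packed 32-bit integers in `a` right by the amount held in the lower 64 bits of `count` while shifting in zeros.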
2946#[target_feature(enable = "avx2")]
2947#[cfg_attr(test, assert_instr(vpsrld))]
2948#[stable(feature = "simd_x86", since = "1.27.0")]
2949pub fn _mm256_srl_epi32(a: __m256i, count: __m128i) -> __m256i {
2950 unsafe { transmute(psrld(a.as_i32x8(), count.as_i32x4())) }
2951}
2952
2953#[inline]
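/// Shifts packed 64-bit integers in `a` right by the amount held in the lower 64 bits of `count` while shifting in zeros.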
2958#[target_feature(enable = "avx2")]
2959#[cfg_attr(test, assert_instr(vpsrlq))]
2960#[stable(feature = "simd_x86", since = "1.27.0")]
2961pub fn _mm256_srl_epi64(a: __m256i, count: __m128i) -> __m256i {
2962 unsafe { transmute(psrlq(a.as_i64x4(), count.as_i64x2())) }
2963}
2964
2965#[inline]
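/// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in zeros.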
2970#[target_feature(enable = "avx2")]
2971#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 7))]
2972#[rustc_legacy_const_generics(1)]
2973#[stable(feature = "simd_x86", since = "1.27.0")]
2974pub fn _mm256_srli_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
2975 static_assert_uimm_bits!(IMM8, 8);
2976 unsafe {
2977 if IMM8 >= 16 {
2978 _mm256_setzero_si256()
2979 } else {
2980 transmute(simd_shr(a.as_u16x16(), u16x16::splat(IMM8 as u16)))
2981 }
2982 }
2983}
2984
2985#[inline]
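/// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in zeros.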
2990#[target_feature(enable = "avx2")]
2991#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 7))]
2992#[rustc_legacy_const_generics(1)]
2993#[stable(feature = "simd_x86", since = "1.27.0")]
2994pub fn _mm256_srli_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
2995 static_assert_uimm_bits!(IMM8, 8);
2996 unsafe {
2997 if IMM8 >= 32 {
2998 _mm256_setzero_si256()
2999 } else {
3000 transmute(simd_shr(a.as_u32x8(), u32x8::splat(IMM8 as u32)))
3001 }
3002 }
3003}
3004
3005#[inline]
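/// Shifts packed 64-bit integers in `a` right by `IMM8` while shifting in zeros.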
3010#[target_feature(enable = "avx2")]
3011#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 7))]
3012#[rustc_legacy_const_generics(1)]
3013#[stable(feature = "simd_x86", since = "1.27.0")]
3014pub fn _mm256_srli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
3015 static_assert_uimm_bits!(IMM8, 8);
3016 unsafe {
3017 if IMM8 >= 64 {
3018 _mm256_setzero_si256()
3019 } else {
3020 transmute(simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64)))
3021 }
3022 }
3023}
3024
3025#[inline]
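/// Shifts packed 32-bit integers in `a` right by the amount specified by the corresponding element in `count` while shifting in zeros.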
3030#[target_feature(enable = "avx2")]
3031#[cfg_attr(test, assert_instr(vpsrlvd))]
3032#[stable(feature = "simd_x86", since = "1.27.0")]
3033pub fn _mm_srlv_epi32(a: __m128i, count: __m128i) -> __m128i {
3034 unsafe { transmute(psrlvd(a.as_i32x4(), count.as_i32x4())) }
3035}
3036
3037#[inline]
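/// Shifts packed 32-bit integers in `a` right by the amount specified by the corresponding element in `count` while shifting in zeros.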
3042#[target_feature(enable = "avx2")]
3043#[cfg_attr(test, assert_instr(vpsrlvd))]
3044#[stable(feature = "simd_x86", since = "1.27.0")]
3045pub fn _mm256_srlv_epi32(a: __m256i, count: __m256i) -> __m256i {
3046 unsafe { transmute(psrlvd256(a.as_i32x8(), count.as_i32x8())) }
3047}
3048
3049#[inline]
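/// Shifts packed 64-bit integers in `a` right by the amount specified by the corresponding element in `count` while shifting in zeros.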
3054#[target_feature(enable = "avx2")]
3055#[cfg_attr(test, assert_instr(vpsrlvq))]
3056#[stable(feature = "simd_x86", since = "1.27.0")]
3057pub fn _mm_srlv_epi64(a: __m128i, count: __m128i) -> __m128i {
3058 unsafe { transmute(psrlvq(a.as_i64x2(), count.as_i64x2())) }
3059}
3060
3061#[inline]
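/// Shifts packed 64-bit integers in `a` right by the amount specified by the corresponding element in `count` while shifting in zeros.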
3066#[target_feature(enable = "avx2")]
3067#[cfg_attr(test, assert_instr(vpsrlvq))]
3068#[stable(feature = "simd_x86", since = "1.27.0")]
3069pub fn _mm256_srlv_epi64(a: __m256i, count: __m256i) -> __m256i {
3070 unsafe { transmute(psrlvq256(a.as_i64x4(), count.as_i64x4())) }
3071}
3072
3073#[inline]
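/// Loads 256 bits of integer data from memory into the returned vector using a
/// non-temporal memory hint. `mem_addr` must be aligned on a 32-byte boundary or a
/// general-protection exception may be generated.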
3079#[target_feature(enable = "avx2")]
3080#[cfg_attr(test, assert_instr(vmovntdqa))]
3081#[stable(feature = "simd_x86_updates", since = "1.82.0")]
3082pub unsafe fn _mm256_stream_load_si256(mem_addr: *const __m256i) -> __m256i {
3083 let dst: __m256i;
3084 crate::arch::asm!(
3085 vpl!("vmovntdqa {a}"),
3086 a = out(ymm_reg) dst,
3087 p = in(reg) mem_addr,
3088 options(pure, readonly, nostack, preserves_flags),
3089 );
3090 dst
3091}
3092
3093#[inline]
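/// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in `a`.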
3097#[target_feature(enable = "avx2")]
3098#[cfg_attr(test, assert_instr(vpsubw))]
3099#[stable(feature = "simd_x86", since = "1.27.0")]
3100pub fn _mm256_sub_epi16(a: __m256i, b: __m256i) -> __m256i {
3101 unsafe { transmute(simd_sub(a.as_i16x16(), b.as_i16x16())) }
3102}
3103
3104#[inline]
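/// Subtracts packed 32-bit integers in `b` from packed 32-bit integers in `a`.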
3108#[target_feature(enable = "avx2")]
3109#[cfg_attr(test, assert_instr(vpsubd))]
3110#[stable(feature = "simd_x86", since = "1.27.0")]
3111pub fn _mm256_sub_epi32(a: __m256i, b: __m256i) -> __m256i {
3112 unsafe { transmute(simd_sub(a.as_i32x8(), b.as_i32x8())) }
3113}
3114
3115#[inline]
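/// Subtracts packed 64-bit integers in `b` from packed 64-bit integers in `a`.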
3119#[target_feature(enable = "avx2")]
3120#[cfg_attr(test, assert_instr(vpsubq))]
3121#[stable(feature = "simd_x86", since = "1.27.0")]
3122pub fn _mm256_sub_epi64(a: __m256i, b: __m256i) -> __m256i {
3123 unsafe { transmute(simd_sub(a.as_i64x4(), b.as_i64x4())) }
3124}
3125
3126#[inline]
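/// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in `a`.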
3130#[target_feature(enable = "avx2")]
3131#[cfg_attr(test, assert_instr(vpsubb))]
3132#[stable(feature = "simd_x86", since = "1.27.0")]
3133pub fn _mm256_sub_epi8(a: __m256i, b: __m256i) -> __m256i {
3134 unsafe { transmute(simd_sub(a.as_i8x32(), b.as_i8x32())) }
3135}
3136
3137#[inline]
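/// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in `a` using signed saturation.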
3142#[target_feature(enable = "avx2")]
3143#[cfg_attr(test, assert_instr(vpsubsw))]
3144#[stable(feature = "simd_x86", since = "1.27.0")]
3145pub fn _mm256_subs_epi16(a: __m256i, b: __m256i) -> __m256i {
3146 unsafe { transmute(simd_saturating_sub(a.as_i16x16(), b.as_i16x16())) }
3147}
3148
3149#[inline]
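/// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in `a` using signed saturation.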
3154#[target_feature(enable = "avx2")]
3155#[cfg_attr(test, assert_instr(vpsubsb))]
3156#[stable(feature = "simd_x86", since = "1.27.0")]
3157pub fn _mm256_subs_epi8(a: __m256i, b: __m256i) -> __m256i {
3158 unsafe { transmute(simd_saturating_sub(a.as_i8x32(), b.as_i8x32())) }
3159}
3160
3161#[inline]
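/// Subtracts packed unsigned 16-bit integers in `b` from packed unsigned 16-bit integers in `a` using unsigned saturation.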
3166#[target_feature(enable = "avx2")]
3167#[cfg_attr(test, assert_instr(vpsubusw))]
3168#[stable(feature = "simd_x86", since = "1.27.0")]
3169pub fn _mm256_subs_epu16(a: __m256i, b: __m256i) -> __m256i {
3170 unsafe { transmute(simd_saturating_sub(a.as_u16x16(), b.as_u16x16())) }
3171}
3172
3173#[inline]
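/// Subtracts packed unsigned 8-bit integers in `b` from packed unsigned 8-bit integers in `a` using unsigned saturation.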
3178#[target_feature(enable = "avx2")]
3179#[cfg_attr(test, assert_instr(vpsubusb))]
3180#[stable(feature = "simd_x86", since = "1.27.0")]
3181pub fn _mm256_subs_epu8(a: __m256i, b: __m256i) -> __m256i {
3182 unsafe { transmute(simd_saturating_sub(a.as_u8x32(), b.as_u8x32())) }
3183}
3184
3185#[inline]
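/// Unpacks and interleaves 8-bit integers from the high half of each 128-bit lane of `a` and `b`.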
3225#[target_feature(enable = "avx2")]
3226#[cfg_attr(test, assert_instr(vpunpckhbw))]
3227#[stable(feature = "simd_x86", since = "1.27.0")]
3228pub fn _mm256_unpackhi_epi8(a: __m256i, b: __m256i) -> __m256i {
3229 unsafe {
3230 #[rustfmt::skip]
3231 let r: i8x32 = simd_shuffle!(a.as_i8x32(), b.as_i8x32(), [
3232 8, 40, 9, 41, 10, 42, 11, 43,
3233 12, 44, 13, 45, 14, 46, 15, 47,
3234 24, 56, 25, 57, 26, 58, 27, 59,
3235 28, 60, 29, 61, 30, 62, 31, 63,
3236 ]);
3237 transmute(r)
3238 }
3239}
3240
3241#[inline]
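/// Unpacks and interleaves 8-bit integers from the low half of each 128-bit lane of `a` and `b`.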
3280#[target_feature(enable = "avx2")]
3281#[cfg_attr(test, assert_instr(vpunpcklbw))]
3282#[stable(feature = "simd_x86", since = "1.27.0")]
3283pub fn _mm256_unpacklo_epi8(a: __m256i, b: __m256i) -> __m256i {
3284 unsafe {
3285 #[rustfmt::skip]
3286 let r: i8x32 = simd_shuffle!(a.as_i8x32(), b.as_i8x32(), [
3287 0, 32, 1, 33, 2, 34, 3, 35,
3288 4, 36, 5, 37, 6, 38, 7, 39,
3289 16, 48, 17, 49, 18, 50, 19, 51,
3290 20, 52, 21, 53, 22, 54, 23, 55,
3291 ]);
3292 transmute(r)
3293 }
3294}
3295
3296#[inline]
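/// Unpacks and interleaves 16-bit integers from the high half of each 128-bit lane of `a` and `b`.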
3331#[target_feature(enable = "avx2")]
3332#[cfg_attr(test, assert_instr(vpunpckhwd))]
3333#[stable(feature = "simd_x86", since = "1.27.0")]
3334pub fn _mm256_unpackhi_epi16(a: __m256i, b: __m256i) -> __m256i {
3335 unsafe {
3336 let r: i16x16 = simd_shuffle!(
3337 a.as_i16x16(),
3338 b.as_i16x16(),
3339 [4, 20, 5, 21, 6, 22, 7, 23, 12, 28, 13, 29, 14, 30, 15, 31],
3340 );
3341 transmute(r)
3342 }
3343}
3344
3345#[inline]
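/// Unpacks and interleaves 16-bit integers from the low half of each 128-bit lane of `a` and `b`.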
3381#[target_feature(enable = "avx2")]
3382#[cfg_attr(test, assert_instr(vpunpcklwd))]
3383#[stable(feature = "simd_x86", since = "1.27.0")]
3384pub fn _mm256_unpacklo_epi16(a: __m256i, b: __m256i) -> __m256i {
3385 unsafe {
3386 let r: i16x16 = simd_shuffle!(
3387 a.as_i16x16(),
3388 b.as_i16x16(),
3389 [0, 16, 1, 17, 2, 18, 3, 19, 8, 24, 9, 25, 10, 26, 11, 27],
3390 );
3391 transmute(r)
3392 }
3393}
3394
3395#[inline]
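/// Unpacks and interleaves 32-bit integers from the high half of each 128-bit lane of `a` and `b`.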
3424#[target_feature(enable = "avx2")]
3425#[cfg_attr(test, assert_instr(vunpckhps))]
3426#[stable(feature = "simd_x86", since = "1.27.0")]
3427pub fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i {
3428 unsafe {
3429 let r: i32x8 = simd_shuffle!(a.as_i32x8(), b.as_i32x8(), [2, 10, 3, 11, 6, 14, 7, 15]);
3430 transmute(r)
3431 }
3432}
3433
3434#[inline]
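/// Unpacks and interleaves 32-bit integers from the low half of each 128-bit lane of `a` and `b`.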
3463#[target_feature(enable = "avx2")]
3464#[cfg_attr(test, assert_instr(vunpcklps))]
3465#[stable(feature = "simd_x86", since = "1.27.0")]
3466pub fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i {
3467 unsafe {
3468 let r: i32x8 = simd_shuffle!(a.as_i32x8(), b.as_i32x8(), [0, 8, 1, 9, 4, 12, 5, 13]);
3469 transmute(r)
3470 }
3471}
3472
3473#[inline]
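/// Unpacks and interleaves 64-bit integers from the high half of each 128-bit lane of `a` and `b`.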
3502#[target_feature(enable = "avx2")]
3503#[cfg_attr(test, assert_instr(vunpckhpd))]
3504#[stable(feature = "simd_x86", since = "1.27.0")]
3505pub fn _mm256_unpackhi_epi64(a: __m256i, b: __m256i) -> __m256i {
3506 unsafe {
3507 let r: i64x4 = simd_shuffle!(a.as_i64x4(), b.as_i64x4(), [1, 5, 3, 7]);
3508 transmute(r)
3509 }
3510}
3511
3512#[inline]
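/// Unpacks and interleaves 64-bit integers from the low half of each 128-bit lane of `a` and `b`.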
3541#[target_feature(enable = "avx2")]
3542#[cfg_attr(test, assert_instr(vunpcklpd))]
3543#[stable(feature = "simd_x86", since = "1.27.0")]
3544pub fn _mm256_unpacklo_epi64(a: __m256i, b: __m256i) -> __m256i {
3545 unsafe {
3546 let r: i64x4 = simd_shuffle!(a.as_i64x4(), b.as_i64x4(), [0, 4, 2, 6]);
3547 transmute(r)
3548 }
3549}
3550
3551#[inline]
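/// Computes the bitwise XOR of 256 bits (representing integer data) in `a` and `b`.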
3556#[target_feature(enable = "avx2")]
3557#[cfg_attr(test, assert_instr(vxorps))]
3558#[stable(feature = "simd_x86", since = "1.27.0")]
3559pub fn _mm256_xor_si256(a: __m256i, b: __m256i) -> __m256i {
3560 unsafe { transmute(simd_xor(a.as_i64x4(), b.as_i64x4())) }
3561}
3562
3563#[inline]
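/// Extracts an 8-bit integer from `a`, selected with `INDEX`, and returns it zero-extended to 32 bits.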
3570#[target_feature(enable = "avx2")]
3571#[rustc_legacy_const_generics(1)]
3573#[stable(feature = "simd_x86", since = "1.27.0")]
3574pub fn _mm256_extract_epi8<const INDEX: i32>(a: __m256i) -> i32 {
3575 static_assert_uimm_bits!(INDEX, 5);
3576 unsafe { simd_extract!(a.as_u8x32(), INDEX as u32, u8) as i32 }
3577}
3578
3579#[inline]
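/// Extracts a 16-bit integer from `a`, selected with `INDEX`, and returns it zero-extended to 32 bits.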
3586#[target_feature(enable = "avx2")]
3587#[rustc_legacy_const_generics(1)]
3589#[stable(feature = "simd_x86", since = "1.27.0")]
3590pub fn _mm256_extract_epi16<const INDEX: i32>(a: __m256i) -> i32 {
3591 static_assert_uimm_bits!(INDEX, 4);
3592 unsafe { simd_extract!(a.as_u16x16(), INDEX as u32, u16) as i32 }
3593}
3594
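// LLVM intrinsic declarations backing the AVX2 wrappers defined above.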
3595#[allow(improper_ctypes)]
3596unsafe extern "C" {
3597 #[link_name = "llvm.x86.avx2.phadd.w"]
3598 fn phaddw(a: i16x16, b: i16x16) -> i16x16;
3599 #[link_name = "llvm.x86.avx2.phadd.d"]
3600 fn phaddd(a: i32x8, b: i32x8) -> i32x8;
3601 #[link_name = "llvm.x86.avx2.phadd.sw"]
3602 fn phaddsw(a: i16x16, b: i16x16) -> i16x16;
3603 #[link_name = "llvm.x86.avx2.phsub.w"]
3604 fn phsubw(a: i16x16, b: i16x16) -> i16x16;
3605 #[link_name = "llvm.x86.avx2.phsub.d"]
3606 fn phsubd(a: i32x8, b: i32x8) -> i32x8;
3607 #[link_name = "llvm.x86.avx2.phsub.sw"]
3608 fn phsubsw(a: i16x16, b: i16x16) -> i16x16;
3609 #[link_name = "llvm.x86.avx2.pmadd.wd"]
3610 fn pmaddwd(a: i16x16, b: i16x16) -> i32x8;
3611 #[link_name = "llvm.x86.avx2.pmadd.ub.sw"]
3612 fn pmaddubsw(a: u8x32, b: u8x32) -> i16x16;
3613 #[link_name = "llvm.x86.avx2.maskload.d"]
3614 fn maskloadd(mem_addr: *const i8, mask: i32x4) -> i32x4;
3615 #[link_name = "llvm.x86.avx2.maskload.d.256"]
3616 fn maskloadd256(mem_addr: *const i8, mask: i32x8) -> i32x8;
3617 #[link_name = "llvm.x86.avx2.maskload.q"]
3618 fn maskloadq(mem_addr: *const i8, mask: i64x2) -> i64x2;
3619 #[link_name = "llvm.x86.avx2.maskload.q.256"]
3620 fn maskloadq256(mem_addr: *const i8, mask: i64x4) -> i64x4;
3621 #[link_name = "llvm.x86.avx2.maskstore.d"]
3622 fn maskstored(mem_addr: *mut i8, mask: i32x4, a: i32x4);
3623 #[link_name = "llvm.x86.avx2.maskstore.d.256"]
3624 fn maskstored256(mem_addr: *mut i8, mask: i32x8, a: i32x8);
3625 #[link_name = "llvm.x86.avx2.maskstore.q"]
3626 fn maskstoreq(mem_addr: *mut i8, mask: i64x2, a: i64x2);
3627 #[link_name = "llvm.x86.avx2.maskstore.q.256"]
3628 fn maskstoreq256(mem_addr: *mut i8, mask: i64x4, a: i64x4);
3629 #[link_name = "llvm.x86.avx2.mpsadbw"]
3630 fn mpsadbw(a: u8x32, b: u8x32, imm8: i8) -> u16x16;
3631 #[link_name = "llvm.x86.avx2.pmul.hr.sw"]
3632 fn pmulhrsw(a: i16x16, b: i16x16) -> i16x16;
3633 #[link_name = "llvm.x86.avx2.packsswb"]
3634 fn packsswb(a: i16x16, b: i16x16) -> i8x32;
3635 #[link_name = "llvm.x86.avx2.packssdw"]
3636 fn packssdw(a: i32x8, b: i32x8) -> i16x16;
3637 #[link_name = "llvm.x86.avx2.packuswb"]
3638 fn packuswb(a: i16x16, b: i16x16) -> u8x32;
3639 #[link_name = "llvm.x86.avx2.packusdw"]
3640 fn packusdw(a: i32x8, b: i32x8) -> u16x16;
3641 #[link_name = "llvm.x86.avx2.psad.bw"]
3642 fn psadbw(a: u8x32, b: u8x32) -> u64x4;
3643 #[link_name = "llvm.x86.avx2.psign.b"]
3644 fn psignb(a: i8x32, b: i8x32) -> i8x32;
3645 #[link_name = "llvm.x86.avx2.psign.w"]
3646 fn psignw(a: i16x16, b: i16x16) -> i16x16;
3647 #[link_name = "llvm.x86.avx2.psign.d"]
3648 fn psignd(a: i32x8, b: i32x8) -> i32x8;
3649 #[link_name = "llvm.x86.avx2.psll.w"]
3650 fn psllw(a: i16x16, count: i16x8) -> i16x16;
3651 #[link_name = "llvm.x86.avx2.psll.d"]
3652 fn pslld(a: i32x8, count: i32x4) -> i32x8;
3653 #[link_name = "llvm.x86.avx2.psll.q"]
3654 fn psllq(a: i64x4, count: i64x2) -> i64x4;
3655 #[link_name = "llvm.x86.avx2.psllv.d"]
3656 fn psllvd(a: i32x4, count: i32x4) -> i32x4;
3657 #[link_name = "llvm.x86.avx2.psllv.d.256"]
3658 fn psllvd256(a: i32x8, count: i32x8) -> i32x8;
3659 #[link_name = "llvm.x86.avx2.psllv.q"]
3660 fn psllvq(a: i64x2, count: i64x2) -> i64x2;
3661 #[link_name = "llvm.x86.avx2.psllv.q.256"]
3662 fn psllvq256(a: i64x4, count: i64x4) -> i64x4;
3663 #[link_name = "llvm.x86.avx2.psra.w"]
3664 fn psraw(a: i16x16, count: i16x8) -> i16x16;
3665 #[link_name = "llvm.x86.avx2.psra.d"]
3666 fn psrad(a: i32x8, count: i32x4) -> i32x8;
3667 #[link_name = "llvm.x86.avx2.psrav.d"]
3668 fn psravd(a: i32x4, count: i32x4) -> i32x4;
3669 #[link_name = "llvm.x86.avx2.psrav.d.256"]
3670 fn psravd256(a: i32x8, count: i32x8) -> i32x8;
3671 #[link_name = "llvm.x86.avx2.psrl.w"]
3672 fn psrlw(a: i16x16, count: i16x8) -> i16x16;
3673 #[link_name = "llvm.x86.avx2.psrl.d"]
3674 fn psrld(a: i32x8, count: i32x4) -> i32x8;
3675 #[link_name = "llvm.x86.avx2.psrl.q"]
3676 fn psrlq(a: i64x4, count: i64x2) -> i64x4;
3677 #[link_name = "llvm.x86.avx2.psrlv.d"]
3678 fn psrlvd(a: i32x4, count: i32x4) -> i32x4;
3679 #[link_name = "llvm.x86.avx2.psrlv.d.256"]
3680 fn psrlvd256(a: i32x8, count: i32x8) -> i32x8;
3681 #[link_name = "llvm.x86.avx2.psrlv.q"]
3682 fn psrlvq(a: i64x2, count: i64x2) -> i64x2;
3683 #[link_name = "llvm.x86.avx2.psrlv.q.256"]
3684 fn psrlvq256(a: i64x4, count: i64x4) -> i64x4;
3685 #[link_name = "llvm.x86.avx2.pshuf.b"]
3686 fn pshufb(a: u8x32, b: u8x32) -> u8x32;
3687 #[link_name = "llvm.x86.avx2.permd"]
3688 fn permd(a: u32x8, b: u32x8) -> u32x8;
3689 #[link_name = "llvm.x86.avx2.permps"]
3690 fn permps(a: __m256, b: i32x8) -> __m256;
3691 #[link_name = "llvm.x86.avx2.vperm2i128"]
3692 fn vperm2i128(a: i64x4, b: i64x4, imm8: i8) -> i64x4;
3693 #[link_name = "llvm.x86.avx2.gather.d.d"]
3694 fn pgatherdd(src: i32x4, slice: *const i8, offsets: i32x4, mask: i32x4, scale: i8) -> i32x4;
3695 #[link_name = "llvm.x86.avx2.gather.d.d.256"]
3696 fn vpgatherdd(src: i32x8, slice: *const i8, offsets: i32x8, mask: i32x8, scale: i8) -> i32x8;
3697 #[link_name = "llvm.x86.avx2.gather.d.q"]
3698 fn pgatherdq(src: i64x2, slice: *const i8, offsets: i32x4, mask: i64x2, scale: i8) -> i64x2;
3699 #[link_name = "llvm.x86.avx2.gather.d.q.256"]
3700 fn vpgatherdq(src: i64x4, slice: *const i8, offsets: i32x4, mask: i64x4, scale: i8) -> i64x4;
3701 #[link_name = "llvm.x86.avx2.gather.q.d"]
3702 fn pgatherqd(src: i32x4, slice: *const i8, offsets: i64x2, mask: i32x4, scale: i8) -> i32x4;
3703 #[link_name = "llvm.x86.avx2.gather.q.d.256"]
3704 fn vpgatherqd(src: i32x4, slice: *const i8, offsets: i64x4, mask: i32x4, scale: i8) -> i32x4;
3705 #[link_name = "llvm.x86.avx2.gather.q.q"]
3706 fn pgatherqq(src: i64x2, slice: *const i8, offsets: i64x2, mask: i64x2, scale: i8) -> i64x2;
3707 #[link_name = "llvm.x86.avx2.gather.q.q.256"]
3708 fn vpgatherqq(src: i64x4, slice: *const i8, offsets: i64x4, mask: i64x4, scale: i8) -> i64x4;
3709 #[link_name = "llvm.x86.avx2.gather.d.pd"]
3710 fn pgatherdpd(
3711 src: __m128d,
3712 slice: *const i8,
3713 offsets: i32x4,
3714 mask: __m128d,
3715 scale: i8,
3716 ) -> __m128d;
3717 #[link_name = "llvm.x86.avx2.gather.d.pd.256"]
3718 fn vpgatherdpd(
3719 src: __m256d,
3720 slice: *const i8,
3721 offsets: i32x4,
3722 mask: __m256d,
3723 scale: i8,
3724 ) -> __m256d;
3725 #[link_name = "llvm.x86.avx2.gather.q.pd"]
3726 fn pgatherqpd(
3727 src: __m128d,
3728 slice: *const i8,
3729 offsets: i64x2,
3730 mask: __m128d,
3731 scale: i8,
3732 ) -> __m128d;
3733 #[link_name = "llvm.x86.avx2.gather.q.pd.256"]
3734 fn vpgatherqpd(
3735 src: __m256d,
3736 slice: *const i8,
3737 offsets: i64x4,
3738 mask: __m256d,
3739 scale: i8,
3740 ) -> __m256d;
3741 #[link_name = "llvm.x86.avx2.gather.d.ps"]
3742 fn pgatherdps(src: __m128, slice: *const i8, offsets: i32x4, mask: __m128, scale: i8)
3743 -> __m128;
3744 #[link_name = "llvm.x86.avx2.gather.d.ps.256"]
3745 fn vpgatherdps(
3746 src: __m256,
3747 slice: *const i8,
3748 offsets: i32x8,
3749 mask: __m256,
3750 scale: i8,
3751 ) -> __m256;
3752 #[link_name = "llvm.x86.avx2.gather.q.ps"]
3753 fn pgatherqps(src: __m128, slice: *const i8, offsets: i64x2, mask: __m128, scale: i8)
3754 -> __m128;
3755 #[link_name = "llvm.x86.avx2.gather.q.ps.256"]
3756 fn vpgatherqps(
3757 src: __m128,
3758 slice: *const i8,
3759 offsets: i64x4,
3760 mask: __m128,
3761 scale: i8,
3762 ) -> __m128;
3763}
3764
3765#[cfg(test)]
3766mod tests {
3767
3768 use stdarch_test::simd_test;
3769
3770 use crate::core_arch::x86::*;
3771
3772 #[simd_test(enable = "avx2")]
3773 unsafe fn test_mm256_abs_epi32() {
3774 #[rustfmt::skip]
3775 let a = _mm256_setr_epi32(
3776 0, 1, -1, i32::MAX,
3777 i32::MIN, 100, -100, -32,
3778 );
3779 let r = _mm256_abs_epi32(a);
3780 #[rustfmt::skip]
3781 let e = _mm256_setr_epi32(
3782 0, 1, 1, i32::MAX,
3783 i32::MAX.wrapping_add(1), 100, 100, 32,
3784 );
3785 assert_eq_m256i(r, e);
3786 }
3787
3788 #[simd_test(enable = "avx2")]
3789 unsafe fn test_mm256_abs_epi16() {
3790 #[rustfmt::skip]
3791 let a = _mm256_setr_epi16(
3792 0, 1, -1, 2, -2, 3, -3, 4,
3793 -4, 5, -5, i16::MAX, i16::MIN, 100, -100, -32,
3794 );
3795 let r = _mm256_abs_epi16(a);
3796 #[rustfmt::skip]
3797 let e = _mm256_setr_epi16(
3798 0, 1, 1, 2, 2, 3, 3, 4,
3799 4, 5, 5, i16::MAX, i16::MAX.wrapping_add(1), 100, 100, 32,
3800 );
3801 assert_eq_m256i(r, e);
3802 }
3803
3804 #[simd_test(enable = "avx2")]
3805 unsafe fn test_mm256_abs_epi8() {
3806 #[rustfmt::skip]
3807 let a = _mm256_setr_epi8(
3808 0, 1, -1, 2, -2, 3, -3, 4,
3809 -4, 5, -5, i8::MAX, i8::MIN, 100, -100, -32,
3810 0, 1, -1, 2, -2, 3, -3, 4,
3811 -4, 5, -5, i8::MAX, i8::MIN, 100, -100, -32,
3812 );
3813 let r = _mm256_abs_epi8(a);
3814 #[rustfmt::skip]
3815 let e = _mm256_setr_epi8(
3816 0, 1, 1, 2, 2, 3, 3, 4,
3817 4, 5, 5, i8::MAX, i8::MAX.wrapping_add(1), 100, 100, 32,
3818 0, 1, 1, 2, 2, 3, 3, 4,
3819 4, 5, 5, i8::MAX, i8::MAX.wrapping_add(1), 100, 100, 32,
3820 );
3821 assert_eq_m256i(r, e);
3822 }
3823
3824 #[simd_test(enable = "avx2")]
3825 unsafe fn test_mm256_add_epi64() {
3826 let a = _mm256_setr_epi64x(-10, 0, 100, 1_000_000_000);
3827 let b = _mm256_setr_epi64x(-1, 0, 1, 2);
3828 let r = _mm256_add_epi64(a, b);
3829 let e = _mm256_setr_epi64x(-11, 0, 101, 1_000_000_002);
3830 assert_eq_m256i(r, e);
3831 }
3832
3833 #[simd_test(enable = "avx2")]
3834 unsafe fn test_mm256_add_epi32() {
3835 let a = _mm256_setr_epi32(-1, 0, 1, 2, 3, 4, 5, 6);
3836 let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
3837 let r = _mm256_add_epi32(a, b);
3838 let e = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14);
3839 assert_eq_m256i(r, e);
3840 }
3841
3842 #[simd_test(enable = "avx2")]
3843 unsafe fn test_mm256_add_epi16() {
3844 #[rustfmt::skip]
3845 let a = _mm256_setr_epi16(
3846 0, 1, 2, 3, 4, 5, 6, 7,
3847 8, 9, 10, 11, 12, 13, 14, 15,
3848 );
3849 #[rustfmt::skip]
3850 let b = _mm256_setr_epi16(
3851 0, 1, 2, 3, 4, 5, 6, 7,
3852 8, 9, 10, 11, 12, 13, 14, 15,
3853 );
3854 let r = _mm256_add_epi16(a, b);
3855 #[rustfmt::skip]
3856 let e = _mm256_setr_epi16(
3857 0, 2, 4, 6, 8, 10, 12, 14,
3858 16, 18, 20, 22, 24, 26, 28, 30,
3859 );
3860 assert_eq_m256i(r, e);
3861 }
3862
3863 #[simd_test(enable = "avx2")]
3864 unsafe fn test_mm256_add_epi8() {
3865 #[rustfmt::skip]
3866 let a = _mm256_setr_epi8(
3867 0, 1, 2, 3, 4, 5, 6, 7,
3868 8, 9, 10, 11, 12, 13, 14, 15,
3869 16, 17, 18, 19, 20, 21, 22, 23,
3870 24, 25, 26, 27, 28, 29, 30, 31,
3871 );
3872 #[rustfmt::skip]
3873 let b = _mm256_setr_epi8(
3874 0, 1, 2, 3, 4, 5, 6, 7,
3875 8, 9, 10, 11, 12, 13, 14, 15,
3876 16, 17, 18, 19, 20, 21, 22, 23,
3877 24, 25, 26, 27, 28, 29, 30, 31,
3878 );
3879 let r = _mm256_add_epi8(a, b);
3880 #[rustfmt::skip]
3881 let e = _mm256_setr_epi8(
3882 0, 2, 4, 6, 8, 10, 12, 14,
3883 16, 18, 20, 22, 24, 26, 28, 30,
3884 32, 34, 36, 38, 40, 42, 44, 46,
3885 48, 50, 52, 54, 56, 58, 60, 62,
3886 );
3887 assert_eq_m256i(r, e);
3888 }
3889
3890 #[simd_test(enable = "avx2")]
3891 unsafe fn test_mm256_adds_epi8() {
3892 #[rustfmt::skip]
3893 let a = _mm256_setr_epi8(
3894 0, 1, 2, 3, 4, 5, 6, 7,
3895 8, 9, 10, 11, 12, 13, 14, 15,
3896 16, 17, 18, 19, 20, 21, 22, 23,
3897 24, 25, 26, 27, 28, 29, 30, 31,
3898 );
3899 #[rustfmt::skip]
3900 let b = _mm256_setr_epi8(
3901 32, 33, 34, 35, 36, 37, 38, 39,
3902 40, 41, 42, 43, 44, 45, 46, 47,
3903 48, 49, 50, 51, 52, 53, 54, 55,
3904 56, 57, 58, 59, 60, 61, 62, 63,
3905 );
3906 let r = _mm256_adds_epi8(a, b);
3907 #[rustfmt::skip]
3908 let e = _mm256_setr_epi8(
3909 32, 34, 36, 38, 40, 42, 44, 46,
3910 48, 50, 52, 54, 56, 58, 60, 62,
3911 64, 66, 68, 70, 72, 74, 76, 78,
3912 80, 82, 84, 86, 88, 90, 92, 94,
3913 );
3914 assert_eq_m256i(r, e);
3915 }
3916
3917 #[simd_test(enable = "avx2")]
3918 unsafe fn test_mm256_adds_epi8_saturate_positive() {
3919 let a = _mm256_set1_epi8(0x7F);
3920 let b = _mm256_set1_epi8(1);
3921 let r = _mm256_adds_epi8(a, b);
3922 assert_eq_m256i(r, a);
3923 }
3924
3925 #[simd_test(enable = "avx2")]
3926 unsafe fn test_mm256_adds_epi8_saturate_negative() {
3927 let a = _mm256_set1_epi8(-0x80);
3928 let b = _mm256_set1_epi8(-1);
3929 let r = _mm256_adds_epi8(a, b);
3930 assert_eq_m256i(r, a);
3931 }
3932
3933 #[simd_test(enable = "avx2")]
3934 unsafe fn test_mm256_adds_epi16() {
3935 #[rustfmt::skip]
3936 let a = _mm256_setr_epi16(
3937 0, 1, 2, 3, 4, 5, 6, 7,
3938 8, 9, 10, 11, 12, 13, 14, 15,
3939 );
3940 #[rustfmt::skip]
3941 let b = _mm256_setr_epi16(
3942 32, 33, 34, 35, 36, 37, 38, 39,
3943 40, 41, 42, 43, 44, 45, 46, 47,
3944 );
3945 let r = _mm256_adds_epi16(a, b);
3946 #[rustfmt::skip]
3947 let e = _mm256_setr_epi16(
3948 32, 34, 36, 38, 40, 42, 44, 46,
3949 48, 50, 52, 54, 56, 58, 60, 62,
3950 );
3951
3952 assert_eq_m256i(r, e);
3953 }
3954
3955 #[simd_test(enable = "avx2")]
3956 unsafe fn test_mm256_adds_epi16_saturate_positive() {
3957 let a = _mm256_set1_epi16(0x7FFF);
3958 let b = _mm256_set1_epi16(1);
3959 let r = _mm256_adds_epi16(a, b);
3960 assert_eq_m256i(r, a);
3961 }
3962
3963 #[simd_test(enable = "avx2")]
3964 unsafe fn test_mm256_adds_epi16_saturate_negative() {
3965 let a = _mm256_set1_epi16(-0x8000);
3966 let b = _mm256_set1_epi16(-1);
3967 let r = _mm256_adds_epi16(a, b);
3968 assert_eq_m256i(r, a);
3969 }
3970
3971 #[simd_test(enable = "avx2")]
3972 unsafe fn test_mm256_adds_epu8() {
3973 #[rustfmt::skip]
3974 let a = _mm256_setr_epi8(
3975 0, 1, 2, 3, 4, 5, 6, 7,
3976 8, 9, 10, 11, 12, 13, 14, 15,
3977 16, 17, 18, 19, 20, 21, 22, 23,
3978 24, 25, 26, 27, 28, 29, 30, 31,
3979 );
3980 #[rustfmt::skip]
3981 let b = _mm256_setr_epi8(
3982 32, 33, 34, 35, 36, 37, 38, 39,
3983 40, 41, 42, 43, 44, 45, 46, 47,
3984 48, 49, 50, 51, 52, 53, 54, 55,
3985 56, 57, 58, 59, 60, 61, 62, 63,
3986 );
3987 let r = _mm256_adds_epu8(a, b);
3988 #[rustfmt::skip]
3989 let e = _mm256_setr_epi8(
3990 32, 34, 36, 38, 40, 42, 44, 46,
3991 48, 50, 52, 54, 56, 58, 60, 62,
3992 64, 66, 68, 70, 72, 74, 76, 78,
3993 80, 82, 84, 86, 88, 90, 92, 94,
3994 );
3995 assert_eq_m256i(r, e);
3996 }
3997
3998 #[simd_test(enable = "avx2")]
3999 unsafe fn test_mm256_adds_epu8_saturate() {
4000 let a = _mm256_set1_epi8(!0);
4001 let b = _mm256_set1_epi8(1);
4002 let r = _mm256_adds_epu8(a, b);
4003 assert_eq_m256i(r, a);
4004 }
4005
4006 #[simd_test(enable = "avx2")]
4007 unsafe fn test_mm256_adds_epu16() {
4008 #[rustfmt::skip]
4009 let a = _mm256_setr_epi16(
4010 0, 1, 2, 3, 4, 5, 6, 7,
4011 8, 9, 10, 11, 12, 13, 14, 15,
4012 );
4013 #[rustfmt::skip]
4014 let b = _mm256_setr_epi16(
4015 32, 33, 34, 35, 36, 37, 38, 39,
4016 40, 41, 42, 43, 44, 45, 46, 47,
4017 );
4018 let r = _mm256_adds_epu16(a, b);
4019 #[rustfmt::skip]
4020 let e = _mm256_setr_epi16(
4021 32, 34, 36, 38, 40, 42, 44, 46,
4022 48, 50, 52, 54, 56, 58, 60, 62,
4023 );
4024
4025 assert_eq_m256i(r, e);
4026 }
4027
4028 #[simd_test(enable = "avx2")]
4029 unsafe fn test_mm256_adds_epu16_saturate() {
4030 let a = _mm256_set1_epi16(!0);
4031 let b = _mm256_set1_epi16(1);
4032 let r = _mm256_adds_epu16(a, b);
4033 assert_eq_m256i(r, a);
4034 }
4035
4036 #[simd_test(enable = "avx2")]
4037 unsafe fn test_mm256_and_si256() {
4038 let a = _mm256_set1_epi8(5);
4039 let b = _mm256_set1_epi8(3);
4040 let got = _mm256_and_si256(a, b);
4041 assert_eq_m256i(got, _mm256_set1_epi8(1));
4042 }
4043
4044 #[simd_test(enable = "avx2")]
4045 unsafe fn test_mm256_andnot_si256() {
4046 let a = _mm256_set1_epi8(5);
4047 let b = _mm256_set1_epi8(3);
4048 let got = _mm256_andnot_si256(a, b);
4049 assert_eq_m256i(got, _mm256_set1_epi8(2));
4050 }
4051
4052 #[simd_test(enable = "avx2")]
4053 unsafe fn test_mm256_avg_epu8() {
4054 let (a, b) = (_mm256_set1_epi8(3), _mm256_set1_epi8(9));
4055 let r = _mm256_avg_epu8(a, b);
4056 assert_eq_m256i(r, _mm256_set1_epi8(6));
4057 }
4058
4059 #[simd_test(enable = "avx2")]
4060 unsafe fn test_mm256_avg_epu16() {
4061 let (a, b) = (_mm256_set1_epi16(3), _mm256_set1_epi16(9));
4062 let r = _mm256_avg_epu16(a, b);
4063 assert_eq_m256i(r, _mm256_set1_epi16(6));
4064 }
4065
4066 #[simd_test(enable = "avx2")]
4067 unsafe fn test_mm_blend_epi32() {
4068 let (a, b) = (_mm_set1_epi32(3), _mm_set1_epi32(9));
4069 let e = _mm_setr_epi32(9, 3, 3, 3);
4070 let r = _mm_blend_epi32::<0x01>(a, b);
4071 assert_eq_m128i(r, e);
4072
4073 let r = _mm_blend_epi32::<0x0E>(b, a);
4074 assert_eq_m128i(r, e);
4075 }
4076
4077 #[simd_test(enable = "avx2")]
4078 unsafe fn test_mm256_blend_epi32() {
4079 let (a, b) = (_mm256_set1_epi32(3), _mm256_set1_epi32(9));
4080 let e = _mm256_setr_epi32(9, 3, 3, 3, 3, 3, 3, 3);
4081 let r = _mm256_blend_epi32::<0x01>(a, b);
4082 assert_eq_m256i(r, e);
4083
4084 let e = _mm256_setr_epi32(3, 9, 3, 3, 3, 3, 3, 9);
4085 let r = _mm256_blend_epi32::<0x82>(a, b);
4086 assert_eq_m256i(r, e);
4087
4088 let e = _mm256_setr_epi32(3, 3, 9, 9, 9, 9, 9, 3);
4089 let r = _mm256_blend_epi32::<0x7C>(a, b);
4090 assert_eq_m256i(r, e);
4091 }
4092
4093 #[simd_test(enable = "avx2")]
4094 unsafe fn test_mm256_blend_epi16() {
4095 let (a, b) = (_mm256_set1_epi16(3), _mm256_set1_epi16(9));
4096 let e = _mm256_setr_epi16(9, 3, 3, 3, 3, 3, 3, 3, 9, 3, 3, 3, 3, 3, 3, 3);
4097 let r = _mm256_blend_epi16::<0x01>(a, b);
4098 assert_eq_m256i(r, e);
4099
4100 let r = _mm256_blend_epi16::<0xFE>(b, a);
4101 assert_eq_m256i(r, e);
4102 }
4103
4104 #[simd_test(enable = "avx2")]
4105 unsafe fn test_mm256_blendv_epi8() {
4106 let (a, b) = (_mm256_set1_epi8(4), _mm256_set1_epi8(2));
4107 let mask = _mm256_insert_epi8::<2>(_mm256_set1_epi8(0), -1);
4108 let e = _mm256_insert_epi8::<2>(_mm256_set1_epi8(4), 2);
4109 let r = _mm256_blendv_epi8(a, b, mask);
4110 assert_eq_m256i(r, e);
4111 }
4112
4113 #[simd_test(enable = "avx2")]
4114 unsafe fn test_mm_broadcastb_epi8() {
4115 let a = _mm_insert_epi8::<0>(_mm_set1_epi8(0x00), 0x2a);
4116 let res = _mm_broadcastb_epi8(a);
4117 assert_eq_m128i(res, _mm_set1_epi8(0x2a));
4118 }
4119
4120 #[simd_test(enable = "avx2")]
4121 unsafe fn test_mm256_broadcastb_epi8() {
4122 let a = _mm_insert_epi8::<0>(_mm_set1_epi8(0x00), 0x2a);
4123 let res = _mm256_broadcastb_epi8(a);
4124 assert_eq_m256i(res, _mm256_set1_epi8(0x2a));
4125 }
4126
4127 #[simd_test(enable = "avx2")]
4128 unsafe fn test_mm_broadcastd_epi32() {
4129 let a = _mm_setr_epi32(0x2a, 0x8000000, 0, 0);
4130 let res = _mm_broadcastd_epi32(a);
4131 assert_eq_m128i(res, _mm_set1_epi32(0x2a));
4132 }
4133
4134 #[simd_test(enable = "avx2")]
4135 unsafe fn test_mm256_broadcastd_epi32() {
4136 let a = _mm_setr_epi32(0x2a, 0x8000000, 0, 0);
4137 let res = _mm256_broadcastd_epi32(a);
4138 assert_eq_m256i(res, _mm256_set1_epi32(0x2a));
4139 }
4140
4141 #[simd_test(enable = "avx2")]
4142 unsafe fn test_mm_broadcastq_epi64() {
4143 let a = _mm_setr_epi64x(0x1ffffffff, 0);
4144 let res = _mm_broadcastq_epi64(a);
4145 assert_eq_m128i(res, _mm_set1_epi64x(0x1ffffffff));
4146 }
4147
4148 #[simd_test(enable = "avx2")]
4149 unsafe fn test_mm256_broadcastq_epi64() {
4150 let a = _mm_setr_epi64x(0x1ffffffff, 0);
4151 let res = _mm256_broadcastq_epi64(a);
4152 assert_eq_m256i(res, _mm256_set1_epi64x(0x1ffffffff));
4153 }
4154
4155 #[simd_test(enable = "avx2")]
4156 unsafe fn test_mm_broadcastsd_pd() {
4157 let a = _mm_setr_pd(6.88, 3.44);
4158 let res = _mm_broadcastsd_pd(a);
4159 assert_eq_m128d(res, _mm_set1_pd(6.88));
4160 }
4161
4162 #[simd_test(enable = "avx2")]
4163 unsafe fn test_mm256_broadcastsd_pd() {
4164 let a = _mm_setr_pd(6.88, 3.44);
4165 let res = _mm256_broadcastsd_pd(a);
4166 assert_eq_m256d(res, _mm256_set1_pd(6.88f64));
4167 }
4168
4169 #[simd_test(enable = "avx2")]
4170 unsafe fn test_mm_broadcastsi128_si256() {
4171 let a = _mm_setr_epi64x(0x0987654321012334, 0x5678909876543210);
4172 let res = _mm_broadcastsi128_si256(a);
4173 let retval = _mm256_setr_epi64x(
4174 0x0987654321012334,
4175 0x5678909876543210,
4176 0x0987654321012334,
4177 0x5678909876543210,
4178 );
4179 assert_eq_m256i(res, retval);
4180 }
4181
4182 #[simd_test(enable = "avx2")]
4183 unsafe fn test_mm256_broadcastsi128_si256() {
4184 let a = _mm_setr_epi64x(0x0987654321012334, 0x5678909876543210);
4185 let res = _mm256_broadcastsi128_si256(a);
4186 let retval = _mm256_setr_epi64x(
4187 0x0987654321012334,
4188 0x5678909876543210,
4189 0x0987654321012334,
4190 0x5678909876543210,
4191 );
4192 assert_eq_m256i(res, retval);
4193 }
4194
4195 #[simd_test(enable = "avx2")]
4196 unsafe fn test_mm_broadcastss_ps() {
4197 let a = _mm_setr_ps(6.88, 3.44, 0.0, 0.0);
4198 let res = _mm_broadcastss_ps(a);
4199 assert_eq_m128(res, _mm_set1_ps(6.88));
4200 }
4201
4202 #[simd_test(enable = "avx2")]
4203 unsafe fn test_mm256_broadcastss_ps() {
4204 let a = _mm_setr_ps(6.88, 3.44, 0.0, 0.0);
4205 let res = _mm256_broadcastss_ps(a);
4206 assert_eq_m256(res, _mm256_set1_ps(6.88));
4207 }
4208
4209 #[simd_test(enable = "avx2")]
4210 unsafe fn test_mm_broadcastw_epi16() {
4211 let a = _mm_insert_epi16::<0>(_mm_set1_epi16(0x2a), 0x22b);
4212 let res = _mm_broadcastw_epi16(a);
4213 assert_eq_m128i(res, _mm_set1_epi16(0x22b));
4214 }
4215
4216 #[simd_test(enable = "avx2")]
4217 unsafe fn test_mm256_broadcastw_epi16() {
4218 let a = _mm_insert_epi16::<0>(_mm_set1_epi16(0x2a), 0x22b);
4219 let res = _mm256_broadcastw_epi16(a);
4220 assert_eq_m256i(res, _mm256_set1_epi16(0x22b));
4221 }
4222
4223 #[simd_test(enable = "avx2")]
4224 unsafe fn test_mm256_cmpeq_epi8() {
4225 #[rustfmt::skip]
4226 let a = _mm256_setr_epi8(
4227 0, 1, 2, 3, 4, 5, 6, 7,
4228 8, 9, 10, 11, 12, 13, 14, 15,
4229 16, 17, 18, 19, 20, 21, 22, 23,
4230 24, 25, 26, 27, 28, 29, 30, 31,
4231 );
4232 #[rustfmt::skip]
4233 let b = _mm256_setr_epi8(
4234 31, 30, 2, 28, 27, 26, 25, 24,
4235 23, 22, 21, 20, 19, 18, 17, 16,
4236 15, 14, 13, 12, 11, 10, 9, 8,
4237 7, 6, 5, 4, 3, 2, 1, 0,
4238 );
4239 let r = _mm256_cmpeq_epi8(a, b);
4240 assert_eq_m256i(r, _mm256_insert_epi8::<2>(_mm256_set1_epi8(0), !0));
4241 }
4242
4243 #[simd_test(enable = "avx2")]
4244 unsafe fn test_mm256_cmpeq_epi16() {
4245 #[rustfmt::skip]
4246 let a = _mm256_setr_epi16(
4247 0, 1, 2, 3, 4, 5, 6, 7,
4248 8, 9, 10, 11, 12, 13, 14, 15,
4249 );
4250 #[rustfmt::skip]
4251 let b = _mm256_setr_epi16(
4252 15, 14, 2, 12, 11, 10, 9, 8,
4253 7, 6, 5, 4, 3, 2, 1, 0,
4254 );
4255 let r = _mm256_cmpeq_epi16(a, b);
4256 assert_eq_m256i(r, _mm256_insert_epi16::<2>(_mm256_set1_epi16(0), !0));
4257 }
4258
4259 #[simd_test(enable = "avx2")]
4260 unsafe fn test_mm256_cmpeq_epi32() {
4261 let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
4262 let b = _mm256_setr_epi32(7, 6, 2, 4, 3, 2, 1, 0);
4263 let r = _mm256_cmpeq_epi32(a, b);
4264 let e = _mm256_set1_epi32(0);
4265 let e = _mm256_insert_epi32::<2>(e, !0);
4266 assert_eq_m256i(r, e);
4267 }
4268
4269 #[simd_test(enable = "avx2")]
4270 unsafe fn test_mm256_cmpeq_epi64() {
4271 let a = _mm256_setr_epi64x(0, 1, 2, 3);
4272 let b = _mm256_setr_epi64x(3, 2, 2, 0);
4273 let r = _mm256_cmpeq_epi64(a, b);
4274 assert_eq_m256i(r, _mm256_insert_epi64::<2>(_mm256_set1_epi64x(0), !0));
4275 }
4276
4277 #[simd_test(enable = "avx2")]
4278 unsafe fn test_mm256_cmpgt_epi8() {
4279 let a = _mm256_insert_epi8::<0>(_mm256_set1_epi8(0), 5);
4280 let b = _mm256_set1_epi8(0);
4281 let r = _mm256_cmpgt_epi8(a, b);
4282 assert_eq_m256i(r, _mm256_insert_epi8::<0>(_mm256_set1_epi8(0), !0));
4283 }
4284
4285 #[simd_test(enable = "avx2")]
4286 unsafe fn test_mm256_cmpgt_epi16() {
4287 let a = _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), 5);
4288 let b = _mm256_set1_epi16(0);
4289 let r = _mm256_cmpgt_epi16(a, b);
4290 assert_eq_m256i(r, _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), !0));
4291 }
4292
4293 #[simd_test(enable = "avx2")]
4294 unsafe fn test_mm256_cmpgt_epi32() {
4295 let a = _mm256_insert_epi32::<0>(_mm256_set1_epi32(0), 5);
4296 let b = _mm256_set1_epi32(0);
4297 let r = _mm256_cmpgt_epi32(a, b);
4298 assert_eq_m256i(r, _mm256_insert_epi32::<0>(_mm256_set1_epi32(0), !0));
4299 }
4300
4301 #[simd_test(enable = "avx2")]
4302 unsafe fn test_mm256_cmpgt_epi64() {
4303 let a = _mm256_insert_epi64::<0>(_mm256_set1_epi64x(0), 5);
4304 let b = _mm256_set1_epi64x(0);
4305 let r = _mm256_cmpgt_epi64(a, b);
4306 assert_eq_m256i(r, _mm256_insert_epi64::<0>(_mm256_set1_epi64x(0), !0));
4307 }
4308
4309 #[simd_test(enable = "avx2")]
4310 unsafe fn test_mm256_cvtepi8_epi16() {
4311 #[rustfmt::skip]
4312 let a = _mm_setr_epi8(
4313 0, 0, -1, 1, -2, 2, -3, 3,
4314 -4, 4, -5, 5, -6, 6, -7, 7,
4315 );
4316 #[rustfmt::skip]
4317 let r = _mm256_setr_epi16(
4318 0, 0, -1, 1, -2, 2, -3, 3,
4319 -4, 4, -5, 5, -6, 6, -7, 7,
4320 );
4321 assert_eq_m256i(r, _mm256_cvtepi8_epi16(a));
4322 }
4323
4324 #[simd_test(enable = "avx2")]
4325 unsafe fn test_mm256_cvtepi8_epi32() {
4326 #[rustfmt::skip]
4327 let a = _mm_setr_epi8(
4328 0, 0, -1, 1, -2, 2, -3, 3,
4329 -4, 4, -5, 5, -6, 6, -7, 7,
4330 );
4331 let r = _mm256_setr_epi32(0, 0, -1, 1, -2, 2, -3, 3);
4332 assert_eq_m256i(r, _mm256_cvtepi8_epi32(a));
4333 }
4334
4335 #[simd_test(enable = "avx2")]
4336 unsafe fn test_mm256_cvtepi8_epi64() {
4337 #[rustfmt::skip]
4338 let a = _mm_setr_epi8(
4339 0, 0, -1, 1, -2, 2, -3, 3,
4340 -4, 4, -5, 5, -6, 6, -7, 7,
4341 );
4342 let r = _mm256_setr_epi64x(0, 0, -1, 1);
4343 assert_eq_m256i(r, _mm256_cvtepi8_epi64(a));
4344 }
4345
4346 #[simd_test(enable = "avx2")]
4347 unsafe fn test_mm256_cvtepi16_epi32() {
4348 let a = _mm_setr_epi16(0, 0, -1, 1, -2, 2, -3, 3);
4349 let r = _mm256_setr_epi32(0, 0, -1, 1, -2, 2, -3, 3);
4350 assert_eq_m256i(r, _mm256_cvtepi16_epi32(a));
4351 }
4352
4353 #[simd_test(enable = "avx2")]
4354 unsafe fn test_mm256_cvtepi16_epi64() {
4355 let a = _mm_setr_epi16(0, 0, -1, 1, -2, 2, -3, 3);
4356 let r = _mm256_setr_epi64x(0, 0, -1, 1);
4357 assert_eq_m256i(r, _mm256_cvtepi16_epi64(a));
4358 }
4359
4360 #[simd_test(enable = "avx2")]
4361 unsafe fn test_mm256_cvtepi32_epi64() {
4362 let a = _mm_setr_epi32(0, 0, -1, 1);
4363 let r = _mm256_setr_epi64x(0, 0, -1, 1);
4364 assert_eq_m256i(r, _mm256_cvtepi32_epi64(a));
4365 }
4366
4367 #[simd_test(enable = "avx2")]
4368 unsafe fn test_mm256_cvtepu16_epi32() {
4369 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4370 let r = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
4371 assert_eq_m256i(r, _mm256_cvtepu16_epi32(a));
4372 }
4373
4374 #[simd_test(enable = "avx2")]
4375 unsafe fn test_mm256_cvtepu16_epi64() {
4376 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4377 let r = _mm256_setr_epi64x(0, 1, 2, 3);
4378 assert_eq_m256i(r, _mm256_cvtepu16_epi64(a));
4379 }
4380
4381 #[simd_test(enable = "avx2")]
4382 unsafe fn test_mm256_cvtepu32_epi64() {
4383 let a = _mm_setr_epi32(0, 1, 2, 3);
4384 let r = _mm256_setr_epi64x(0, 1, 2, 3);
4385 assert_eq_m256i(r, _mm256_cvtepu32_epi64(a));
4386 }
4387
4388 #[simd_test(enable = "avx2")]
4389 unsafe fn test_mm256_cvtepu8_epi16() {
4390 #[rustfmt::skip]
4391 let a = _mm_setr_epi8(
4392 0, 1, 2, 3, 4, 5, 6, 7,
4393 8, 9, 10, 11, 12, 13, 14, 15,
4394 );
4395 #[rustfmt::skip]
4396 let r = _mm256_setr_epi16(
4397 0, 1, 2, 3, 4, 5, 6, 7,
4398 8, 9, 10, 11, 12, 13, 14, 15,
4399 );
4400 assert_eq_m256i(r, _mm256_cvtepu8_epi16(a));
4401 }
4402
4403 #[simd_test(enable = "avx2")]
4404 unsafe fn test_mm256_cvtepu8_epi32() {
4405 #[rustfmt::skip]
4406 let a = _mm_setr_epi8(
4407 0, 1, 2, 3, 4, 5, 6, 7,
4408 8, 9, 10, 11, 12, 13, 14, 15,
4409 );
4410 let r = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
4411 assert_eq_m256i(r, _mm256_cvtepu8_epi32(a));
4412 }
4413
4414 #[simd_test(enable = "avx2")]
4415 unsafe fn test_mm256_cvtepu8_epi64() {
4416 #[rustfmt::skip]
4417 let a = _mm_setr_epi8(
4418 0, 1, 2, 3, 4, 5, 6, 7,
4419 8, 9, 10, 11, 12, 13, 14, 15,
4420 );
4421 let r = _mm256_setr_epi64x(0, 1, 2, 3);
4422 assert_eq_m256i(r, _mm256_cvtepu8_epi64(a));
4423 }
4424
4425 #[simd_test(enable = "avx2")]
4426 unsafe fn test_mm256_extracti128_si256() {
4427 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4428 let r = _mm256_extracti128_si256::<1>(a);
4429 let e = _mm_setr_epi64x(3, 4);
4430 assert_eq_m128i(r, e);
4431 }
4432
4433 #[simd_test(enable = "avx2")]
4434 unsafe fn test_mm256_hadd_epi16() {
4435 let a = _mm256_set1_epi16(2);
4436 let b = _mm256_set1_epi16(4);
4437 let r = _mm256_hadd_epi16(a, b);
4438 let e = _mm256_setr_epi16(4, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 8);
4439 assert_eq_m256i(r, e);
4440 }
4441
4442 #[simd_test(enable = "avx2")]
4443 unsafe fn test_mm256_hadd_epi32() {
4444 let a = _mm256_set1_epi32(2);
4445 let b = _mm256_set1_epi32(4);
4446 let r = _mm256_hadd_epi32(a, b);
4447 let e = _mm256_setr_epi32(4, 4, 8, 8, 4, 4, 8, 8);
4448 assert_eq_m256i(r, e);
4449 }
4450
4451 #[simd_test(enable = "avx2")]
4452 unsafe fn test_mm256_hadds_epi16() {
4453 let a = _mm256_set1_epi16(2);
4454 let a = _mm256_insert_epi16::<0>(a, 0x7fff);
4455 let a = _mm256_insert_epi16::<1>(a, 1);
4456 let b = _mm256_set1_epi16(4);
4457 let r = _mm256_hadds_epi16(a, b);
4458 #[rustfmt::skip]
4459 let e = _mm256_setr_epi16(
4460 0x7FFF, 4, 4, 4, 8, 8, 8, 8,
4461 4, 4, 4, 4, 8, 8, 8, 8,
4462 );
4463 assert_eq_m256i(r, e);
4464 }
4465
4466 #[simd_test(enable = "avx2")]
4467 unsafe fn test_mm256_hsub_epi16() {
4468 let a = _mm256_set1_epi16(2);
4469 let b = _mm256_set1_epi16(4);
4470 let r = _mm256_hsub_epi16(a, b);
4471 let e = _mm256_set1_epi16(0);
4472 assert_eq_m256i(r, e);
4473 }
4474
4475 #[simd_test(enable = "avx2")]
4476 unsafe fn test_mm256_hsub_epi32() {
4477 let a = _mm256_set1_epi32(2);
4478 let b = _mm256_set1_epi32(4);
4479 let r = _mm256_hsub_epi32(a, b);
4480 let e = _mm256_set1_epi32(0);
4481 assert_eq_m256i(r, e);
4482 }
4483
4484 #[simd_test(enable = "avx2")]
4485 unsafe fn test_mm256_hsubs_epi16() {
4486 let a = _mm256_set1_epi16(2);
4487 let a = _mm256_insert_epi16::<0>(a, 0x7fff);
4488 let a = _mm256_insert_epi16::<1>(a, -1);
4489 let b = _mm256_set1_epi16(4);
4490 let r = _mm256_hsubs_epi16(a, b);
4491 let e = _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), 0x7FFF);
4492 assert_eq_m256i(r, e);
4493 }
4494
4495 #[simd_test(enable = "avx2")]
4496 unsafe fn test_mm256_madd_epi16() {
4497 let a = _mm256_set1_epi16(2);
4498 let b = _mm256_set1_epi16(4);
4499 let r = _mm256_madd_epi16(a, b);
4500 let e = _mm256_set1_epi32(16);
4501 assert_eq_m256i(r, e);
4502 }
4503
4504 #[simd_test(enable = "avx2")]
4505 unsafe fn test_mm256_inserti128_si256() {
4506 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4507 let b = _mm_setr_epi64x(7, 8);
4508 let r = _mm256_inserti128_si256::<1>(a, b);
4509 let e = _mm256_setr_epi64x(1, 2, 7, 8);
4510 assert_eq_m256i(r, e);
4511 }
4512
4513 #[simd_test(enable = "avx2")]
4514 unsafe fn test_mm256_maddubs_epi16() {
4515 let a = _mm256_set1_epi8(2);
4516 let b = _mm256_set1_epi8(4);
4517 let r = _mm256_maddubs_epi16(a, b);
4518 let e = _mm256_set1_epi16(16);
4519 assert_eq_m256i(r, e);
4520 }
4521
4522 #[simd_test(enable = "avx2")]
4523 unsafe fn test_mm_maskload_epi32() {
4524 let nums = [1, 2, 3, 4];
4525 let a = &nums as *const i32;
4526 let mask = _mm_setr_epi32(-1, 0, 0, -1);
4527 let r = _mm_maskload_epi32(a, mask);
4528 let e = _mm_setr_epi32(1, 0, 0, 4);
4529 assert_eq_m128i(r, e);
4530 }
4531
4532 #[simd_test(enable = "avx2")]
4533 unsafe fn test_mm256_maskload_epi32() {
4534 let nums = [1, 2, 3, 4, 5, 6, 7, 8];
4535 let a = &nums as *const i32;
4536 let mask = _mm256_setr_epi32(-1, 0, 0, -1, 0, -1, -1, 0);
4537 let r = _mm256_maskload_epi32(a, mask);
4538 let e = _mm256_setr_epi32(1, 0, 0, 4, 0, 6, 7, 0);
4539 assert_eq_m256i(r, e);
4540 }
4541
4542 #[simd_test(enable = "avx2")]
4543 unsafe fn test_mm_maskload_epi64() {
4544 let nums = [1_i64, 2_i64];
4545 let a = &nums as *const i64;
4546 let mask = _mm_setr_epi64x(0, -1);
4547 let r = _mm_maskload_epi64(a, mask);
4548 let e = _mm_setr_epi64x(0, 2);
4549 assert_eq_m128i(r, e);
4550 }
4551
4552 #[simd_test(enable = "avx2")]
4553 unsafe fn test_mm256_maskload_epi64() {
4554 let nums = [1_i64, 2_i64, 3_i64, 4_i64];
4555 let a = &nums as *const i64;
4556 let mask = _mm256_setr_epi64x(0, -1, -1, 0);
4557 let r = _mm256_maskload_epi64(a, mask);
4558 let e = _mm256_setr_epi64x(0, 2, 3, 0);
4559 assert_eq_m256i(r, e);
4560 }
4561
4562 #[simd_test(enable = "avx2")]
4563 unsafe fn test_mm_maskstore_epi32() {
4564 let a = _mm_setr_epi32(1, 2, 3, 4);
4565 let mut arr = [-1, -1, -1, -1];
4566 let mask = _mm_setr_epi32(-1, 0, 0, -1);
4567 _mm_maskstore_epi32(arr.as_mut_ptr(), mask, a);
4568 let e = [1, -1, -1, 4];
4569 assert_eq!(arr, e);
4570 }
4571
4572 #[simd_test(enable = "avx2")]
4573 unsafe fn test_mm256_maskstore_epi32() {
4574 let a = _mm256_setr_epi32(1, 0x6d726f, 3, 42, 0x777161, 6, 7, 8);
4575 let mut arr = [-1, -1, -1, 0x776173, -1, 0x68657265, -1, -1];
4576 let mask = _mm256_setr_epi32(-1, 0, 0, -1, 0, -1, -1, 0);
4577 _mm256_maskstore_epi32(arr.as_mut_ptr(), mask, a);
4578 let e = [1, -1, -1, 42, -1, 6, 7, -1];
4579 assert_eq!(arr, e);
4580 }
4581
4582 #[simd_test(enable = "avx2")]
4583 unsafe fn test_mm_maskstore_epi64() {
4584 let a = _mm_setr_epi64x(1_i64, 2_i64);
4585 let mut arr = [-1_i64, -1_i64];
4586 let mask = _mm_setr_epi64x(0, -1);
4587 _mm_maskstore_epi64(arr.as_mut_ptr(), mask, a);
4588 let e = [-1, 2];
4589 assert_eq!(arr, e);
4590 }
4591
4592 #[simd_test(enable = "avx2")]
4593 unsafe fn test_mm256_maskstore_epi64() {
4594 let a = _mm256_setr_epi64x(1_i64, 2_i64, 3_i64, 4_i64);
4595 let mut arr = [-1_i64, -1_i64, -1_i64, -1_i64];
4596 let mask = _mm256_setr_epi64x(0, -1, -1, 0);
4597 _mm256_maskstore_epi64(arr.as_mut_ptr(), mask, a);
4598 let e = [-1, 2, 3, -1];
4599 assert_eq!(arr, e);
4600 }
4601
4602 #[simd_test(enable = "avx2")]
4603 unsafe fn test_mm256_max_epi16() {
4604 let a = _mm256_set1_epi16(2);
4605 let b = _mm256_set1_epi16(4);
4606 let r = _mm256_max_epi16(a, b);
4607 assert_eq_m256i(r, b);
4608 }
4609
4610 #[simd_test(enable = "avx2")]
4611 unsafe fn test_mm256_max_epi32() {
4612 let a = _mm256_set1_epi32(2);
4613 let b = _mm256_set1_epi32(4);
4614 let r = _mm256_max_epi32(a, b);
4615 assert_eq_m256i(r, b);
4616 }
4617
4618 #[simd_test(enable = "avx2")]
4619 unsafe fn test_mm256_max_epi8() {
4620 let a = _mm256_set1_epi8(2);
4621 let b = _mm256_set1_epi8(4);
4622 let r = _mm256_max_epi8(a, b);
4623 assert_eq_m256i(r, b);
4624 }
4625
4626 #[simd_test(enable = "avx2")]
4627 unsafe fn test_mm256_max_epu16() {
4628 let a = _mm256_set1_epi16(2);
4629 let b = _mm256_set1_epi16(4);
4630 let r = _mm256_max_epu16(a, b);
4631 assert_eq_m256i(r, b);
4632 }
4633
4634 #[simd_test(enable = "avx2")]
4635 unsafe fn test_mm256_max_epu32() {
4636 let a = _mm256_set1_epi32(2);
4637 let b = _mm256_set1_epi32(4);
4638 let r = _mm256_max_epu32(a, b);
4639 assert_eq_m256i(r, b);
4640 }
4641
4642 #[simd_test(enable = "avx2")]
4643 unsafe fn test_mm256_max_epu8() {
4644 let a = _mm256_set1_epi8(2);
4645 let b = _mm256_set1_epi8(4);
4646 let r = _mm256_max_epu8(a, b);
4647 assert_eq_m256i(r, b);
4648 }
4649
4650 #[simd_test(enable = "avx2")]
4651 unsafe fn test_mm256_min_epi16() {
4652 let a = _mm256_set1_epi16(2);
4653 let b = _mm256_set1_epi16(4);
4654 let r = _mm256_min_epi16(a, b);
4655 assert_eq_m256i(r, a);
4656 }
4657
4658 #[simd_test(enable = "avx2")]
4659 unsafe fn test_mm256_min_epi32() {
4660 let a = _mm256_set1_epi32(2);
4661 let b = _mm256_set1_epi32(4);
4662 let r = _mm256_min_epi32(a, b);
4663 assert_eq_m256i(r, a);
4664 }
4665
4666 #[simd_test(enable = "avx2")]
4667 unsafe fn test_mm256_min_epi8() {
4668 let a = _mm256_set1_epi8(2);
4669 let b = _mm256_set1_epi8(4);
4670 let r = _mm256_min_epi8(a, b);
4671 assert_eq_m256i(r, a);
4672 }
4673
4674 #[simd_test(enable = "avx2")]
4675 unsafe fn test_mm256_min_epu16() {
4676 let a = _mm256_set1_epi16(2);
4677 let b = _mm256_set1_epi16(4);
4678 let r = _mm256_min_epu16(a, b);
4679 assert_eq_m256i(r, a);
4680 }
4681
4682 #[simd_test(enable = "avx2")]
4683 unsafe fn test_mm256_min_epu32() {
4684 let a = _mm256_set1_epi32(2);
4685 let b = _mm256_set1_epi32(4);
4686 let r = _mm256_min_epu32(a, b);
4687 assert_eq_m256i(r, a);
4688 }
4689
4690 #[simd_test(enable = "avx2")]
4691 unsafe fn test_mm256_min_epu8() {
4692 let a = _mm256_set1_epi8(2);
4693 let b = _mm256_set1_epi8(4);
4694 let r = _mm256_min_epu8(a, b);
4695 assert_eq_m256i(r, a);
4696 }
4697
4698 #[simd_test(enable = "avx2")]
4699 unsafe fn test_mm256_movemask_epi8() {
4700 let a = _mm256_set1_epi8(-1);
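// vpmovmskb: all 32 byte sign bits are set, so the packed mask is 0xFFFF_FFFF, i.e. -1 as i32.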
4701 let r = _mm256_movemask_epi8(a);
4702 let e = -1;
4703 assert_eq!(r, e);
4704 }
4705
4706 #[simd_test(enable = "avx2")]
4707 unsafe fn test_mm256_mpsadbw_epu8() {
4708 let a = _mm256_set1_epi8(2);
4709 let b = _mm256_set1_epi8(4);
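// vmpsadbw: with IMM8 = 0 every result word is a sum of absolute differences over four byte pairs: 4 * |2 - 4| = 8.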
4710 let r = _mm256_mpsadbw_epu8::<0>(a, b);
4711 let e = _mm256_set1_epi16(8);
4712 assert_eq_m256i(r, e);
4713 }
4714
4715 #[simd_test(enable = "avx2")]
4716 unsafe fn test_mm256_mul_epi32() {
4717 let a = _mm256_setr_epi32(0, 0, 0, 0, 2, 2, 2, 2);
4718 let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
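// Only the even-indexed 32-bit elements are multiplied: 0*1, 0*3, 2*5, 2*7.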
4719 let r = _mm256_mul_epi32(a, b);
4720 let e = _mm256_setr_epi64x(0, 0, 10, 14);
4721 assert_eq_m256i(r, e);
4722 }
4723
4724 #[simd_test(enable = "avx2")]
4725 unsafe fn test_mm256_mul_epu32() {
4726 let a = _mm256_setr_epi32(0, 0, 0, 0, 2, 2, 2, 2);
4727 let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
4728 let r = _mm256_mul_epu32(a, b);
4729 let e = _mm256_setr_epi64x(0, 0, 10, 14);
4730 assert_eq_m256i(r, e);
4731 }
4732
4733 #[simd_test(enable = "avx2")]
4734 unsafe fn test_mm256_mulhi_epi16() {
4735 let a = _mm256_set1_epi16(6535);
4736 let b = _mm256_set1_epi16(6535);
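// 6535 * 6535 = 42_706_225; the high 16 bits are 42_706_225 >> 16 = 651.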
4737 let r = _mm256_mulhi_epi16(a, b);
4738 let e = _mm256_set1_epi16(651);
4739 assert_eq_m256i(r, e);
4740 }
4741
4742 #[simd_test(enable = "avx2")]
4743 unsafe fn test_mm256_mulhi_epu16() {
4744 let a = _mm256_set1_epi16(6535);
4745 let b = _mm256_set1_epi16(6535);
4746 let r = _mm256_mulhi_epu16(a, b);
4747 let e = _mm256_set1_epi16(651);
4748 assert_eq_m256i(r, e);
4749 }
4750
4751 #[simd_test(enable = "avx2")]
4752 unsafe fn test_mm256_mullo_epi16() {
4753 let a = _mm256_set1_epi16(2);
4754 let b = _mm256_set1_epi16(4);
4755 let r = _mm256_mullo_epi16(a, b);
4756 let e = _mm256_set1_epi16(8);
4757 assert_eq_m256i(r, e);
4758 }
4759
4760 #[simd_test(enable = "avx2")]
4761 unsafe fn test_mm256_mullo_epi32() {
4762 let a = _mm256_set1_epi32(2);
4763 let b = _mm256_set1_epi32(4);
4764 let r = _mm256_mullo_epi32(a, b);
4765 let e = _mm256_set1_epi32(8);
4766 assert_eq_m256i(r, e);
4767 }
4768
4769 #[simd_test(enable = "avx2")]
4770 unsafe fn test_mm256_mulhrs_epi16() {
4771 let a = _mm256_set1_epi16(0x4000); // 0.5 in Q15
4772 let b = _mm256_set1_epi16(0x2000); // 0.25 in Q15
4773 let r = _mm256_mulhrs_epi16(a, b); // vpmulhrsw: ((a * b) >> 14 + 1) >> 1 per lane
4774 let e = _mm256_set1_epi16(0x1000); // 0.125 in Q15, i.e. 4096
4775 assert_eq_m256i(r, e);
4776 }
4777
4778 #[simd_test(enable = "avx2")]
4779 unsafe fn test_mm256_or_si256() {
4780 let a = _mm256_set1_epi8(-1);
4781 let b = _mm256_set1_epi8(0);
4782 let r = _mm256_or_si256(a, b);
4783 assert_eq_m256i(r, a);
4784 }
4785
4786 #[simd_test(enable = "avx2")]
4787 unsafe fn test_mm256_packs_epi16() {
4788 let a = _mm256_set1_epi16(2);
4789 let b = _mm256_set1_epi16(4);
4790 let r = _mm256_packs_epi16(a, b);
4791 #[rustfmt::skip]
4792 let e = _mm256_setr_epi8(
4793 2, 2, 2, 2, 2, 2, 2, 2,
4794 4, 4, 4, 4, 4, 4, 4, 4,
4795 2, 2, 2, 2, 2, 2, 2, 2,
4796 4, 4, 4, 4, 4, 4, 4, 4,
4797 );
4798
4799 assert_eq_m256i(r, e);
4800 }
4801
4802 #[simd_test(enable = "avx2")]
4803 unsafe fn test_mm256_packs_epi32() {
4804 let a = _mm256_set1_epi32(2);
4805 let b = _mm256_set1_epi32(4);
4806 let r = _mm256_packs_epi32(a, b);
4807 let e = _mm256_setr_epi16(2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4);
4808
4809 assert_eq_m256i(r, e);
4810 }
4811
4812 #[simd_test(enable = "avx2")]
4813 unsafe fn test_mm256_packus_epi16() {
4814 let a = _mm256_set1_epi16(2);
4815 let b = _mm256_set1_epi16(4);
4816 let r = _mm256_packus_epi16(a, b);
4817 #[rustfmt::skip]
4818 let e = _mm256_setr_epi8(
4819 2, 2, 2, 2, 2, 2, 2, 2,
4820 4, 4, 4, 4, 4, 4, 4, 4,
4821 2, 2, 2, 2, 2, 2, 2, 2,
4822 4, 4, 4, 4, 4, 4, 4, 4,
4823 );
4824
4825 assert_eq_m256i(r, e);
4826 }
4827
4828 #[simd_test(enable = "avx2")]
4829 unsafe fn test_mm256_packus_epi32() {
4830 let a = _mm256_set1_epi32(2);
4831 let b = _mm256_set1_epi32(4);
4832 let r = _mm256_packus_epi32(a, b);
4833 let e = _mm256_setr_epi16(2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4);
4834
4835 assert_eq_m256i(r, e);
4836 }
4837
4838 #[simd_test(enable = "avx2")]
4839 unsafe fn test_mm256_sad_epu8() {
4840 let a = _mm256_set1_epi8(2);
4841 let b = _mm256_set1_epi8(4);
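// vpsadbw: each 64-bit lane receives the sum of |2 - 4| over eight bytes = 16.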
4842 let r = _mm256_sad_epu8(a, b);
4843 let e = _mm256_set1_epi64x(16);
4844 assert_eq_m256i(r, e);
4845 }
4846
4847 #[simd_test(enable = "avx2")]
4848 unsafe fn test_mm256_shufflehi_epi16() {
4849 #[rustfmt::skip]
4850 let a = _mm256_setr_epi16(
4851 0, 1, 2, 3, 11, 22, 33, 44,
4852 4, 5, 6, 7, 55, 66, 77, 88,
4853 );
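// The control 0b00_01_01_11 reorders each high 64-bit half as elements [3, 1, 1, 0]; the low halves pass through.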
4854 #[rustfmt::skip]
4855 let e = _mm256_setr_epi16(
4856 0, 1, 2, 3, 44, 22, 22, 11,
4857 4, 5, 6, 7, 88, 66, 66, 55,
4858 );
4859 let r = _mm256_shufflehi_epi16::<0b00_01_01_11>(a);
4860 assert_eq_m256i(r, e);
4861 }
4862
4863 #[simd_test(enable = "avx2")]
4864 unsafe fn test_mm256_shufflelo_epi16() {
4865 #[rustfmt::skip]
4866 let a = _mm256_setr_epi16(
4867 11, 22, 33, 44, 0, 1, 2, 3,
4868 55, 66, 77, 88, 4, 5, 6, 7,
4869 );
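// Same control as the shufflehi test, applied to each low 64-bit half: elements [3, 1, 1, 0]; the high halves pass through.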
4870 #[rustfmt::skip]
4871 let e = _mm256_setr_epi16(
4872 44, 22, 22, 11, 0, 1, 2, 3,
4873 88, 66, 66, 55, 4, 5, 6, 7,
4874 );
4875 let r = _mm256_shufflelo_epi16::<0b00_01_01_11>(a);
4876 assert_eq_m256i(r, e);
4877 }
4878
4879 #[simd_test(enable = "avx2")]
4880 unsafe fn test_mm256_sign_epi16() {
4881 let a = _mm256_set1_epi16(2);
4882 let b = _mm256_set1_epi16(-1);
4883 let r = _mm256_sign_epi16(a, b);
4884 let e = _mm256_set1_epi16(-2);
4885 assert_eq_m256i(r, e);
4886 }
4887
4888 #[simd_test(enable = "avx2")]
4889 unsafe fn test_mm256_sign_epi32() {
4890 let a = _mm256_set1_epi32(2);
4891 let b = _mm256_set1_epi32(-1);
4892 let r = _mm256_sign_epi32(a, b);
4893 let e = _mm256_set1_epi32(-2);
4894 assert_eq_m256i(r, e);
4895 }
4896
4897 #[simd_test(enable = "avx2")]
4898 unsafe fn test_mm256_sign_epi8() {
4899 let a = _mm256_set1_epi8(2);
4900 let b = _mm256_set1_epi8(-1);
4901 let r = _mm256_sign_epi8(a, b);
4902 let e = _mm256_set1_epi8(-2);
4903 assert_eq_m256i(r, e);
4904 }
4905
4906 #[simd_test(enable = "avx2")]
4907 unsafe fn test_mm256_sll_epi16() {
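// For the sll/srl/sra count-vector forms, the shift count comes from the low 64 bits of `b`.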
4908 let a = _mm256_set1_epi16(0xFF);
4909 let b = _mm_insert_epi16::<0>(_mm_set1_epi16(0), 4);
4910 let r = _mm256_sll_epi16(a, b);
4911 assert_eq_m256i(r, _mm256_set1_epi16(0xFF0));
4912 }
4913
4914 #[simd_test(enable = "avx2")]
4915 unsafe fn test_mm256_sll_epi32() {
4916 let a = _mm256_set1_epi32(0xFFFF);
4917 let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 4);
4918 let r = _mm256_sll_epi32(a, b);
4919 assert_eq_m256i(r, _mm256_set1_epi32(0xFFFF0));
4920 }
4921
4922 #[simd_test(enable = "avx2")]
4923 unsafe fn test_mm256_sll_epi64() {
4924 let a = _mm256_set1_epi64x(0xFFFFFFFF);
4925 let b = _mm_setr_epi64x(4, 0); // avoid the x86_64-only _mm_insert_epi64; only the low 64 bits supply the count
4926 let r = _mm256_sll_epi64(a, b);
4927 assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFFF0));
4928 }
4929
4930 #[simd_test(enable = "avx2")]
4931 unsafe fn test_mm256_slli_epi16() {
4932 assert_eq_m256i(
4933 _mm256_slli_epi16::<4>(_mm256_set1_epi16(0xFF)),
4934 _mm256_set1_epi16(0xFF0),
4935 );
4936 }
4937
4938 #[simd_test(enable = "avx2")]
4939 unsafe fn test_mm256_slli_epi32() {
4940 assert_eq_m256i(
4941 _mm256_slli_epi32::<4>(_mm256_set1_epi32(0xFFFF)),
4942 _mm256_set1_epi32(0xFFFF0),
4943 );
4944 }
4945
4946 #[simd_test(enable = "avx2")]
4947 unsafe fn test_mm256_slli_epi64() {
4948 assert_eq_m256i(
4949 _mm256_slli_epi64::<4>(_mm256_set1_epi64x(0xFFFFFFFF)),
4950 _mm256_set1_epi64x(0xFFFFFFFF0),
4951 );
4952 }
4953
4954 #[simd_test(enable = "avx2")]
4955 unsafe fn test_mm256_slli_si256() {
4956 let a = _mm256_set1_epi64x(0xFFFFFFFF);
4957 let r = _mm256_slli_si256::<3>(a);
4958 assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFFF000000));
4959 }
4960
4961 #[simd_test(enable = "avx2")]
4962 unsafe fn test_mm_sllv_epi32() {
4963 let a = _mm_set1_epi32(2);
4964 let b = _mm_set1_epi32(1);
4965 let r = _mm_sllv_epi32(a, b);
4966 let e = _mm_set1_epi32(4);
4967 assert_eq_m128i(r, e);
4968 }
4969
4970 #[simd_test(enable = "avx2")]
4971 unsafe fn test_mm256_sllv_epi32() {
4972 let a = _mm256_set1_epi32(2);
4973 let b = _mm256_set1_epi32(1);
4974 let r = _mm256_sllv_epi32(a, b);
4975 let e = _mm256_set1_epi32(4);
4976 assert_eq_m256i(r, e);
4977 }
4978
4979 #[simd_test(enable = "avx2")]
4980 unsafe fn test_mm_sllv_epi64() {
4981 let a = _mm_set1_epi64x(2);
4982 let b = _mm_set1_epi64x(1);
4983 let r = _mm_sllv_epi64(a, b);
4984 let e = _mm_set1_epi64x(4);
4985 assert_eq_m128i(r, e);
4986 }
4987
4988 #[simd_test(enable = "avx2")]
4989 unsafe fn test_mm256_sllv_epi64() {
4990 let a = _mm256_set1_epi64x(2);
4991 let b = _mm256_set1_epi64x(1);
4992 let r = _mm256_sllv_epi64(a, b);
4993 let e = _mm256_set1_epi64x(4);
4994 assert_eq_m256i(r, e);
4995 }
4996
4997 #[simd_test(enable = "avx2")]
4998 unsafe fn test_mm256_sra_epi16() {
4999 let a = _mm256_set1_epi16(-1);
5000 let b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0);
5001 let r = _mm256_sra_epi16(a, b);
5002 assert_eq_m256i(r, _mm256_set1_epi16(-1));
5003 }
5004
5005 #[simd_test(enable = "avx2")]
5006 unsafe fn test_mm256_sra_epi32() {
5007 let a = _mm256_set1_epi32(-1);
5008 let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 1);
5009 let r = _mm256_sra_epi32(a, b);
5010 assert_eq_m256i(r, _mm256_set1_epi32(-1));
5011 }
5012
5013 #[simd_test(enable = "avx2")]
5014 unsafe fn test_mm256_srai_epi16() {
5015 assert_eq_m256i(
5016 _mm256_srai_epi16::<1>(_mm256_set1_epi16(-1)),
5017 _mm256_set1_epi16(-1),
5018 );
5019 }
5020
5021 #[simd_test(enable = "avx2")]
5022 unsafe fn test_mm256_srai_epi32() {
5023 assert_eq_m256i(
5024 _mm256_srai_epi32::<1>(_mm256_set1_epi32(-1)),
5025 _mm256_set1_epi32(-1),
5026 );
5027 }
5028
5029 #[simd_test(enable = "avx2")]
5030 unsafe fn test_mm_srav_epi32() {
5031 let a = _mm_set1_epi32(4);
5032 let count = _mm_set1_epi32(1);
5033 let r = _mm_srav_epi32(a, count);
5034 let e = _mm_set1_epi32(2);
5035 assert_eq_m128i(r, e);
5036 }
5037
5038 #[simd_test(enable = "avx2")]
5039 unsafe fn test_mm256_srav_epi32() {
5040 let a = _mm256_set1_epi32(4);
5041 let count = _mm256_set1_epi32(1);
5042 let r = _mm256_srav_epi32(a, count);
5043 let e = _mm256_set1_epi32(2);
5044 assert_eq_m256i(r, e);
5045 }
5046
5047 #[simd_test(enable = "avx2")]
5048 unsafe fn test_mm256_srli_si256() {
5049 #[rustfmt::skip]
5050 let a = _mm256_setr_epi8(
5051 1, 2, 3, 4, 5, 6, 7, 8,
5052 9, 10, 11, 12, 13, 14, 15, 16,
5053 17, 18, 19, 20, 21, 22, 23, 24,
5054 25, 26, 27, 28, 29, 30, 31, 32,
5055 );
5056 let r = _mm256_srli_si256::<3>(a);
5057 #[rustfmt::skip]
5058 let e = _mm256_setr_epi8(
5059 4, 5, 6, 7, 8, 9, 10, 11,
5060 12, 13, 14, 15, 16, 0, 0, 0,
5061 20, 21, 22, 23, 24, 25, 26, 27,
5062 28, 29, 30, 31, 32, 0, 0, 0,
5063 );
5064 assert_eq_m256i(r, e);
5065 }
5066
5067 #[simd_test(enable = "avx2")]
5068 unsafe fn test_mm256_srl_epi16() {
5069 let a = _mm256_set1_epi16(0xFF);
5070 let b = _mm_insert_epi16::<0>(_mm_set1_epi16(0), 4);
5071 let r = _mm256_srl_epi16(a, b);
5072 assert_eq_m256i(r, _mm256_set1_epi16(0xF));
5073 }
5074
5075 #[simd_test(enable = "avx2")]
5076 unsafe fn test_mm256_srl_epi32() {
5077 let a = _mm256_set1_epi32(0xFFFF);
5078 let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 4);
5079 let r = _mm256_srl_epi32(a, b);
5080 assert_eq_m256i(r, _mm256_set1_epi32(0xFFF));
5081 }
5082
5083 #[simd_test(enable = "avx2")]
5084 unsafe fn test_mm256_srl_epi64() {
5085 let a = _mm256_set1_epi64x(0xFFFFFFFF);
5086 let b = _mm_setr_epi64x(4, 0);
5087 let r = _mm256_srl_epi64(a, b);
5088 assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFF));
5089 }
5090
5091 #[simd_test(enable = "avx2")]
5092 unsafe fn test_mm256_srli_epi16() {
5093 assert_eq_m256i(
5094 _mm256_srli_epi16::<4>(_mm256_set1_epi16(0xFF)),
5095 _mm256_set1_epi16(0xF),
5096 );
5097 }
5098
5099 #[simd_test(enable = "avx2")]
5100 unsafe fn test_mm256_srli_epi32() {
5101 assert_eq_m256i(
5102 _mm256_srli_epi32::<4>(_mm256_set1_epi32(0xFFFF)),
5103 _mm256_set1_epi32(0xFFF),
5104 );
5105 }
5106
5107 #[simd_test(enable = "avx2")]
5108 unsafe fn test_mm256_srli_epi64() {
5109 assert_eq_m256i(
5110 _mm256_srli_epi64::<4>(_mm256_set1_epi64x(0xFFFFFFFF)),
5111 _mm256_set1_epi64x(0xFFFFFFF),
5112 );
5113 }
5114
5115 #[simd_test(enable = "avx2")]
5116 unsafe fn test_mm_srlv_epi32() {
5117 let a = _mm_set1_epi32(2);
5118 let count = _mm_set1_epi32(1);
5119 let r = _mm_srlv_epi32(a, count);
5120 let e = _mm_set1_epi32(1);
5121 assert_eq_m128i(r, e);
5122 }
5123
5124 #[simd_test(enable = "avx2")]
5125 unsafe fn test_mm256_srlv_epi32() {
5126 let a = _mm256_set1_epi32(2);
5127 let count = _mm256_set1_epi32(1);
5128 let r = _mm256_srlv_epi32(a, count);
5129 let e = _mm256_set1_epi32(1);
5130 assert_eq_m256i(r, e);
5131 }
5132
5133 #[simd_test(enable = "avx2")]
5134 unsafe fn test_mm_srlv_epi64() {
5135 let a = _mm_set1_epi64x(2);
5136 let count = _mm_set1_epi64x(1);
5137 let r = _mm_srlv_epi64(a, count);
5138 let e = _mm_set1_epi64x(1);
5139 assert_eq_m128i(r, e);
5140 }
5141
5142 #[simd_test(enable = "avx2")]
5143 unsafe fn test_mm256_srlv_epi64() {
5144 let a = _mm256_set1_epi64x(2);
5145 let count = _mm256_set1_epi64x(1);
5146 let r = _mm256_srlv_epi64(a, count);
5147 let e = _mm256_set1_epi64x(1);
5148 assert_eq_m256i(r, e);
5149 }
5150
5151 #[simd_test(enable = "avx2")]
5152 unsafe fn test_mm256_stream_load_si256() {
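// vmovntdqa needs a 32-byte aligned source; `__m256i` is 32-byte aligned, so the address of `a` qualifies.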
5153 let a = _mm256_set_epi64x(5, 6, 7, 8);
5154 let r = _mm256_stream_load_si256(core::ptr::addr_of!(a) as *const _);
5155 assert_eq_m256i(a, r);
5156 }
5157
5158 #[simd_test(enable = "avx2")]
5159 unsafe fn test_mm256_sub_epi16() {
5160 let a = _mm256_set1_epi16(4);
5161 let b = _mm256_set1_epi16(2);
5162 let r = _mm256_sub_epi16(a, b);
5163 assert_eq_m256i(r, b);
5164 }
5165
5166 #[simd_test(enable = "avx2")]
5167 unsafe fn test_mm256_sub_epi32() {
5168 let a = _mm256_set1_epi32(4);
5169 let b = _mm256_set1_epi32(2);
5170 let r = _mm256_sub_epi32(a, b);
5171 assert_eq_m256i(r, b);
5172 }
5173
5174 #[simd_test(enable = "avx2")]
5175 unsafe fn test_mm256_sub_epi64() {
5176 let a = _mm256_set1_epi64x(4);
5177 let b = _mm256_set1_epi64x(2);
5178 let r = _mm256_sub_epi64(a, b);
5179 assert_eq_m256i(r, b);
5180 }
5181
5182 #[simd_test(enable = "avx2")]
5183 unsafe fn test_mm256_sub_epi8() {
5184 let a = _mm256_set1_epi8(4);
5185 let b = _mm256_set1_epi8(2);
5186 let r = _mm256_sub_epi8(a, b);
5187 assert_eq_m256i(r, b);
5188 }
5189
5190 #[simd_test(enable = "avx2")]
5191 unsafe fn test_mm256_subs_epi16() {
5192 let a = _mm256_set1_epi16(4);
5193 let b = _mm256_set1_epi16(2);
5194 let r = _mm256_subs_epi16(a, b);
5195 assert_eq_m256i(r, b);
5196 }
5197
5198 #[simd_test(enable = "avx2")]
5199 unsafe fn test_mm256_subs_epi8() {
5200 let a = _mm256_set1_epi8(4);
5201 let b = _mm256_set1_epi8(2);
5202 let r = _mm256_subs_epi8(a, b);
5203 assert_eq_m256i(r, b);
5204 }
5205
5206 #[simd_test(enable = "avx2")]
5207 unsafe fn test_mm256_subs_epu16() {
5208 let a = _mm256_set1_epi16(4);
5209 let b = _mm256_set1_epi16(2);
5210 let r = _mm256_subs_epu16(a, b);
5211 assert_eq_m256i(r, b);
5212 }
5213
5214 #[simd_test(enable = "avx2")]
5215 unsafe fn test_mm256_subs_epu8() {
5216 let a = _mm256_set1_epi8(4);
5217 let b = _mm256_set1_epi8(2);
5218 let r = _mm256_subs_epu8(a, b);
5219 assert_eq_m256i(r, b);
5220 }
5221
5222 #[simd_test(enable = "avx2")]
5223 unsafe fn test_mm256_xor_si256() {
5224 let a = _mm256_set1_epi8(5);
5225 let b = _mm256_set1_epi8(3);
5226 let r = _mm256_xor_si256(a, b);
5227 assert_eq_m256i(r, _mm256_set1_epi8(6));
5228 }
5229
5230 #[simd_test(enable = "avx2")]
5231 unsafe fn test_mm256_alignr_epi8() {
5232 #[rustfmt::skip]
5233 let a = _mm256_setr_epi8(
5234 1, 2, 3, 4, 5, 6, 7, 8,
5235 9, 10, 11, 12, 13, 14, 15, 16,
5236 17, 18, 19, 20, 21, 22, 23, 24,
5237 25, 26, 27, 28, 29, 30, 31, 32,
5238 );
5239 #[rustfmt::skip]
5240 let b = _mm256_setr_epi8(
5241 -1, -2, -3, -4, -5, -6, -7, -8,
5242 -9, -10, -11, -12, -13, -14, -15, -16,
5243 -17, -18, -19, -20, -21, -22, -23, -24,
5244 -25, -26, -27, -28, -29, -30, -31, -32,
5245 );
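// Each 128-bit lane of `a` is concatenated with the matching lane of `b` and shifted right by IMM8 bytes:
// IMM8 >= 32 yields zero, 16 returns `a`, and 0 returns `b`.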
5246 let r = _mm256_alignr_epi8::<33>(a, b);
5247 assert_eq_m256i(r, _mm256_set1_epi8(0));
5248
5249 let r = _mm256_alignr_epi8::<17>(a, b);
5250 #[rustfmt::skip]
5251 let expected = _mm256_setr_epi8(
5252 2, 3, 4, 5, 6, 7, 8, 9,
5253 10, 11, 12, 13, 14, 15, 16, 0,
5254 18, 19, 20, 21, 22, 23, 24, 25,
5255 26, 27, 28, 29, 30, 31, 32, 0,
5256 );
5257 assert_eq_m256i(r, expected);
5258
5259 let r = _mm256_alignr_epi8::<4>(a, b);
5260 #[rustfmt::skip]
5261 let expected = _mm256_setr_epi8(
5262 -5, -6, -7, -8, -9, -10, -11, -12,
5263 -13, -14, -15, -16, 1, 2, 3, 4,
5264 -21, -22, -23, -24, -25, -26, -27, -28,
5265 -29, -30, -31, -32, 17, 18, 19, 20,
5266 );
5267 assert_eq_m256i(r, expected);
5268
5269 let r = _mm256_alignr_epi8::<15>(a, b);
5270 #[rustfmt::skip]
5271 let expected = _mm256_setr_epi8(
5272 -16, 1, 2, 3, 4, 5, 6, 7,
5273 8, 9, 10, 11, 12, 13, 14, 15,
5274 -32, 17, 18, 19, 20, 21, 22, 23,
5275 24, 25, 26, 27, 28, 29, 30, 31,
5276 );
5277 assert_eq_m256i(r, expected);
5278
5279 let r = _mm256_alignr_epi8::<0>(a, b);
5280 assert_eq_m256i(r, b);
5281
5282 let r = _mm256_alignr_epi8::<16>(a, b);
5283 assert_eq_m256i(r, a);
5284 }
5285
5286 #[simd_test(enable = "avx2")]
5287 unsafe fn test_mm256_shuffle_epi8() {
5288 #[rustfmt::skip]
5289 let a = _mm256_setr_epi8(
5290 1, 2, 3, 4, 5, 6, 7, 8,
5291 9, 10, 11, 12, 13, 14, 15, 16,
5292 17, 18, 19, 20, 21, 22, 23, 24,
5293 25, 26, 27, 28, 29, 30, 31, 32,
5294 );
5295 #[rustfmt::skip]
5296 let b = _mm256_setr_epi8(
5297 4, 128u8 as i8, 4, 3, 24, 12, 6, 19,
5298 12, 5, 5, 10, 4, 1, 8, 0,
5299 4, 128u8 as i8, 4, 3, 24, 12, 6, 19,
5300 12, 5, 5, 10, 4, 1, 8, 0,
5301 );
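// Index bytes with the high bit set (128) zero the result byte; all other indices select within the same 128-bit lane.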
5302 #[rustfmt::skip]
5303 let expected = _mm256_setr_epi8(
5304 5, 0, 5, 4, 9, 13, 7, 4,
5305 13, 6, 6, 11, 5, 2, 9, 1,
5306 21, 0, 21, 20, 25, 29, 23, 20,
5307 29, 22, 22, 27, 21, 18, 25, 17,
5308 );
5309 let r = _mm256_shuffle_epi8(a, b);
5310 assert_eq_m256i(r, expected);
5311 }
5312
5313 #[simd_test(enable = "avx2")]
5314 unsafe fn test_mm256_permutevar8x32_epi32() {
5315 let a = _mm256_setr_epi32(100, 200, 300, 400, 500, 600, 700, 800);
5316 let b = _mm256_setr_epi32(5, 0, 5, 1, 7, 6, 3, 4);
5317 let expected = _mm256_setr_epi32(600, 100, 600, 200, 800, 700, 400, 500);
5318 let r = _mm256_permutevar8x32_epi32(a, b);
5319 assert_eq_m256i(r, expected);
5320 }
5321
5322 #[simd_test(enable = "avx2")]
5323 unsafe fn test_mm256_permute4x64_epi64() {
5324 let a = _mm256_setr_epi64x(100, 200, 300, 400);
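// IMM8 = 0b00_01_00_11 selects source elements [3, 0, 1, 0].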
5325 let expected = _mm256_setr_epi64x(400, 100, 200, 100);
5326 let r = _mm256_permute4x64_epi64::<0b00010011>(a);
5327 assert_eq_m256i(r, expected);
5328 }
5329
5330 #[simd_test(enable = "avx2")]
5331 unsafe fn test_mm256_permute2x128_si256() {
5332 let a = _mm256_setr_epi64x(100, 200, 500, 600);
5333 let b = _mm256_setr_epi64x(300, 400, 700, 800);
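// Selector 0x13: low lane <- b's high half (code 3), high lane <- a's high half (code 1).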
5334 let r = _mm256_permute2x128_si256::<0b00_01_00_11>(a, b);
5335 let e = _mm256_setr_epi64x(700, 800, 500, 600);
5336 assert_eq_m256i(r, e);
5337 }
5338
5339 #[simd_test(enable = "avx2")]
5340 unsafe fn test_mm256_permute4x64_pd() {
5341 let a = _mm256_setr_pd(1., 2., 3., 4.);
5342 let r = _mm256_permute4x64_pd::<0b00_01_00_11>(a);
5343 let e = _mm256_setr_pd(4., 1., 2., 1.);
5344 assert_eq_m256d(r, e);
5345 }
5346
5347 #[simd_test(enable = "avx2")]
5348 unsafe fn test_mm256_permutevar8x32_ps() {
5349 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
5350 let b = _mm256_setr_epi32(5, 0, 5, 1, 7, 6, 3, 4);
5351 let r = _mm256_permutevar8x32_ps(a, b);
5352 let e = _mm256_setr_ps(6., 1., 6., 2., 8., 7., 4., 5.);
5353 assert_eq_m256(r, e);
5354 }
5355
5356 #[simd_test(enable = "avx2")]
5357 unsafe fn test_mm_i32gather_epi32() {
5358 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
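// Scale = 4 bytes per index, so index i reads arr[i].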
5359 let r = _mm_i32gather_epi32::<4>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
5361 assert_eq_m128i(r, _mm_setr_epi32(0, 16, 32, 48));
5362 }
5363
5364 #[simd_test(enable = "avx2")]
5365 unsafe fn test_mm_mask_i32gather_epi32() {
5366 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
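// Lanes whose mask sign bit is clear keep the corresponding element of the src vector (256).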
5367 let r = _mm_mask_i32gather_epi32::<4>(
5369 _mm_set1_epi32(256),
5370 arr.as_ptr(),
5371 _mm_setr_epi32(0, 16, 64, 96),
5372 _mm_setr_epi32(-1, -1, -1, 0),
5373 );
5374 assert_eq_m128i(r, _mm_setr_epi32(0, 16, 64, 256));
5375 }
5376
5377 #[simd_test(enable = "avx2")]
5378 unsafe fn test_mm256_i32gather_epi32() {
5379 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5380 let r =
5382 _mm256_i32gather_epi32::<4>(arr.as_ptr(), _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
5383 assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
5384 }
5385
5386 #[simd_test(enable = "avx2")]
5387 unsafe fn test_mm256_mask_i32gather_epi32() {
5388 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5389 let r = _mm256_mask_i32gather_epi32::<4>(
5391 _mm256_set1_epi32(256),
5392 arr.as_ptr(),
5393 _mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0),
5394 _mm256_setr_epi32(-1, -1, -1, 0, 0, 0, 0, 0),
5395 );
5396 assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 64, 256, 256, 256, 256, 256));
5397 }
5398
5399 #[simd_test(enable = "avx2")]
5400 unsafe fn test_mm_i32gather_ps() {
5401 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5402 let r = _mm_i32gather_ps::<4>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
5404 assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 32.0, 48.0));
5405 }
5406
5407 #[simd_test(enable = "avx2")]
5408 unsafe fn test_mm_mask_i32gather_ps() {
5409 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5410 let r = _mm_mask_i32gather_ps::<4>(
5412 _mm_set1_ps(256.0),
5413 arr.as_ptr(),
5414 _mm_setr_epi32(0, 16, 64, 96),
5415 _mm_setr_ps(-1.0, -1.0, -1.0, 0.0),
5416 );
5417 assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 64.0, 256.0));
5418 }
5419
5420 #[simd_test(enable = "avx2")]
5421 unsafe fn test_mm256_i32gather_ps() {
5422 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5423 let r =
5425 _mm256_i32gather_ps::<4>(arr.as_ptr(), _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
5426 assert_eq_m256(r, _mm256_setr_ps(0.0, 16.0, 32.0, 48.0, 1.0, 2.0, 3.0, 4.0));
5427 }
5428
5429 #[simd_test(enable = "avx2")]
5430 unsafe fn test_mm256_mask_i32gather_ps() {
5431 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5432 let r = _mm256_mask_i32gather_ps::<4>(
5434 _mm256_set1_ps(256.0),
5435 arr.as_ptr(),
5436 _mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0),
5437 _mm256_setr_ps(-1.0, -1.0, -1.0, 0.0, 0.0, 0.0, 0.0, 0.0),
5438 );
5439 assert_eq_m256(
5440 r,
5441 _mm256_setr_ps(0.0, 16.0, 64.0, 256.0, 256.0, 256.0, 256.0, 256.0),
5442 );
5443 }
5444
5445 #[simd_test(enable = "avx2")]
5446 unsafe fn test_mm_i32gather_epi64() {
5447 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
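// Scale = 8 bytes for the i64 elements; only the two low i32 indices are used.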
5448 let r = _mm_i32gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0));
5450 assert_eq_m128i(r, _mm_setr_epi64x(0, 16));
5451 }
5452
5453 #[simd_test(enable = "avx2")]
5454 unsafe fn test_mm_mask_i32gather_epi64() {
5455 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5456 let r = _mm_mask_i32gather_epi64::<8>(
5458 _mm_set1_epi64x(256),
5459 arr.as_ptr(),
5460 _mm_setr_epi32(16, 16, 16, 16),
5461 _mm_setr_epi64x(-1, 0),
5462 );
5463 assert_eq_m128i(r, _mm_setr_epi64x(16, 256));
5464 }
5465
5466 #[simd_test(enable = "avx2")]
5467 unsafe fn test_mm256_i32gather_epi64() {
5468 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5469 let r = _mm256_i32gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
5471 assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 32, 48));
5472 }
5473
5474 #[simd_test(enable = "avx2")]
5475 unsafe fn test_mm256_mask_i32gather_epi64() {
5476 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5477 let r = _mm256_mask_i32gather_epi64::<8>(
5479 _mm256_set1_epi64x(256),
5480 arr.as_ptr(),
5481 _mm_setr_epi32(0, 16, 64, 96),
5482 _mm256_setr_epi64x(-1, -1, -1, 0),
5483 );
5484 assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 64, 256));
5485 }
5486
5487 #[simd_test(enable = "avx2")]
5488 unsafe fn test_mm_i32gather_pd() {
5489 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5490 let r = _mm_i32gather_pd::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0));
5492 assert_eq_m128d(r, _mm_setr_pd(0.0, 16.0));
5493 }
5494
5495 #[simd_test(enable = "avx2")]
5496 unsafe fn test_mm_mask_i32gather_pd() {
5497 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5498 let r = _mm_mask_i32gather_pd::<8>(
5500 _mm_set1_pd(256.0),
5501 arr.as_ptr(),
5502 _mm_setr_epi32(16, 16, 16, 16),
5503 _mm_setr_pd(-1.0, 0.0),
5504 );
5505 assert_eq_m128d(r, _mm_setr_pd(16.0, 256.0));
5506 }
5507
5508 #[simd_test(enable = "avx2")]
5509 unsafe fn test_mm256_i32gather_pd() {
5510 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5511 let r = _mm256_i32gather_pd::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
5513 assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 32.0, 48.0));
5514 }
5515
5516 #[simd_test(enable = "avx2")]
5517 unsafe fn test_mm256_mask_i32gather_pd() {
5518 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5519 let r = _mm256_mask_i32gather_pd::<8>(
5521 _mm256_set1_pd(256.0),
5522 arr.as_ptr(),
5523 _mm_setr_epi32(0, 16, 64, 96),
5524 _mm256_setr_pd(-1.0, -1.0, -1.0, 0.0),
5525 );
5526 assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 64.0, 256.0));
5527 }
5528
5529 #[simd_test(enable = "avx2")]
5530 unsafe fn test_mm_i64gather_epi32() {
5531 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
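// Two i64 indices yield two i32 results; the upper 64 bits of the __m128i result are zeroed.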
5532 let r = _mm_i64gather_epi32::<4>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
5534 assert_eq_m128i(r, _mm_setr_epi32(0, 16, 0, 0));
5535 }
5536
5537 #[simd_test(enable = "avx2")]
5538 unsafe fn test_mm_mask_i64gather_epi32() {
5539 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5540 let r = _mm_mask_i64gather_epi32::<4>(
5542 _mm_set1_epi32(256),
5543 arr.as_ptr(),
5544 _mm_setr_epi64x(0, 16),
5545 _mm_setr_epi32(-1, 0, -1, 0),
5546 );
5547 assert_eq_m128i(r, _mm_setr_epi32(0, 256, 0, 0));
5548 }
5549
5550 #[simd_test(enable = "avx2")]
5551 unsafe fn test_mm256_i64gather_epi32() {
5552 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5553 let r = _mm256_i64gather_epi32::<4>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
5555 assert_eq_m128i(r, _mm_setr_epi32(0, 16, 32, 48));
5556 }
5557
5558 #[simd_test(enable = "avx2")]
5559 unsafe fn test_mm256_mask_i64gather_epi32() {
5560 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5561 let r = _mm256_mask_i64gather_epi32::<4>(
5563 _mm_set1_epi32(256),
5564 arr.as_ptr(),
5565 _mm256_setr_epi64x(0, 16, 64, 96),
5566 _mm_setr_epi32(-1, -1, -1, 0),
5567 );
5568 assert_eq_m128i(r, _mm_setr_epi32(0, 16, 64, 256));
5569 }
5570
5571 #[simd_test(enable = "avx2")]
5572 unsafe fn test_mm_i64gather_ps() {
5573 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5574 let r = _mm_i64gather_ps::<4>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
5576 assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 0.0, 0.0));
5577 }
5578
5579 #[simd_test(enable = "avx2")]
5580 unsafe fn test_mm_mask_i64gather_ps() {
5581 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5582 let r = _mm_mask_i64gather_ps::<4>(
5584 _mm_set1_ps(256.0),
5585 arr.as_ptr(),
5586 _mm_setr_epi64x(0, 16),
5587 _mm_setr_ps(-1.0, 0.0, -1.0, 0.0),
5588 );
5589 assert_eq_m128(r, _mm_setr_ps(0.0, 256.0, 0.0, 0.0));
5590 }
5591
5592 #[simd_test(enable = "avx2")]
5593 unsafe fn test_mm256_i64gather_ps() {
5594 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5595 let r = _mm256_i64gather_ps::<4>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
5597 assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 32.0, 48.0));
5598 }
5599
5600 #[simd_test(enable = "avx2")]
5601 unsafe fn test_mm256_mask_i64gather_ps() {
5602 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5603 let r = _mm256_mask_i64gather_ps::<4>(
5605 _mm_set1_ps(256.0),
5606 arr.as_ptr(),
5607 _mm256_setr_epi64x(0, 16, 64, 96),
5608 _mm_setr_ps(-1.0, -1.0, -1.0, 0.0),
5609 );
5610 assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 64.0, 256.0));
5611 }
5612
5613 #[simd_test(enable = "avx2")]
5614 unsafe fn test_mm_i64gather_epi64() {
5615 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5616 let r = _mm_i64gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
5618 assert_eq_m128i(r, _mm_setr_epi64x(0, 16));
5619 }
5620
5621 #[simd_test(enable = "avx2")]
5622 unsafe fn test_mm_mask_i64gather_epi64() {
5623 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5624 let r = _mm_mask_i64gather_epi64::<8>(
5626 _mm_set1_epi64x(256),
5627 arr.as_ptr(),
5628 _mm_setr_epi64x(16, 16),
5629 _mm_setr_epi64x(-1, 0),
5630 );
5631 assert_eq_m128i(r, _mm_setr_epi64x(16, 256));
5632 }
5633
5634 #[simd_test(enable = "avx2")]
5635 unsafe fn test_mm256_i64gather_epi64() {
5636 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5637 let r = _mm256_i64gather_epi64::<8>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
5639 assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 32, 48));
5640 }
5641
5642 #[simd_test(enable = "avx2")]
5643 unsafe fn test_mm256_mask_i64gather_epi64() {
5644 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5645 let r = _mm256_mask_i64gather_epi64::<8>(
5647 _mm256_set1_epi64x(256),
5648 arr.as_ptr(),
5649 _mm256_setr_epi64x(0, 16, 64, 96),
5650 _mm256_setr_epi64x(-1, -1, -1, 0),
5651 );
5652 assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 64, 256));
5653 }
5654
5655 #[simd_test(enable = "avx2")]
5656 unsafe fn test_mm_i64gather_pd() {
5657 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5658 let r = _mm_i64gather_pd::<8>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
5660 assert_eq_m128d(r, _mm_setr_pd(0.0, 16.0));
5661 }
5662
5663 #[simd_test(enable = "avx2")]
5664 unsafe fn test_mm_mask_i64gather_pd() {
5665 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5666 let r = _mm_mask_i64gather_pd::<8>(
5668 _mm_set1_pd(256.0),
5669 arr.as_ptr(),
5670 _mm_setr_epi64x(16, 16),
5671 _mm_setr_pd(-1.0, 0.0),
5672 );
5673 assert_eq_m128d(r, _mm_setr_pd(16.0, 256.0));
5674 }
5675
5676 #[simd_test(enable = "avx2")]
5677 unsafe fn test_mm256_i64gather_pd() {
5678 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5679 let r = _mm256_i64gather_pd::<8>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
5681 assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 32.0, 48.0));
5682 }
5683
5684 #[simd_test(enable = "avx2")]
5685 unsafe fn test_mm256_mask_i64gather_pd() {
5686 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5687 let r = _mm256_mask_i64gather_pd::<8>(
5689 _mm256_set1_pd(256.0),
5690 arr.as_ptr(),
5691 _mm256_setr_epi64x(0, 16, 64, 96),
5692 _mm256_setr_pd(-1.0, -1.0, -1.0, 0.0),
5693 );
5694 assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 64.0, 256.0));
5695 }
5696
5697 #[simd_test(enable = "avx")]
5698 unsafe fn test_mm256_extract_epi8() {
5699 #[rustfmt::skip]
5700 let a = _mm256_setr_epi8(
5701 -1, 1, 2, 3, 4, 5, 6, 7,
5702 8, 9, 10, 11, 12, 13, 14, 15,
5703 16, 17, 18, 19, 20, 21, 22, 23,
5704 24, 25, 26, 27, 28, 29, 30, 31
5705 );
5706 let r1 = _mm256_extract_epi8::<0>(a);
5707 let r2 = _mm256_extract_epi8::<3>(a);
5708 assert_eq!(r1, 0xFF);
5709 assert_eq!(r2, 3);
5710 }
5711
5712 #[simd_test(enable = "avx2")]
5713 unsafe fn test_mm256_extract_epi16() {
5714 #[rustfmt::skip]
5715 let a = _mm256_setr_epi16(
5716 -1, 1, 2, 3, 4, 5, 6, 7,
5717 8, 9, 10, 11, 12, 13, 14, 15,
5718 );
5719 let r1 = _mm256_extract_epi16::<0>(a);
5720 let r2 = _mm256_extract_epi16::<3>(a);
5721 assert_eq!(r1, 0xFFFF);
5722 assert_eq!(r2, 3);
5723 }
5724 }