use crate::core_arch::{simd::*, x86::*};
use crate::intrinsics::simd::*;

#[cfg(test)]
use stdarch_test::assert_instr;

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_abs_epi32(a: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i32x8();
        let r = simd_select::<m32x8, _>(simd_lt(a, i32x8::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_abs_epi16(a: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i16x16();
        let r = simd_select::<m16x16, _>(simd_lt(a, i16x16::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_abs_epi8(a: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i8x32();
        let r = simd_select::<m8x32, _>(simd_lt(a, i8x32::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_add_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_add(a.as_i64x4(), b.as_i64x4())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_add_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_add(a.as_i32x8(), b.as_i32x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_add_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_add(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_add_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_add(a.as_i8x32(), b.as_i8x32())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_adds_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_add(a.as_i8x32(), b.as_i8x32())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_adds_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_add(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_adds_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_add(a.as_u8x32(), b.as_u8x32())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_adds_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_add(a.as_u16x16(), b.as_u16x16())) }
}

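/// Concatenates pairs of 16-byte blocks from `a` and `b` into 32-byte
/// temporaries, shifts each temporary right by `IMM8` bytes, and returns the
/// low 16 bytes of each shift. The two 128-bit lanes are processed
/// independently.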
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 7))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_alignr_epi8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);

    if IMM8 >= 32 {
        return _mm256_setzero_si256();
    }
    let (a, b) = if IMM8 > 16 {
        (_mm256_setzero_si256(), a)
    } else {
        (a, b)
    };
    unsafe {
        if IMM8 == 16 {
            return transmute(a);
        }
    }
    const fn mask(shift: u32, i: u32) -> u32 {
        let shift = shift % 16;
        let mod_i = i % 16;
        if mod_i < (16 - shift) {
            i + shift
        } else {
            i + 16 + shift
        }
    }

    unsafe {
        let r: i8x32 = simd_shuffle!(
            b.as_i8x32(),
            a.as_i8x32(),
            [
                mask(IMM8 as u32, 0),
                mask(IMM8 as u32, 1),
                mask(IMM8 as u32, 2),
                mask(IMM8 as u32, 3),
                mask(IMM8 as u32, 4),
                mask(IMM8 as u32, 5),
                mask(IMM8 as u32, 6),
                mask(IMM8 as u32, 7),
                mask(IMM8 as u32, 8),
                mask(IMM8 as u32, 9),
                mask(IMM8 as u32, 10),
                mask(IMM8 as u32, 11),
                mask(IMM8 as u32, 12),
                mask(IMM8 as u32, 13),
                mask(IMM8 as u32, 14),
                mask(IMM8 as u32, 15),
                mask(IMM8 as u32, 16),
                mask(IMM8 as u32, 17),
                mask(IMM8 as u32, 18),
                mask(IMM8 as u32, 19),
                mask(IMM8 as u32, 20),
                mask(IMM8 as u32, 21),
                mask(IMM8 as u32, 22),
                mask(IMM8 as u32, 23),
                mask(IMM8 as u32, 24),
                mask(IMM8 as u32, 25),
                mask(IMM8 as u32, 26),
                mask(IMM8 as u32, 27),
                mask(IMM8 as u32, 28),
                mask(IMM8 as u32, 29),
                mask(IMM8 as u32, 30),
                mask(IMM8 as u32, 31),
            ],
        );
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_and_si256(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_and(a.as_i64x4(), b.as_i64x4())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_andnot_si256(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let all_ones = _mm256_set1_epi8(-1);
        transmute(simd_and(
            simd_xor(a.as_i64x4(), all_ones.as_i64x4()),
            b.as_i64x4(),
        ))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpavgw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_avg_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = simd_cast::<_, u32x16>(a.as_u16x16());
        let b = simd_cast::<_, u32x16>(b.as_u16x16());
        let r = simd_shr(simd_add(simd_add(a, b), u32x16::splat(1)), u32x16::splat(1));
        transmute(simd_cast::<_, u16x16>(r))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpavgb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_avg_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = simd_cast::<_, u16x32>(a.as_u8x32());
        let b = simd_cast::<_, u16x32>(b.as_u8x32());
        let r = simd_shr(simd_add(simd_add(a, b), u16x32::splat(1)), u16x32::splat(1));
        transmute(simd_cast::<_, u8x32>(r))
    }
}

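/// Blends packed 32-bit integers from `a` and `b` using `IMM4` as control:
/// result element `i` is taken from `b` when bit `i` of `IMM4` is set, and
/// from `a` otherwise.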
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vblendps, IMM4 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_blend_epi32<const IMM4: i32>(a: __m128i, b: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM4, 4);
    unsafe {
        let a = a.as_i32x4();
        let b = b.as_i32x4();
        let r: i32x4 = simd_shuffle!(
            a,
            b,
            [
                [0, 4, 0, 4][IMM4 as usize & 0b11],
                [1, 1, 5, 5][IMM4 as usize & 0b11],
                [2, 6, 2, 6][(IMM4 as usize >> 2) & 0b11],
                [3, 3, 7, 7][(IMM4 as usize >> 2) & 0b11],
            ],
        );
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vblendps, IMM8 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_blend_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        let a = a.as_i32x8();
        let b = b.as_i32x8();
        let r: i32x8 = simd_shuffle!(
            a,
            b,
            [
                [0, 8, 0, 8][IMM8 as usize & 0b11],
                [1, 1, 9, 9][IMM8 as usize & 0b11],
                [2, 10, 2, 10][(IMM8 as usize >> 2) & 0b11],
                [3, 3, 11, 11][(IMM8 as usize >> 2) & 0b11],
                [4, 12, 4, 12][(IMM8 as usize >> 4) & 0b11],
                [5, 5, 13, 13][(IMM8 as usize >> 4) & 0b11],
                [6, 14, 6, 14][(IMM8 as usize >> 6) & 0b11],
                [7, 7, 15, 15][(IMM8 as usize >> 6) & 0b11],
            ],
        );
        transmute(r)
    }
}

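/// Blends packed 16-bit integers from `a` and `b` using the 8-bit control
/// `IMM8`: bit `i` selects element `i` from `b` (1) or `a` (0), and the same
/// eight control bits are reused for the upper 128-bit lane.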
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpblendw, IMM8 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_blend_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        let a = a.as_i16x16();
        let b = b.as_i16x16();

        let r: i16x16 = simd_shuffle!(
            a,
            b,
            [
                [0, 16, 0, 16][IMM8 as usize & 0b11],
                [1, 1, 17, 17][IMM8 as usize & 0b11],
                [2, 18, 2, 18][(IMM8 as usize >> 2) & 0b11],
                [3, 3, 19, 19][(IMM8 as usize >> 2) & 0b11],
                [4, 20, 4, 20][(IMM8 as usize >> 4) & 0b11],
                [5, 5, 21, 21][(IMM8 as usize >> 4) & 0b11],
                [6, 22, 6, 22][(IMM8 as usize >> 6) & 0b11],
                [7, 7, 23, 23][(IMM8 as usize >> 6) & 0b11],
                [8, 24, 8, 24][IMM8 as usize & 0b11],
                [9, 9, 25, 25][IMM8 as usize & 0b11],
                [10, 26, 10, 26][(IMM8 as usize >> 2) & 0b11],
                [11, 11, 27, 27][(IMM8 as usize >> 2) & 0b11],
                [12, 28, 12, 28][(IMM8 as usize >> 4) & 0b11],
                [13, 13, 29, 29][(IMM8 as usize >> 4) & 0b11],
                [14, 30, 14, 30][(IMM8 as usize >> 6) & 0b11],
                [15, 15, 31, 31][(IMM8 as usize >> 6) & 0b11],
            ],
        );
        transmute(r)
    }
}

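/// Blends packed 8-bit integers from `a` and `b` using `mask`: each result
/// byte is taken from `b` when the most significant bit of the corresponding
/// mask byte is set, and from `a` otherwise.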
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpblendvb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_blendv_epi8(a: __m256i, b: __m256i, mask: __m256i) -> __m256i {
    unsafe {
        let mask: i8x32 = simd_lt(mask.as_i8x32(), i8x32::ZERO);
        transmute(simd_select(mask, b.as_i8x32(), a.as_i8x32()))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_broadcastb_epi8(a: __m128i) -> __m128i {
    unsafe {
        let ret = simd_shuffle!(a.as_i8x16(), i8x16::ZERO, [0_u32; 16]);
        transmute::<i8x16, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastb_epi8(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i8x16(), i8x16::ZERO, [0_u32; 32]);
        transmute::<i8x32, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_broadcastd_epi32(a: __m128i) -> __m128i {
    unsafe {
        let ret = simd_shuffle!(a.as_i32x4(), i32x4::ZERO, [0_u32; 4]);
        transmute::<i32x4, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastd_epi32(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i32x4(), i32x4::ZERO, [0_u32; 8]);
        transmute::<i32x8, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vmovddup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_broadcastq_epi64(a: __m128i) -> __m128i {
    unsafe {
        let ret = simd_shuffle!(a.as_i64x2(), a.as_i64x2(), [0_u32; 2]);
        transmute::<i64x2, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastq_epi64(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i64x2(), a.as_i64x2(), [0_u32; 4]);
        transmute::<i64x4, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vmovddup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_broadcastsd_pd(a: __m128d) -> __m128d {
    unsafe { simd_shuffle!(a, _mm_setzero_pd(), [0_u32; 2]) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastsd_pd(a: __m128d) -> __m256d {
    unsafe { simd_shuffle!(a, _mm_setzero_pd(), [0_u32; 4]) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[stable(feature = "simd_x86_updates", since = "1.82.0")]
pub fn _mm_broadcastsi128_si256(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 1, 0, 1]);
        transmute::<i64x4, _>(ret)
    }
}

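/// Broadcasts the 128-bit integer vector `a` to both 128-bit lanes of the
/// returned 256-bit vector.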
#[inline]
#[target_feature(enable = "avx2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastsi128_si256(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 1, 0, 1]);
        transmute::<i64x4, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_broadcastss_ps(a: __m128) -> __m128 {
    unsafe { simd_shuffle!(a, _mm_setzero_ps(), [0_u32; 4]) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastss_ps(a: __m128) -> __m256 {
    unsafe { simd_shuffle!(a, _mm_setzero_ps(), [0_u32; 8]) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_broadcastw_epi16(a: __m128i) -> __m128i {
    unsafe {
        let ret = simd_shuffle!(a.as_i16x8(), i16x8::ZERO, [0_u32; 8]);
        transmute::<i16x8, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastw_epi16(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i16x8(), i16x8::ZERO, [0_u32; 16]);
        transmute::<i16x16, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpeq_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i64x4, _>(simd_eq(a.as_i64x4(), b.as_i64x4())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpeq_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i32x8, _>(simd_eq(a.as_i32x8(), b.as_i32x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpeq_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i16x16, _>(simd_eq(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpeq_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i8x32, _>(simd_eq(a.as_i8x32(), b.as_i8x32())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpgt_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i64x4, _>(simd_gt(a.as_i64x4(), b.as_i64x4())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpgt_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i32x8, _>(simd_gt(a.as_i32x8(), b.as_i32x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpgt_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i16x16, _>(simd_gt(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpgt_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i8x32, _>(simd_gt(a.as_i8x32(), b.as_i8x32())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi16_epi32(a: __m128i) -> __m256i {
    unsafe { transmute::<i32x8, _>(simd_cast(a.as_i16x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi16_epi64(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_i16x8();
        let v64: i16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute::<i64x4, _>(simd_cast(v64))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi32_epi64(a: __m128i) -> __m256i {
    unsafe { transmute::<i64x4, _>(simd_cast(a.as_i32x4())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi8_epi16(a: __m128i) -> __m256i {
    unsafe { transmute::<i16x16, _>(simd_cast(a.as_i8x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi8_epi32(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_i8x16();
        let v64: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
        transmute::<i32x8, _>(simd_cast(v64))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi8_epi64(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_i8x16();
        let v32: i8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute::<i64x4, _>(simd_cast(v32))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepu16_epi32(a: __m128i) -> __m256i {
    unsafe { transmute::<i32x8, _>(simd_cast(a.as_u16x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepu16_epi64(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_u16x8();
        let v64: u16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute::<i64x4, _>(simd_cast(v64))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepu32_epi64(a: __m128i) -> __m256i {
    unsafe { transmute::<i64x4, _>(simd_cast(a.as_u32x4())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepu8_epi16(a: __m128i) -> __m256i {
    unsafe { transmute::<i16x16, _>(simd_cast(a.as_u8x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepu8_epi32(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_u8x16();
        let v64: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
        transmute::<i32x8, _>(simd_cast(v64))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepu8_epi64(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_u8x16();
        let v32: u8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute::<i64x4, _>(simd_cast(v32))
    }
}

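/// Extracts one 128-bit lane of integer data from `a`: the low lane when
/// `IMM1` is 0 and the high lane when `IMM1` is 1.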
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vextractf128, IMM1 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_extracti128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
    static_assert_uimm_bits!(IMM1, 1);
    unsafe {
        let a = a.as_i64x4();
        let b = i64x4::ZERO;
        let dst: i64x2 = simd_shuffle!(a, b, [[0, 1], [2, 3]][IMM1 as usize]);
        transmute(dst)
    }
}

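/// Horizontally adds adjacent pairs of 16-bit integers in `a` and `b`.
/// Within each 128-bit lane of the result, the low four sums come from `a`
/// and the high four from `b`.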
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hadd_epi16(a: __m256i, b: __m256i) -> __m256i {
    let a = a.as_i16x16();
    let b = b.as_i16x16();
    unsafe {
        let even: i16x16 = simd_shuffle!(
            a,
            b,
            [0, 2, 4, 6, 16, 18, 20, 22, 8, 10, 12, 14, 24, 26, 28, 30]
        );
        let odd: i16x16 = simd_shuffle!(
            a,
            b,
            [1, 3, 5, 7, 17, 19, 21, 23, 9, 11, 13, 15, 25, 27, 29, 31]
        );
        simd_add(even, odd).as_m256i()
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hadd_epi32(a: __m256i, b: __m256i) -> __m256i {
    let a = a.as_i32x8();
    let b = b.as_i32x8();
    unsafe {
        let even: i32x8 = simd_shuffle!(a, b, [0, 2, 8, 10, 4, 6, 12, 14]);
        let odd: i32x8 = simd_shuffle!(a, b, [1, 3, 9, 11, 5, 7, 13, 15]);
        simd_add(even, odd).as_m256i()
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hadds_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(phaddsw(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hsub_epi16(a: __m256i, b: __m256i) -> __m256i {
    let a = a.as_i16x16();
    let b = b.as_i16x16();
    unsafe {
        let even: i16x16 = simd_shuffle!(
            a,
            b,
            [0, 2, 4, 6, 16, 18, 20, 22, 8, 10, 12, 14, 24, 26, 28, 30]
        );
        let odd: i16x16 = simd_shuffle!(
            a,
            b,
            [1, 3, 5, 7, 17, 19, 21, 23, 9, 11, 13, 15, 25, 27, 29, 31]
        );
        simd_sub(even, odd).as_m256i()
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hsub_epi32(a: __m256i, b: __m256i) -> __m256i {
    let a = a.as_i32x8();
    let b = b.as_i32x8();
    unsafe {
        let even: i32x8 = simd_shuffle!(a, b, [0, 2, 8, 10, 4, 6, 12, 14]);
        let odd: i32x8 = simd_shuffle!(a, b, [1, 3, 9, 11, 5, 7, 13, 15]);
        simd_sub(even, odd).as_m256i()
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hsubs_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(phsubsw(a.as_i16x16(), b.as_i16x16())) }
}

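/// Gathers four 32-bit integers from memory: result element `i` is loaded
/// from `slice` offset by `offsets[i] * SCALE` bytes, where `SCALE` must be
/// 1, 2, 4, or 8.
///
/// A minimal usage sketch (illustrative only; the buffer and indices below
/// are assumptions, and the caller must ensure AVX2 is available and every
/// gathered address is valid):
///
/// ```ignore
/// let data: [i32; 8] = [10, 11, 12, 13, 14, 15, 16, 17];
/// let idx = _mm_setr_epi32(0, 2, 4, 6);
/// // SCALE = 4 because the offsets are element indices, not byte offsets;
/// // this gathers data[0], data[2], data[4] and data[6].
/// let r = unsafe { _mm_i32gather_epi32::<4>(data.as_ptr(), idx) };
/// ```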
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_epi32<const SCALE: i32>(
    slice: *const i32,
    offsets: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero = i32x4::ZERO;
    let neg_one = _mm_set1_epi32(-1).as_i32x4();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = pgatherdd(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i32gather_epi32<const SCALE: i32>(
    src: __m128i,
    slice: *const i32,
    offsets: __m128i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x4();
    let mask = mask.as_i32x4();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = pgatherdd(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_epi32<const SCALE: i32>(
    slice: *const i32,
    offsets: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let zero = i32x8::ZERO;
    let neg_one = _mm256_set1_epi32(-1).as_i32x8();
    let offsets = offsets.as_i32x8();
    let slice = slice as *const i8;
    let r = vpgatherdd(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i32gather_epi32<const SCALE: i32>(
    src: __m256i,
    slice: *const i32,
    offsets: __m256i,
    mask: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x8();
    let mask = mask.as_i32x8();
    let offsets = offsets.as_i32x8();
    let slice = slice as *const i8;
    let r = vpgatherdd(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m128i) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_ps();
    let neg_one = _mm_set1_ps(-1.0);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    pgatherdps(zero, slice, offsets, neg_one, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i32gather_ps<const SCALE: i32>(
    src: __m128,
    slice: *const f32,
    offsets: __m128i,
    mask: __m128,
) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    pgatherdps(src, slice, offsets, mask, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m256i) -> __m256 {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm256_setzero_ps();
    let neg_one = _mm256_set1_ps(-1.0);
    let offsets = offsets.as_i32x8();
    let slice = slice as *const i8;
    vpgatherdps(zero, slice, offsets, neg_one, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i32gather_ps<const SCALE: i32>(
    src: __m256,
    slice: *const f32,
    offsets: __m256i,
    mask: __m256,
) -> __m256 {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i32x8();
    let slice = slice as *const i8;
    vpgatherdps(src, slice, offsets, mask, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_epi64<const SCALE: i32>(
    slice: *const i64,
    offsets: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero = i64x2::ZERO;
    let neg_one = _mm_set1_epi64x(-1).as_i64x2();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = pgatherdq(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i32gather_epi64<const SCALE: i32>(
    src: __m128i,
    slice: *const i64,
    offsets: __m128i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x2();
    let mask = mask.as_i64x2();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = pgatherdq(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_epi64<const SCALE: i32>(
    slice: *const i64,
    offsets: __m128i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let zero = i64x4::ZERO;
    let neg_one = _mm256_set1_epi64x(-1).as_i64x4();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = vpgatherdq(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i32gather_epi64<const SCALE: i32>(
    src: __m256i,
    slice: *const i64,
    offsets: __m128i,
    mask: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x4();
    let mask = mask.as_i64x4();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = vpgatherdq(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_pd<const SCALE: i32>(slice: *const f64, offsets: __m128i) -> __m128d {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_pd();
    let neg_one = _mm_set1_pd(-1.0);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    pgatherdpd(zero, slice, offsets, neg_one, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i32gather_pd<const SCALE: i32>(
    src: __m128d,
    slice: *const f64,
    offsets: __m128i,
    mask: __m128d,
) -> __m128d {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    pgatherdpd(src, slice, offsets, mask, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_pd<const SCALE: i32>(
    slice: *const f64,
    offsets: __m128i,
) -> __m256d {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm256_setzero_pd();
    let neg_one = _mm256_set1_pd(-1.0);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    vpgatherdpd(zero, slice, offsets, neg_one, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i32gather_pd<const SCALE: i32>(
    src: __m256d,
    slice: *const f64,
    offsets: __m128i,
    mask: __m256d,
) -> __m256d {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    vpgatherdpd(src, slice, offsets, mask, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_epi32<const SCALE: i32>(
    slice: *const i32,
    offsets: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero = i32x4::ZERO;
    let neg_one = _mm_set1_epi64x(-1).as_i32x4();
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    let r = pgatherqd(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i64gather_epi32<const SCALE: i32>(
    src: __m128i,
    slice: *const i32,
    offsets: __m128i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x4();
    let mask = mask.as_i32x4();
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    let r = pgatherqd(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_epi32<const SCALE: i32>(
    slice: *const i32,
    offsets: __m256i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero = i32x4::ZERO;
    let neg_one = _mm_set1_epi64x(-1).as_i32x4();
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    let r = vpgatherqd(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i64gather_epi32<const SCALE: i32>(
    src: __m128i,
    slice: *const i32,
    offsets: __m256i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x4();
    let mask = mask.as_i32x4();
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    let r = vpgatherqd(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m128i) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_ps();
    let neg_one = _mm_set1_ps(-1.0);
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    pgatherqps(zero, slice, offsets, neg_one, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i64gather_ps<const SCALE: i32>(
    src: __m128,
    slice: *const f32,
    offsets: __m128i,
    mask: __m128,
) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    pgatherqps(src, slice, offsets, mask, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m256i) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_ps();
    let neg_one = _mm_set1_ps(-1.0);
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    vpgatherqps(zero, slice, offsets, neg_one, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i64gather_ps<const SCALE: i32>(
    src: __m128,
    slice: *const f32,
    offsets: __m256i,
    mask: __m128,
) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    vpgatherqps(src, slice, offsets, mask, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_epi64<const SCALE: i32>(
    slice: *const i64,
    offsets: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero = i64x2::ZERO;
    let neg_one = _mm_set1_epi64x(-1).as_i64x2();
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x2();
    let r = pgatherqq(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i64gather_epi64<const SCALE: i32>(
    src: __m128i,
    slice: *const i64,
    offsets: __m128i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x2();
    let mask = mask.as_i64x2();
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    let r = pgatherqq(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_epi64<const SCALE: i32>(
    slice: *const i64,
    offsets: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let zero = i64x4::ZERO;
    let neg_one = _mm256_set1_epi64x(-1).as_i64x4();
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x4();
    let r = vpgatherqq(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i64gather_epi64<const SCALE: i32>(
    src: __m256i,
    slice: *const i64,
    offsets: __m256i,
    mask: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x4();
    let mask = mask.as_i64x4();
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    let r = vpgatherqq(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_pd<const SCALE: i32>(slice: *const f64, offsets: __m128i) -> __m128d {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_pd();
    let neg_one = _mm_set1_pd(-1.0);
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x2();
    pgatherqpd(zero, slice, offsets, neg_one, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i64gather_pd<const SCALE: i32>(
    src: __m128d,
    slice: *const f64,
    offsets: __m128i,
    mask: __m128d,
) -> __m128d {
    static_assert_imm8_scale!(SCALE);
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x2();
    pgatherqpd(src, slice, offsets, mask, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_pd<const SCALE: i32>(
    slice: *const f64,
    offsets: __m256i,
) -> __m256d {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm256_setzero_pd();
    let neg_one = _mm256_set1_pd(-1.0);
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x4();
    vpgatherqpd(zero, slice, offsets, neg_one, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i64gather_pd<const SCALE: i32>(
    src: __m256d,
    slice: *const f64,
    offsets: __m256i,
    mask: __m256d,
) -> __m256d {
    static_assert_imm8_scale!(SCALE);
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x4();
    vpgatherqpd(src, slice, offsets, mask, SCALE as i8)
}

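/// Copies `a` to the result, then overwrites one 128-bit lane with `b`: the
/// low lane when `IMM1` is 0 and the high lane when `IMM1` is 1.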
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vinsertf128, IMM1 = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_inserti128_si256<const IMM1: i32>(a: __m256i, b: __m128i) -> __m256i {
    static_assert_uimm_bits!(IMM1, 1);
    unsafe {
        let a = a.as_i64x4();
        let b = _mm256_castsi128_si256(b).as_i64x4();
        let dst: i64x4 = simd_shuffle!(a, b, [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize]);
        transmute(dst)
    }
}

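/// Multiplies packed signed 16-bit integers in `a` and `b`, producing
/// intermediate signed 32-bit products, and horizontally adds adjacent pairs
/// of those products to form packed 32-bit integers.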
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_madd_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(pmaddwd(a.as_i16x16(), b.as_i16x16())) }
}

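/// Vertically multiplies each unsigned 8-bit integer from `a` with the
/// corresponding signed 8-bit integer from `b`, then horizontally adds each
/// adjacent pair of intermediate signed 16-bit products with signed
/// saturation.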
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_maddubs_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(pmaddubsw(a.as_u8x32(), b.as_i8x32())) }
}

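/// Conditionally loads 32-bit integers from `mem_addr` using `mask`: result
/// element `i` is loaded only when the most significant bit of `mask[i]` is
/// set, and is zeroed otherwise; masked-off elements are not read from
/// memory.
///
/// A minimal usage sketch (illustrative only; the buffer and mask values are
/// assumptions):
///
/// ```ignore
/// let buf: [i32; 4] = [1, 2, 3, 4];
/// // Load lanes 0 and 2 only; lanes 1 and 3 of the result become 0.
/// let mask = _mm_setr_epi32(-1, 0, -1, 0);
/// let r = unsafe { _mm_maskload_epi32(buf.as_ptr(), mask) };
/// ```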
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskload_epi32(mem_addr: *const i32, mask: __m128i) -> __m128i {
    let mask = simd_shr(mask.as_i32x4(), i32x4::splat(31));
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i32x4::ZERO).as_m128i()
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskload_epi32(mem_addr: *const i32, mask: __m256i) -> __m256i {
    let mask = simd_shr(mask.as_i32x8(), i32x8::splat(31));
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i32x8::ZERO).as_m256i()
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskload_epi64(mem_addr: *const i64, mask: __m128i) -> __m128i {
    let mask = simd_shr(mask.as_i64x2(), i64x2::splat(63));
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i64x2::ZERO).as_m128i()
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskload_epi64(mem_addr: *const i64, mask: __m256i) -> __m256i {
    let mask = simd_shr(mask.as_i64x4(), i64x4::splat(63));
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i64x4::ZERO).as_m256i()
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskstore_epi32(mem_addr: *mut i32, mask: __m128i, a: __m128i) {
    let mask = simd_shr(mask.as_i32x4(), i32x4::splat(31));
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x4())
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskstore_epi32(mem_addr: *mut i32, mask: __m256i, a: __m256i) {
    let mask = simd_shr(mask.as_i32x8(), i32x8::splat(31));
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x8())
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskstore_epi64(mem_addr: *mut i64, mask: __m128i, a: __m128i) {
    let mask = simd_shr(mask.as_i64x2(), i64x2::splat(63));
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x2())
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskstore_epi64(mem_addr: *mut i64, mask: __m256i, a: __m256i) {
    let mask = simd_shr(mask.as_i64x4(), i64x4::splat(63));
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x4())
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i16x16();
        let b = b.as_i16x16();
        transmute(simd_select::<i16x16, _>(simd_gt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i32x8();
        let b = b.as_i32x8();
        transmute(simd_select::<i32x8, _>(simd_gt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i8x32();
        let b = b.as_i8x32();
        transmute(simd_select::<i8x32, _>(simd_gt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_u16x16();
        let b = b.as_u16x16();
        transmute(simd_select::<i16x16, _>(simd_gt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxud))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epu32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_u32x8();
        let b = b.as_u32x8();
        transmute(simd_select::<i32x8, _>(simd_gt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_u8x32();
        let b = b.as_u8x32();
        transmute(simd_select::<i8x32, _>(simd_gt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i16x16();
        let b = b.as_i16x16();
        transmute(simd_select::<i16x16, _>(simd_lt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i32x8();
        let b = b.as_i32x8();
        transmute(simd_select::<i32x8, _>(simd_lt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i8x32();
        let b = b.as_i8x32();
        transmute(simd_select::<i8x32, _>(simd_lt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_u16x16();
        let b = b.as_u16x16();
        transmute(simd_select::<i16x16, _>(simd_lt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminud))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epu32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_u32x8();
        let b = b.as_u32x8();
        transmute(simd_select::<i32x8, _>(simd_lt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_u8x32();
        let b = b.as_u8x32();
        transmute(simd_select::<i8x32, _>(simd_lt(a, b), a, b))
    }
}

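/// Creates a 32-bit bitmask from the most significant bit of each 8-bit
/// element in `a`.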
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovmskb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_movemask_epi8(a: __m256i) -> i32 {
    unsafe {
        let z = i8x32::ZERO;
        let m: i8x32 = simd_lt(a.as_i8x32(), z);
        simd_bitmask::<_, u32>(m) as i32
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vmpsadbw, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mpsadbw_epu8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { transmute(mpsadbw(a.as_u8x32(), b.as_u8x32(), IMM8 as i8)) }
}

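/// Multiplies the signed 32-bit integers in the low half of each 64-bit
/// element of `a` and `b`, producing packed signed 64-bit products.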
2125#[inline]
2132#[target_feature(enable = "avx2")]
2133#[cfg_attr(test, assert_instr(vpmuldq))]
2134#[stable(feature = "simd_x86", since = "1.27.0")]
2135pub fn _mm256_mul_epi32(a: __m256i, b: __m256i) -> __m256i {
2136 unsafe {
2137 let a = simd_cast::<_, i64x4>(simd_cast::<_, i32x4>(a.as_i64x4()));
2138 let b = simd_cast::<_, i64x4>(simd_cast::<_, i32x4>(b.as_i64x4()));
2139 transmute(simd_mul(a, b))
2140 }
2141}
2142
2143#[inline]
2150#[target_feature(enable = "avx2")]
2151#[cfg_attr(test, assert_instr(vpmuludq))]
2152#[stable(feature = "simd_x86", since = "1.27.0")]
2153pub fn _mm256_mul_epu32(a: __m256i, b: __m256i) -> __m256i {
2154 unsafe {
2155 let a = a.as_u64x4();
2156 let b = b.as_u64x4();
2157 let mask = u64x4::splat(u32::MAX.into());
2158 transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
2159 }
2160}
2161
2162#[inline]
2168#[target_feature(enable = "avx2")]
2169#[cfg_attr(test, assert_instr(vpmulhw))]
2170#[stable(feature = "simd_x86", since = "1.27.0")]
2171pub fn _mm256_mulhi_epi16(a: __m256i, b: __m256i) -> __m256i {
2172 unsafe {
2173 let a = simd_cast::<_, i32x16>(a.as_i16x16());
2174 let b = simd_cast::<_, i32x16>(b.as_i16x16());
2175 let r = simd_shr(simd_mul(a, b), i32x16::splat(16));
2176 transmute(simd_cast::<i32x16, i16x16>(r))
2177 }
2178}
2179
2180#[inline]
2186#[target_feature(enable = "avx2")]
2187#[cfg_attr(test, assert_instr(vpmulhuw))]
2188#[stable(feature = "simd_x86", since = "1.27.0")]
2189pub fn _mm256_mulhi_epu16(a: __m256i, b: __m256i) -> __m256i {
2190 unsafe {
2191 let a = simd_cast::<_, u32x16>(a.as_u16x16());
2192 let b = simd_cast::<_, u32x16>(b.as_u16x16());
2193 let r = simd_shr(simd_mul(a, b), u32x16::splat(16));
2194 transmute(simd_cast::<u32x16, u16x16>(r))
2195 }
2196}
2197
2198#[inline]
2204#[target_feature(enable = "avx2")]
2205#[cfg_attr(test, assert_instr(vpmullw))]
2206#[stable(feature = "simd_x86", since = "1.27.0")]
2207pub fn _mm256_mullo_epi16(a: __m256i, b: __m256i) -> __m256i {
2208 unsafe { transmute(simd_mul(a.as_i16x16(), b.as_i16x16())) }
2209}
2210
2211#[inline]
2217#[target_feature(enable = "avx2")]
2218#[cfg_attr(test, assert_instr(vpmulld))]
2219#[stable(feature = "simd_x86", since = "1.27.0")]
2220pub fn _mm256_mullo_epi32(a: __m256i, b: __m256i) -> __m256i {
2221 unsafe { transmute(simd_mul(a.as_i32x8(), b.as_i32x8())) }
2222}
2223
2224#[inline]
2231#[target_feature(enable = "avx2")]
2232#[cfg_attr(test, assert_instr(vpmulhrsw))]
2233#[stable(feature = "simd_x86", since = "1.27.0")]
2234pub fn _mm256_mulhrs_epi16(a: __m256i, b: __m256i) -> __m256i {
2235 unsafe { transmute(pmulhrsw(a.as_i16x16(), b.as_i16x16())) }
2236}
2237
2238#[inline]
2243#[target_feature(enable = "avx2")]
2244#[cfg_attr(test, assert_instr(vorps))]
2245#[stable(feature = "simd_x86", since = "1.27.0")]
2246pub fn _mm256_or_si256(a: __m256i, b: __m256i) -> __m256i {
2247 unsafe { transmute(simd_or(a.as_i32x8(), b.as_i32x8())) }
2248}
2249
2250#[inline]
2255#[target_feature(enable = "avx2")]
2256#[cfg_attr(test, assert_instr(vpacksswb))]
2257#[stable(feature = "simd_x86", since = "1.27.0")]
2258pub fn _mm256_packs_epi16(a: __m256i, b: __m256i) -> __m256i {
2259 unsafe { transmute(packsswb(a.as_i16x16(), b.as_i16x16())) }
2260}
2261
2262#[inline]
2267#[target_feature(enable = "avx2")]
2268#[cfg_attr(test, assert_instr(vpackssdw))]
2269#[stable(feature = "simd_x86", since = "1.27.0")]
2270pub fn _mm256_packs_epi32(a: __m256i, b: __m256i) -> __m256i {
2271 unsafe { transmute(packssdw(a.as_i32x8(), b.as_i32x8())) }
2272}
2273
2274#[inline]
2279#[target_feature(enable = "avx2")]
2280#[cfg_attr(test, assert_instr(vpackuswb))]
2281#[stable(feature = "simd_x86", since = "1.27.0")]
2282pub fn _mm256_packus_epi16(a: __m256i, b: __m256i) -> __m256i {
2283 unsafe { transmute(packuswb(a.as_i16x16(), b.as_i16x16())) }
2284}
2285
2286#[inline]
2291#[target_feature(enable = "avx2")]
2292#[cfg_attr(test, assert_instr(vpackusdw))]
2293#[stable(feature = "simd_x86", since = "1.27.0")]
2294pub fn _mm256_packus_epi32(a: __m256i, b: __m256i) -> __m256i {
2295 unsafe { transmute(packusdw(a.as_i32x8(), b.as_i32x8())) }
2296}
2297
2298#[inline]
2305#[target_feature(enable = "avx2")]
2306#[cfg_attr(test, assert_instr(vpermps))]
2307#[stable(feature = "simd_x86", since = "1.27.0")]
2308pub fn _mm256_permutevar8x32_epi32(a: __m256i, b: __m256i) -> __m256i {
2309 unsafe { transmute(permd(a.as_u32x8(), b.as_u32x8())) }
2310}
2311
2312#[inline]
2316#[target_feature(enable = "avx2")]
2317#[cfg_attr(test, assert_instr(vpermpd, IMM8 = 9))]
2318#[rustc_legacy_const_generics(1)]
2319#[stable(feature = "simd_x86", since = "1.27.0")]
2320pub fn _mm256_permute4x64_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
2321 static_assert_uimm_bits!(IMM8, 8);
2322 unsafe {
2323 let zero = i64x4::ZERO;
2324 let r: i64x4 = simd_shuffle!(
2325 a.as_i64x4(),
2326 zero,
2327 [
2328 IMM8 as u32 & 0b11,
2329 (IMM8 as u32 >> 2) & 0b11,
2330 (IMM8 as u32 >> 4) & 0b11,
2331 (IMM8 as u32 >> 6) & 0b11,
2332 ],
2333 );
2334 transmute(r)
2335 }
2336}
2337
2338#[inline]
2342#[target_feature(enable = "avx2")]
2343#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 9))]
2344#[rustc_legacy_const_generics(2)]
2345#[stable(feature = "simd_x86", since = "1.27.0")]
2346pub fn _mm256_permute2x128_si256<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
2347 static_assert_uimm_bits!(IMM8, 8);
2348 _mm256_permute2f128_si256::<IMM8>(a, b)
2349}
2350
2351#[inline]
2356#[target_feature(enable = "avx2")]
2357#[cfg_attr(test, assert_instr(vpermpd, IMM8 = 1))]
2358#[rustc_legacy_const_generics(1)]
2359#[stable(feature = "simd_x86", since = "1.27.0")]
2360pub fn _mm256_permute4x64_pd<const IMM8: i32>(a: __m256d) -> __m256d {
2361 static_assert_uimm_bits!(IMM8, 8);
2362 unsafe {
2363 simd_shuffle!(
2364 a,
2365 _mm256_undefined_pd(),
2366 [
2367 IMM8 as u32 & 0b11,
2368 (IMM8 as u32 >> 2) & 0b11,
2369 (IMM8 as u32 >> 4) & 0b11,
2370 (IMM8 as u32 >> 6) & 0b11,
2371 ],
2372 )
2373 }
2374}
2375
2376#[inline]
2381#[target_feature(enable = "avx2")]
2382#[cfg_attr(test, assert_instr(vpermps))]
2383#[stable(feature = "simd_x86", since = "1.27.0")]
2384pub fn _mm256_permutevar8x32_ps(a: __m256, idx: __m256i) -> __m256 {
2385 unsafe { permps(a, idx.as_i32x8()) }
2386}
2387
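// `vpsadbw` sums the absolute differences of each group of eight unsigned bytes, producing
// four 64-bit lanes whose low 16 bits hold the sums.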
2388#[inline]
2395#[target_feature(enable = "avx2")]
2396#[cfg_attr(test, assert_instr(vpsadbw))]
2397#[stable(feature = "simd_x86", since = "1.27.0")]
2398pub fn _mm256_sad_epu8(a: __m256i, b: __m256i) -> __m256i {
2399 unsafe { transmute(psadbw(a.as_u8x32(), b.as_u8x32())) }
2400}
2401
2402#[inline]
2433#[target_feature(enable = "avx2")]
2434#[cfg_attr(test, assert_instr(vpshufb))]
2435#[stable(feature = "simd_x86", since = "1.27.0")]
2436pub fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i {
2437 unsafe { transmute(pshufb(a.as_u8x32(), b.as_u8x32())) }
2438}
2439
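// `MASK` is applied independently to each 128-bit lane, which is why the second half of the
// shuffle indices below repeats the same 2-bit selectors offset by 4.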
2440#[inline]
2471#[target_feature(enable = "avx2")]
2472#[cfg_attr(test, assert_instr(vshufps, MASK = 9))]
2473#[rustc_legacy_const_generics(1)]
2474#[stable(feature = "simd_x86", since = "1.27.0")]
2475pub fn _mm256_shuffle_epi32<const MASK: i32>(a: __m256i) -> __m256i {
2476 static_assert_uimm_bits!(MASK, 8);
2477 unsafe {
2478 let r: i32x8 = simd_shuffle!(
2479 a.as_i32x8(),
2480 a.as_i32x8(),
2481 [
2482 MASK as u32 & 0b11,
2483 (MASK as u32 >> 2) & 0b11,
2484 (MASK as u32 >> 4) & 0b11,
2485 (MASK as u32 >> 6) & 0b11,
2486 (MASK as u32 & 0b11) + 4,
2487 ((MASK as u32 >> 2) & 0b11) + 4,
2488 ((MASK as u32 >> 4) & 0b11) + 4,
2489 ((MASK as u32 >> 6) & 0b11) + 4,
2490 ],
2491 );
2492 transmute(r)
2493 }
2494}
2495
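// shufflehi/shufflelo reorder only the upper (respectively lower) four words of each
// 128-bit lane; the other four words pass through unchanged, hence the fixed indices in
// the shuffle masks below.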
2496#[inline]
2502#[target_feature(enable = "avx2")]
2503#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 9))]
2504#[rustc_legacy_const_generics(1)]
2505#[stable(feature = "simd_x86", since = "1.27.0")]
2506pub fn _mm256_shufflehi_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
2507 static_assert_uimm_bits!(IMM8, 8);
2508 unsafe {
2509 let a = a.as_i16x16();
2510 let r: i16x16 = simd_shuffle!(
2511 a,
2512 a,
2513 [
2514 0,
2515 1,
2516 2,
2517 3,
2518 4 + (IMM8 as u32 & 0b11),
2519 4 + ((IMM8 as u32 >> 2) & 0b11),
2520 4 + ((IMM8 as u32 >> 4) & 0b11),
2521 4 + ((IMM8 as u32 >> 6) & 0b11),
2522 8,
2523 9,
2524 10,
2525 11,
2526 12 + (IMM8 as u32 & 0b11),
2527 12 + ((IMM8 as u32 >> 2) & 0b11),
2528 12 + ((IMM8 as u32 >> 4) & 0b11),
2529 12 + ((IMM8 as u32 >> 6) & 0b11),
2530 ],
2531 );
2532 transmute(r)
2533 }
2534}
2535
2536#[inline]
2542#[target_feature(enable = "avx2")]
2543#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 9))]
2544#[rustc_legacy_const_generics(1)]
2545#[stable(feature = "simd_x86", since = "1.27.0")]
2546pub fn _mm256_shufflelo_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
2547 static_assert_uimm_bits!(IMM8, 8);
2548 unsafe {
2549 let a = a.as_i16x16();
2550 let r: i16x16 = simd_shuffle!(
2551 a,
2552 a,
2553 [
2554 0 + (IMM8 as u32 & 0b11),
2555 0 + ((IMM8 as u32 >> 2) & 0b11),
2556 0 + ((IMM8 as u32 >> 4) & 0b11),
2557 0 + ((IMM8 as u32 >> 6) & 0b11),
2558 4,
2559 5,
2560 6,
2561 7,
2562 8 + (IMM8 as u32 & 0b11),
2563 8 + ((IMM8 as u32 >> 2) & 0b11),
2564 8 + ((IMM8 as u32 >> 4) & 0b11),
2565 8 + ((IMM8 as u32 >> 6) & 0b11),
2566 12,
2567 13,
2568 14,
2569 15,
2570 ],
2571 );
2572 transmute(r)
2573 }
2574}
2575
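// The sign family copies each lane of `a` negated, unchanged, or zeroed depending on
// whether the corresponding lane of `b` is negative, positive, or zero.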
2576#[inline]
2582#[target_feature(enable = "avx2")]
2583#[cfg_attr(test, assert_instr(vpsignw))]
2584#[stable(feature = "simd_x86", since = "1.27.0")]
2585pub fn _mm256_sign_epi16(a: __m256i, b: __m256i) -> __m256i {
2586 unsafe { transmute(psignw(a.as_i16x16(), b.as_i16x16())) }
2587}
2588
2589#[inline]
2595#[target_feature(enable = "avx2")]
2596#[cfg_attr(test, assert_instr(vpsignd))]
2597#[stable(feature = "simd_x86", since = "1.27.0")]
2598pub fn _mm256_sign_epi32(a: __m256i, b: __m256i) -> __m256i {
2599 unsafe { transmute(psignd(a.as_i32x8(), b.as_i32x8())) }
2600}
2601
2602#[inline]
2608#[target_feature(enable = "avx2")]
2609#[cfg_attr(test, assert_instr(vpsignb))]
2610#[stable(feature = "simd_x86", since = "1.27.0")]
2611pub fn _mm256_sign_epi8(a: __m256i, b: __m256i) -> __m256i {
2612 unsafe { transmute(psignb(a.as_i8x32(), b.as_i8x32())) }
2613}
2614
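// For the `sll`/`srl`/`sra` forms the shift amount is a single scalar taken from the low
// 64 bits of `count` and applied to every lane; amounts of at least the element width
// zero the result (or fill it with the sign bit for `sra`).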
2615#[inline]
2620#[target_feature(enable = "avx2")]
2621#[cfg_attr(test, assert_instr(vpsllw))]
2622#[stable(feature = "simd_x86", since = "1.27.0")]
2623pub fn _mm256_sll_epi16(a: __m256i, count: __m128i) -> __m256i {
2624 unsafe { transmute(psllw(a.as_i16x16(), count.as_i16x8())) }
2625}
2626
2627#[inline]
2632#[target_feature(enable = "avx2")]
2633#[cfg_attr(test, assert_instr(vpslld))]
2634#[stable(feature = "simd_x86", since = "1.27.0")]
2635pub fn _mm256_sll_epi32(a: __m256i, count: __m128i) -> __m256i {
2636 unsafe { transmute(pslld(a.as_i32x8(), count.as_i32x4())) }
2637}
2638
2639#[inline]
2644#[target_feature(enable = "avx2")]
2645#[cfg_attr(test, assert_instr(vpsllq))]
2646#[stable(feature = "simd_x86", since = "1.27.0")]
2647pub fn _mm256_sll_epi64(a: __m256i, count: __m128i) -> __m256i {
2648 unsafe { transmute(psllq(a.as_i64x4(), count.as_i64x2())) }
2649}
2650
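// The immediate shifts below branch on `IMM8` because the architecture defines shift
// counts of at least the element width to produce zero, while the portable `simd_shl`
// is not defined for such counts.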
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_slli_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 16 {
            _mm256_setzero_si256()
        } else {
            transmute(simd_shl(a.as_u16x16(), u16x16::splat(IMM8 as u16)))
        }
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpslld, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_slli_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 32 {
            _mm256_setzero_si256()
        } else {
            transmute(simd_shl(a.as_u32x8(), u32x8::splat(IMM8 as u32)))
        }
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_slli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 64 {
            _mm256_setzero_si256()
        } else {
            transmute(simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64)))
        }
    }
}
2710
2711#[inline]
2715#[target_feature(enable = "avx2")]
2716#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
2717#[rustc_legacy_const_generics(1)]
2718#[stable(feature = "simd_x86", since = "1.27.0")]
2719pub fn _mm256_slli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
2720 static_assert_uimm_bits!(IMM8, 8);
2721 _mm256_bslli_epi128::<IMM8>(a)
2722}
2723
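// Byte shifts operate within each 128-bit lane. `mask` builds shuffle indices into the
// concatenation [zero, a]: shifted-in positions select index 0 (a zero byte), everything
// else selects `32 + (i - shift)`, i.e. the corresponding byte of `a`.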
2724#[inline]
2728#[target_feature(enable = "avx2")]
2729#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
2730#[rustc_legacy_const_generics(1)]
2731#[stable(feature = "simd_x86", since = "1.27.0")]
2732pub fn _mm256_bslli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
2733 static_assert_uimm_bits!(IMM8, 8);
2734 const fn mask(shift: i32, i: u32) -> u32 {
2735 let shift = shift as u32 & 0xff;
2736 if shift > 15 || i % 16 < shift {
2737 0
2738 } else {
2739 32 + (i - shift)
2740 }
2741 }
2742 unsafe {
2743 let a = a.as_i8x32();
2744 let r: i8x32 = simd_shuffle!(
2745 i8x32::ZERO,
2746 a,
2747 [
2748 mask(IMM8, 0),
2749 mask(IMM8, 1),
2750 mask(IMM8, 2),
2751 mask(IMM8, 3),
2752 mask(IMM8, 4),
2753 mask(IMM8, 5),
2754 mask(IMM8, 6),
2755 mask(IMM8, 7),
2756 mask(IMM8, 8),
2757 mask(IMM8, 9),
2758 mask(IMM8, 10),
2759 mask(IMM8, 11),
2760 mask(IMM8, 12),
2761 mask(IMM8, 13),
2762 mask(IMM8, 14),
2763 mask(IMM8, 15),
2764 mask(IMM8, 16),
2765 mask(IMM8, 17),
2766 mask(IMM8, 18),
2767 mask(IMM8, 19),
2768 mask(IMM8, 20),
2769 mask(IMM8, 21),
2770 mask(IMM8, 22),
2771 mask(IMM8, 23),
2772 mask(IMM8, 24),
2773 mask(IMM8, 25),
2774 mask(IMM8, 26),
2775 mask(IMM8, 27),
2776 mask(IMM8, 28),
2777 mask(IMM8, 29),
2778 mask(IMM8, 30),
2779 mask(IMM8, 31),
2780 ],
2781 );
2782 transmute(r)
2783 }
2784}
2785
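/// Shifts each 32-bit lane of `a` left by the count in the corresponding lane of `count`,
/// shifting in zeros. Lanes whose count is 32 or greater become 0, unlike Rust's `<<`,
/// which would not be defined there; that is what the `no_overflow` select implements.
///
/// ```ignore
/// // Illustrative sketch only (requires an AVX2 target):
/// let r = _mm_sllv_epi32(_mm_set1_epi32(1), _mm_setr_epi32(0, 1, 31, 32));
/// // lanes: 1, 2, 1 << 31 (sign bit), 0
/// ```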
2786#[inline]
2792#[target_feature(enable = "avx2")]
2793#[cfg_attr(test, assert_instr(vpsllvd))]
2794#[stable(feature = "simd_x86", since = "1.27.0")]
2795pub fn _mm_sllv_epi32(a: __m128i, count: __m128i) -> __m128i {
2796 unsafe {
2797 let count = count.as_u32x4();
2798 let no_overflow: u32x4 = simd_lt(count, u32x4::splat(u32::BITS));
2799 let count = simd_select(no_overflow, count, u32x4::ZERO);
2800 simd_select(no_overflow, simd_shl(a.as_u32x4(), count), u32x4::ZERO).as_m128i()
2801 }
2802}
2803
2804#[inline]
2810#[target_feature(enable = "avx2")]
2811#[cfg_attr(test, assert_instr(vpsllvd))]
2812#[stable(feature = "simd_x86", since = "1.27.0")]
2813pub fn _mm256_sllv_epi32(a: __m256i, count: __m256i) -> __m256i {
2814 unsafe {
2815 let count = count.as_u32x8();
2816 let no_overflow: u32x8 = simd_lt(count, u32x8::splat(u32::BITS));
2817 let count = simd_select(no_overflow, count, u32x8::ZERO);
2818 simd_select(no_overflow, simd_shl(a.as_u32x8(), count), u32x8::ZERO).as_m256i()
2819 }
2820}
2821
2822#[inline]
2828#[target_feature(enable = "avx2")]
2829#[cfg_attr(test, assert_instr(vpsllvq))]
2830#[stable(feature = "simd_x86", since = "1.27.0")]
2831pub fn _mm_sllv_epi64(a: __m128i, count: __m128i) -> __m128i {
2832 unsafe {
2833 let count = count.as_u64x2();
2834 let no_overflow: u64x2 = simd_lt(count, u64x2::splat(u64::BITS as u64));
2835 let count = simd_select(no_overflow, count, u64x2::ZERO);
2836 simd_select(no_overflow, simd_shl(a.as_u64x2(), count), u64x2::ZERO).as_m128i()
2837 }
2838}
2839
2840#[inline]
2846#[target_feature(enable = "avx2")]
2847#[cfg_attr(test, assert_instr(vpsllvq))]
2848#[stable(feature = "simd_x86", since = "1.27.0")]
2849pub fn _mm256_sllv_epi64(a: __m256i, count: __m256i) -> __m256i {
2850 unsafe {
2851 let count = count.as_u64x4();
2852 let no_overflow: u64x4 = simd_lt(count, u64x4::splat(u64::BITS as u64));
2853 let count = simd_select(no_overflow, count, u64x4::ZERO);
2854 simd_select(no_overflow, simd_shl(a.as_u64x4(), count), u64x4::ZERO).as_m256i()
2855 }
2856}
2857
2858#[inline]
2863#[target_feature(enable = "avx2")]
2864#[cfg_attr(test, assert_instr(vpsraw))]
2865#[stable(feature = "simd_x86", since = "1.27.0")]
2866pub fn _mm256_sra_epi16(a: __m256i, count: __m128i) -> __m256i {
2867 unsafe { transmute(psraw(a.as_i16x16(), count.as_i16x8())) }
2868}
2869
2870#[inline]
2875#[target_feature(enable = "avx2")]
2876#[cfg_attr(test, assert_instr(vpsrad))]
2877#[stable(feature = "simd_x86", since = "1.27.0")]
2878pub fn _mm256_sra_epi32(a: __m256i, count: __m128i) -> __m256i {
2879 unsafe { transmute(psrad(a.as_i32x8(), count.as_i32x4())) }
2880}
2881
2882#[inline]
2887#[target_feature(enable = "avx2")]
2888#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 7))]
2889#[rustc_legacy_const_generics(1)]
2890#[stable(feature = "simd_x86", since = "1.27.0")]
2891pub fn _mm256_srai_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
2892 static_assert_uimm_bits!(IMM8, 8);
2893 unsafe { transmute(simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16))) }
2894}
2895
2896#[inline]
2901#[target_feature(enable = "avx2")]
2902#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 7))]
2903#[rustc_legacy_const_generics(1)]
2904#[stable(feature = "simd_x86", since = "1.27.0")]
2905pub fn _mm256_srai_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
2906 static_assert_uimm_bits!(IMM8, 8);
2907 unsafe { transmute(simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31)))) }
2908}
2909
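// Arithmetic variable shifts clamp oversized counts to 31, so lanes whose count is 32 or
// more are filled with copies of the sign bit rather than zeroed.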
2910#[inline]
2915#[target_feature(enable = "avx2")]
2916#[cfg_attr(test, assert_instr(vpsravd))]
2917#[stable(feature = "simd_x86", since = "1.27.0")]
2918pub fn _mm_srav_epi32(a: __m128i, count: __m128i) -> __m128i {
2919 unsafe {
2920 let count = count.as_u32x4();
2921 let no_overflow: u32x4 = simd_lt(count, u32x4::splat(u32::BITS));
2922 let count = simd_select(no_overflow, transmute(count), i32x4::splat(31));
2923 simd_shr(a.as_i32x4(), count).as_m128i()
2924 }
2925}
2926
2927#[inline]
2932#[target_feature(enable = "avx2")]
2933#[cfg_attr(test, assert_instr(vpsravd))]
2934#[stable(feature = "simd_x86", since = "1.27.0")]
2935pub fn _mm256_srav_epi32(a: __m256i, count: __m256i) -> __m256i {
2936 unsafe {
2937 let count = count.as_u32x8();
2938 let no_overflow: u32x8 = simd_lt(count, u32x8::splat(u32::BITS));
2939 let count = simd_select(no_overflow, transmute(count), i32x8::splat(31));
2940 simd_shr(a.as_i32x8(), count).as_m256i()
2941 }
2942}
2943
2944#[inline]
2948#[target_feature(enable = "avx2")]
2949#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 1))]
2950#[rustc_legacy_const_generics(1)]
2951#[stable(feature = "simd_x86", since = "1.27.0")]
2952pub fn _mm256_srli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
2953 static_assert_uimm_bits!(IMM8, 8);
2954 _mm256_bsrli_epi128::<IMM8>(a)
2955}
2956
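// Mirror image of `_mm256_bslli_epi128`: bytes are shifted right within each 128-bit lane,
// with `32 + (i + shift)` picking from `a` and out-of-range positions taking a zero byte.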
2957#[inline]
2961#[target_feature(enable = "avx2")]
2962#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 1))]
2963#[rustc_legacy_const_generics(1)]
2964#[stable(feature = "simd_x86", since = "1.27.0")]
2965pub fn _mm256_bsrli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
2966 static_assert_uimm_bits!(IMM8, 8);
2967 const fn mask(shift: i32, i: u32) -> u32 {
2968 let shift = shift as u32 & 0xff;
2969 if shift > 15 || (15 - (i % 16)) < shift {
2970 0
2971 } else {
2972 32 + (i + shift)
2973 }
2974 }
2975 unsafe {
2976 let a = a.as_i8x32();
2977 let r: i8x32 = simd_shuffle!(
2978 i8x32::ZERO,
2979 a,
2980 [
2981 mask(IMM8, 0),
2982 mask(IMM8, 1),
2983 mask(IMM8, 2),
2984 mask(IMM8, 3),
2985 mask(IMM8, 4),
2986 mask(IMM8, 5),
2987 mask(IMM8, 6),
2988 mask(IMM8, 7),
2989 mask(IMM8, 8),
2990 mask(IMM8, 9),
2991 mask(IMM8, 10),
2992 mask(IMM8, 11),
2993 mask(IMM8, 12),
2994 mask(IMM8, 13),
2995 mask(IMM8, 14),
2996 mask(IMM8, 15),
2997 mask(IMM8, 16),
2998 mask(IMM8, 17),
2999 mask(IMM8, 18),
3000 mask(IMM8, 19),
3001 mask(IMM8, 20),
3002 mask(IMM8, 21),
3003 mask(IMM8, 22),
3004 mask(IMM8, 23),
3005 mask(IMM8, 24),
3006 mask(IMM8, 25),
3007 mask(IMM8, 26),
3008 mask(IMM8, 27),
3009 mask(IMM8, 28),
3010 mask(IMM8, 29),
3011 mask(IMM8, 30),
3012 mask(IMM8, 31),
3013 ],
3014 );
3015 transmute(r)
3016 }
3017}
3018
3019#[inline]
3024#[target_feature(enable = "avx2")]
3025#[cfg_attr(test, assert_instr(vpsrlw))]
3026#[stable(feature = "simd_x86", since = "1.27.0")]
3027pub fn _mm256_srl_epi16(a: __m256i, count: __m128i) -> __m256i {
3028 unsafe { transmute(psrlw(a.as_i16x16(), count.as_i16x8())) }
3029}
3030
3031#[inline]
3036#[target_feature(enable = "avx2")]
3037#[cfg_attr(test, assert_instr(vpsrld))]
3038#[stable(feature = "simd_x86", since = "1.27.0")]
3039pub fn _mm256_srl_epi32(a: __m256i, count: __m128i) -> __m256i {
3040 unsafe { transmute(psrld(a.as_i32x8(), count.as_i32x4())) }
3041}
3042
3043#[inline]
3048#[target_feature(enable = "avx2")]
3049#[cfg_attr(test, assert_instr(vpsrlq))]
3050#[stable(feature = "simd_x86", since = "1.27.0")]
3051pub fn _mm256_srl_epi64(a: __m256i, count: __m128i) -> __m256i {
3052 unsafe { transmute(psrlq(a.as_i64x4(), count.as_i64x2())) }
3053}
3054
3055#[inline]
3060#[target_feature(enable = "avx2")]
3061#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 7))]
3062#[rustc_legacy_const_generics(1)]
3063#[stable(feature = "simd_x86", since = "1.27.0")]
3064pub fn _mm256_srli_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
3065 static_assert_uimm_bits!(IMM8, 8);
3066 unsafe {
3067 if IMM8 >= 16 {
3068 _mm256_setzero_si256()
3069 } else {
3070 transmute(simd_shr(a.as_u16x16(), u16x16::splat(IMM8 as u16)))
3071 }
3072 }
3073}
3074
3075#[inline]
3080#[target_feature(enable = "avx2")]
3081#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 7))]
3082#[rustc_legacy_const_generics(1)]
3083#[stable(feature = "simd_x86", since = "1.27.0")]
3084pub fn _mm256_srli_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
3085 static_assert_uimm_bits!(IMM8, 8);
3086 unsafe {
3087 if IMM8 >= 32 {
3088 _mm256_setzero_si256()
3089 } else {
3090 transmute(simd_shr(a.as_u32x8(), u32x8::splat(IMM8 as u32)))
3091 }
3092 }
3093}
3094
3095#[inline]
3100#[target_feature(enable = "avx2")]
3101#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 7))]
3102#[rustc_legacy_const_generics(1)]
3103#[stable(feature = "simd_x86", since = "1.27.0")]
3104pub fn _mm256_srli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
3105 static_assert_uimm_bits!(IMM8, 8);
3106 unsafe {
3107 if IMM8 >= 64 {
3108 _mm256_setzero_si256()
3109 } else {
3110 transmute(simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64)))
3111 }
3112 }
3113}
3114
3115#[inline]
3120#[target_feature(enable = "avx2")]
3121#[cfg_attr(test, assert_instr(vpsrlvd))]
3122#[stable(feature = "simd_x86", since = "1.27.0")]
3123pub fn _mm_srlv_epi32(a: __m128i, count: __m128i) -> __m128i {
3124 unsafe {
3125 let count = count.as_u32x4();
3126 let no_overflow: u32x4 = simd_lt(count, u32x4::splat(u32::BITS));
3127 let count = simd_select(no_overflow, count, u32x4::ZERO);
3128 simd_select(no_overflow, simd_shr(a.as_u32x4(), count), u32x4::ZERO).as_m128i()
3129 }
3130}
3131
3132#[inline]
3137#[target_feature(enable = "avx2")]
3138#[cfg_attr(test, assert_instr(vpsrlvd))]
3139#[stable(feature = "simd_x86", since = "1.27.0")]
3140pub fn _mm256_srlv_epi32(a: __m256i, count: __m256i) -> __m256i {
3141 unsafe {
3142 let count = count.as_u32x8();
3143 let no_overflow: u32x8 = simd_lt(count, u32x8::splat(u32::BITS));
3144 let count = simd_select(no_overflow, count, u32x8::ZERO);
3145 simd_select(no_overflow, simd_shr(a.as_u32x8(), count), u32x8::ZERO).as_m256i()
3146 }
3147}
3148
3149#[inline]
3154#[target_feature(enable = "avx2")]
3155#[cfg_attr(test, assert_instr(vpsrlvq))]
3156#[stable(feature = "simd_x86", since = "1.27.0")]
3157pub fn _mm_srlv_epi64(a: __m128i, count: __m128i) -> __m128i {
3158 unsafe {
3159 let count = count.as_u64x2();
3160 let no_overflow: u64x2 = simd_lt(count, u64x2::splat(u64::BITS as u64));
3161 let count = simd_select(no_overflow, count, u64x2::ZERO);
3162 simd_select(no_overflow, simd_shr(a.as_u64x2(), count), u64x2::ZERO).as_m128i()
3163 }
3164}
3165
3166#[inline]
3171#[target_feature(enable = "avx2")]
3172#[cfg_attr(test, assert_instr(vpsrlvq))]
3173#[stable(feature = "simd_x86", since = "1.27.0")]
3174pub fn _mm256_srlv_epi64(a: __m256i, count: __m256i) -> __m256i {
3175 unsafe {
3176 let count = count.as_u64x4();
3177 let no_overflow: u64x4 = simd_lt(count, u64x4::splat(u64::BITS as u64));
3178 let count = simd_select(no_overflow, count, u64x4::ZERO);
3179 simd_select(no_overflow, simd_shr(a.as_u64x4(), count), u64x4::ZERO).as_m256i()
3180 }
3181}
3182
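/// Loads 256 bits of integer data from memory into the returned vector using a
/// non-temporal hint to minimize cache pollution; `mem_addr` must be aligned on a
/// 32-byte boundary.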
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vmovntdqa))]
#[stable(feature = "simd_x86_updates", since = "1.82.0")]
pub unsafe fn _mm256_stream_load_si256(mem_addr: *const __m256i) -> __m256i {
    let dst: __m256i;
    // Inline assembly keeps the non-temporal hint from being dropped or split by the
    // compiler; `vpl!` is expected to splice the `{p}` memory operand into the template
    // in the appropriate assembler syntax.
    crate::arch::asm!(
        vpl!("vmovntdqa {a}"),
        a = out(ymm_reg) dst,
        p = in(reg) mem_addr,
        options(pure, readonly, nostack, preserves_flags),
    );
    dst
}
3202
3203#[inline]
3207#[target_feature(enable = "avx2")]
3208#[cfg_attr(test, assert_instr(vpsubw))]
3209#[stable(feature = "simd_x86", since = "1.27.0")]
3210pub fn _mm256_sub_epi16(a: __m256i, b: __m256i) -> __m256i {
3211 unsafe { transmute(simd_sub(a.as_i16x16(), b.as_i16x16())) }
3212}
3213
3214#[inline]
3218#[target_feature(enable = "avx2")]
3219#[cfg_attr(test, assert_instr(vpsubd))]
3220#[stable(feature = "simd_x86", since = "1.27.0")]
3221pub fn _mm256_sub_epi32(a: __m256i, b: __m256i) -> __m256i {
3222 unsafe { transmute(simd_sub(a.as_i32x8(), b.as_i32x8())) }
3223}
3224
3225#[inline]
3229#[target_feature(enable = "avx2")]
3230#[cfg_attr(test, assert_instr(vpsubq))]
3231#[stable(feature = "simd_x86", since = "1.27.0")]
3232pub fn _mm256_sub_epi64(a: __m256i, b: __m256i) -> __m256i {
3233 unsafe { transmute(simd_sub(a.as_i64x4(), b.as_i64x4())) }
3234}
3235
3236#[inline]
3240#[target_feature(enable = "avx2")]
3241#[cfg_attr(test, assert_instr(vpsubb))]
3242#[stable(feature = "simd_x86", since = "1.27.0")]
3243pub fn _mm256_sub_epi8(a: __m256i, b: __m256i) -> __m256i {
3244 unsafe { transmute(simd_sub(a.as_i8x32(), b.as_i8x32())) }
3245}
3246
3247#[inline]
3252#[target_feature(enable = "avx2")]
3253#[cfg_attr(test, assert_instr(vpsubsw))]
3254#[stable(feature = "simd_x86", since = "1.27.0")]
3255pub fn _mm256_subs_epi16(a: __m256i, b: __m256i) -> __m256i {
3256 unsafe { transmute(simd_saturating_sub(a.as_i16x16(), b.as_i16x16())) }
3257}
3258
3259#[inline]
3264#[target_feature(enable = "avx2")]
3265#[cfg_attr(test, assert_instr(vpsubsb))]
3266#[stable(feature = "simd_x86", since = "1.27.0")]
3267pub fn _mm256_subs_epi8(a: __m256i, b: __m256i) -> __m256i {
3268 unsafe { transmute(simd_saturating_sub(a.as_i8x32(), b.as_i8x32())) }
3269}
3270
3271#[inline]
3276#[target_feature(enable = "avx2")]
3277#[cfg_attr(test, assert_instr(vpsubusw))]
3278#[stable(feature = "simd_x86", since = "1.27.0")]
3279pub fn _mm256_subs_epu16(a: __m256i, b: __m256i) -> __m256i {
3280 unsafe { transmute(simd_saturating_sub(a.as_u16x16(), b.as_u16x16())) }
3281}
3282
3283#[inline]
3288#[target_feature(enable = "avx2")]
3289#[cfg_attr(test, assert_instr(vpsubusb))]
3290#[stable(feature = "simd_x86", since = "1.27.0")]
3291pub fn _mm256_subs_epu8(a: __m256i, b: __m256i) -> __m256i {
3292 unsafe { transmute(simd_saturating_sub(a.as_u8x32(), b.as_u8x32())) }
3293}
3294
3295#[inline]
3335#[target_feature(enable = "avx2")]
3336#[cfg_attr(test, assert_instr(vpunpckhbw))]
3337#[stable(feature = "simd_x86", since = "1.27.0")]
3338pub fn _mm256_unpackhi_epi8(a: __m256i, b: __m256i) -> __m256i {
3339 unsafe {
3340 #[rustfmt::skip]
3341 let r: i8x32 = simd_shuffle!(a.as_i8x32(), b.as_i8x32(), [
3342 8, 40, 9, 41, 10, 42, 11, 43,
3343 12, 44, 13, 45, 14, 46, 15, 47,
3344 24, 56, 25, 57, 26, 58, 27, 59,
3345 28, 60, 29, 61, 30, 62, 31, 63,
3346 ]);
3347 transmute(r)
3348 }
3349}
3350
3351#[inline]
3390#[target_feature(enable = "avx2")]
3391#[cfg_attr(test, assert_instr(vpunpcklbw))]
3392#[stable(feature = "simd_x86", since = "1.27.0")]
3393pub fn _mm256_unpacklo_epi8(a: __m256i, b: __m256i) -> __m256i {
3394 unsafe {
3395 #[rustfmt::skip]
3396 let r: i8x32 = simd_shuffle!(a.as_i8x32(), b.as_i8x32(), [
3397 0, 32, 1, 33, 2, 34, 3, 35,
3398 4, 36, 5, 37, 6, 38, 7, 39,
3399 16, 48, 17, 49, 18, 50, 19, 51,
3400 20, 52, 21, 53, 22, 54, 23, 55,
3401 ]);
3402 transmute(r)
3403 }
3404}
3405
3406#[inline]
3441#[target_feature(enable = "avx2")]
3442#[cfg_attr(test, assert_instr(vpunpckhwd))]
3443#[stable(feature = "simd_x86", since = "1.27.0")]
3444pub fn _mm256_unpackhi_epi16(a: __m256i, b: __m256i) -> __m256i {
3445 unsafe {
3446 let r: i16x16 = simd_shuffle!(
3447 a.as_i16x16(),
3448 b.as_i16x16(),
3449 [4, 20, 5, 21, 6, 22, 7, 23, 12, 28, 13, 29, 14, 30, 15, 31],
3450 );
3451 transmute(r)
3452 }
3453}
3454
3455#[inline]
3491#[target_feature(enable = "avx2")]
3492#[cfg_attr(test, assert_instr(vpunpcklwd))]
3493#[stable(feature = "simd_x86", since = "1.27.0")]
3494pub fn _mm256_unpacklo_epi16(a: __m256i, b: __m256i) -> __m256i {
3495 unsafe {
3496 let r: i16x16 = simd_shuffle!(
3497 a.as_i16x16(),
3498 b.as_i16x16(),
3499 [0, 16, 1, 17, 2, 18, 3, 19, 8, 24, 9, 25, 10, 26, 11, 27],
3500 );
3501 transmute(r)
3502 }
3503}
3504
3505#[inline]
3534#[target_feature(enable = "avx2")]
3535#[cfg_attr(test, assert_instr(vunpckhps))]
3536#[stable(feature = "simd_x86", since = "1.27.0")]
3537pub fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i {
3538 unsafe {
3539 let r: i32x8 = simd_shuffle!(a.as_i32x8(), b.as_i32x8(), [2, 10, 3, 11, 6, 14, 7, 15]);
3540 transmute(r)
3541 }
3542}
3543
3544#[inline]
3573#[target_feature(enable = "avx2")]
3574#[cfg_attr(test, assert_instr(vunpcklps))]
3575#[stable(feature = "simd_x86", since = "1.27.0")]
3576pub fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i {
3577 unsafe {
3578 let r: i32x8 = simd_shuffle!(a.as_i32x8(), b.as_i32x8(), [0, 8, 1, 9, 4, 12, 5, 13]);
3579 transmute(r)
3580 }
3581}
3582
3583#[inline]
3612#[target_feature(enable = "avx2")]
3613#[cfg_attr(test, assert_instr(vunpckhpd))]
3614#[stable(feature = "simd_x86", since = "1.27.0")]
3615pub fn _mm256_unpackhi_epi64(a: __m256i, b: __m256i) -> __m256i {
3616 unsafe {
3617 let r: i64x4 = simd_shuffle!(a.as_i64x4(), b.as_i64x4(), [1, 5, 3, 7]);
3618 transmute(r)
3619 }
3620}
3621
3622#[inline]
3651#[target_feature(enable = "avx2")]
3652#[cfg_attr(test, assert_instr(vunpcklpd))]
3653#[stable(feature = "simd_x86", since = "1.27.0")]
3654pub fn _mm256_unpacklo_epi64(a: __m256i, b: __m256i) -> __m256i {
3655 unsafe {
3656 let r: i64x4 = simd_shuffle!(a.as_i64x4(), b.as_i64x4(), [0, 4, 2, 6]);
3657 transmute(r)
3658 }
3659}
3660
3661#[inline]
3666#[target_feature(enable = "avx2")]
3667#[cfg_attr(test, assert_instr(vxorps))]
3668#[stable(feature = "simd_x86", since = "1.27.0")]
3669pub fn _mm256_xor_si256(a: __m256i, b: __m256i) -> __m256i {
3670 unsafe { transmute(simd_xor(a.as_i64x4(), b.as_i64x4())) }
3671}
3672
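// Extraction has no dedicated AVX2 instruction; it lowers to ordinary element extraction,
// and the selected value is zero-extended into the returned `i32`.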
3673#[inline]
3680#[target_feature(enable = "avx2")]
3681#[rustc_legacy_const_generics(1)]
3683#[stable(feature = "simd_x86", since = "1.27.0")]
3684pub fn _mm256_extract_epi8<const INDEX: i32>(a: __m256i) -> i32 {
3685 static_assert_uimm_bits!(INDEX, 5);
3686 unsafe { simd_extract!(a.as_u8x32(), INDEX as u32, u8) as i32 }
3687}
3688
3689#[inline]
3696#[target_feature(enable = "avx2")]
3697#[rustc_legacy_const_generics(1)]
3699#[stable(feature = "simd_x86", since = "1.27.0")]
3700pub fn _mm256_extract_epi16<const INDEX: i32>(a: __m256i) -> i32 {
3701 static_assert_uimm_bits!(INDEX, 4);
3702 unsafe { simd_extract!(a.as_u16x16(), INDEX as u32, u16) as i32 }
3703}
3704
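// Bindings to the LLVM intrinsics that back the operations above which have no
// portable-SIMD equivalent: saturating horizontal ops, multiply-adds, packs, byte
// shuffles, lane-crossing permutes, shifts by a vector count, and the gather family.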
3705#[allow(improper_ctypes)]
3706unsafe extern "C" {
3707 #[link_name = "llvm.x86.avx2.phadd.sw"]
3708 fn phaddsw(a: i16x16, b: i16x16) -> i16x16;
3709 #[link_name = "llvm.x86.avx2.phsub.sw"]
3710 fn phsubsw(a: i16x16, b: i16x16) -> i16x16;
3711 #[link_name = "llvm.x86.avx2.pmadd.wd"]
3712 fn pmaddwd(a: i16x16, b: i16x16) -> i32x8;
3713 #[link_name = "llvm.x86.avx2.pmadd.ub.sw"]
3714 fn pmaddubsw(a: u8x32, b: i8x32) -> i16x16;
3715 #[link_name = "llvm.x86.avx2.mpsadbw"]
3716 fn mpsadbw(a: u8x32, b: u8x32, imm8: i8) -> u16x16;
3717 #[link_name = "llvm.x86.avx2.pmul.hr.sw"]
3718 fn pmulhrsw(a: i16x16, b: i16x16) -> i16x16;
3719 #[link_name = "llvm.x86.avx2.packsswb"]
3720 fn packsswb(a: i16x16, b: i16x16) -> i8x32;
3721 #[link_name = "llvm.x86.avx2.packssdw"]
3722 fn packssdw(a: i32x8, b: i32x8) -> i16x16;
3723 #[link_name = "llvm.x86.avx2.packuswb"]
3724 fn packuswb(a: i16x16, b: i16x16) -> u8x32;
3725 #[link_name = "llvm.x86.avx2.packusdw"]
3726 fn packusdw(a: i32x8, b: i32x8) -> u16x16;
3727 #[link_name = "llvm.x86.avx2.psad.bw"]
3728 fn psadbw(a: u8x32, b: u8x32) -> u64x4;
3729 #[link_name = "llvm.x86.avx2.psign.b"]
3730 fn psignb(a: i8x32, b: i8x32) -> i8x32;
3731 #[link_name = "llvm.x86.avx2.psign.w"]
3732 fn psignw(a: i16x16, b: i16x16) -> i16x16;
3733 #[link_name = "llvm.x86.avx2.psign.d"]
3734 fn psignd(a: i32x8, b: i32x8) -> i32x8;
3735 #[link_name = "llvm.x86.avx2.psll.w"]
3736 fn psllw(a: i16x16, count: i16x8) -> i16x16;
3737 #[link_name = "llvm.x86.avx2.psll.d"]
3738 fn pslld(a: i32x8, count: i32x4) -> i32x8;
3739 #[link_name = "llvm.x86.avx2.psll.q"]
3740 fn psllq(a: i64x4, count: i64x2) -> i64x4;
3741 #[link_name = "llvm.x86.avx2.psra.w"]
3742 fn psraw(a: i16x16, count: i16x8) -> i16x16;
3743 #[link_name = "llvm.x86.avx2.psra.d"]
3744 fn psrad(a: i32x8, count: i32x4) -> i32x8;
3745 #[link_name = "llvm.x86.avx2.psrl.w"]
3746 fn psrlw(a: i16x16, count: i16x8) -> i16x16;
3747 #[link_name = "llvm.x86.avx2.psrl.d"]
3748 fn psrld(a: i32x8, count: i32x4) -> i32x8;
3749 #[link_name = "llvm.x86.avx2.psrl.q"]
3750 fn psrlq(a: i64x4, count: i64x2) -> i64x4;
3751 #[link_name = "llvm.x86.avx2.pshuf.b"]
3752 fn pshufb(a: u8x32, b: u8x32) -> u8x32;
3753 #[link_name = "llvm.x86.avx2.permd"]
3754 fn permd(a: u32x8, b: u32x8) -> u32x8;
3755 #[link_name = "llvm.x86.avx2.permps"]
3756 fn permps(a: __m256, b: i32x8) -> __m256;
3757 #[link_name = "llvm.x86.avx2.gather.d.d"]
3758 fn pgatherdd(src: i32x4, slice: *const i8, offsets: i32x4, mask: i32x4, scale: i8) -> i32x4;
3759 #[link_name = "llvm.x86.avx2.gather.d.d.256"]
3760 fn vpgatherdd(src: i32x8, slice: *const i8, offsets: i32x8, mask: i32x8, scale: i8) -> i32x8;
3761 #[link_name = "llvm.x86.avx2.gather.d.q"]
3762 fn pgatherdq(src: i64x2, slice: *const i8, offsets: i32x4, mask: i64x2, scale: i8) -> i64x2;
3763 #[link_name = "llvm.x86.avx2.gather.d.q.256"]
3764 fn vpgatherdq(src: i64x4, slice: *const i8, offsets: i32x4, mask: i64x4, scale: i8) -> i64x4;
3765 #[link_name = "llvm.x86.avx2.gather.q.d"]
3766 fn pgatherqd(src: i32x4, slice: *const i8, offsets: i64x2, mask: i32x4, scale: i8) -> i32x4;
3767 #[link_name = "llvm.x86.avx2.gather.q.d.256"]
3768 fn vpgatherqd(src: i32x4, slice: *const i8, offsets: i64x4, mask: i32x4, scale: i8) -> i32x4;
3769 #[link_name = "llvm.x86.avx2.gather.q.q"]
3770 fn pgatherqq(src: i64x2, slice: *const i8, offsets: i64x2, mask: i64x2, scale: i8) -> i64x2;
3771 #[link_name = "llvm.x86.avx2.gather.q.q.256"]
3772 fn vpgatherqq(src: i64x4, slice: *const i8, offsets: i64x4, mask: i64x4, scale: i8) -> i64x4;
3773 #[link_name = "llvm.x86.avx2.gather.d.pd"]
3774 fn pgatherdpd(
3775 src: __m128d,
3776 slice: *const i8,
3777 offsets: i32x4,
3778 mask: __m128d,
3779 scale: i8,
3780 ) -> __m128d;
3781 #[link_name = "llvm.x86.avx2.gather.d.pd.256"]
3782 fn vpgatherdpd(
3783 src: __m256d,
3784 slice: *const i8,
3785 offsets: i32x4,
3786 mask: __m256d,
3787 scale: i8,
3788 ) -> __m256d;
3789 #[link_name = "llvm.x86.avx2.gather.q.pd"]
3790 fn pgatherqpd(
3791 src: __m128d,
3792 slice: *const i8,
3793 offsets: i64x2,
3794 mask: __m128d,
3795 scale: i8,
3796 ) -> __m128d;
3797 #[link_name = "llvm.x86.avx2.gather.q.pd.256"]
3798 fn vpgatherqpd(
3799 src: __m256d,
3800 slice: *const i8,
3801 offsets: i64x4,
3802 mask: __m256d,
3803 scale: i8,
3804 ) -> __m256d;
3805 #[link_name = "llvm.x86.avx2.gather.d.ps"]
3806 fn pgatherdps(src: __m128, slice: *const i8, offsets: i32x4, mask: __m128, scale: i8)
3807 -> __m128;
3808 #[link_name = "llvm.x86.avx2.gather.d.ps.256"]
3809 fn vpgatherdps(
3810 src: __m256,
3811 slice: *const i8,
3812 offsets: i32x8,
3813 mask: __m256,
3814 scale: i8,
3815 ) -> __m256;
3816 #[link_name = "llvm.x86.avx2.gather.q.ps"]
3817 fn pgatherqps(src: __m128, slice: *const i8, offsets: i64x2, mask: __m128, scale: i8)
3818 -> __m128;
3819 #[link_name = "llvm.x86.avx2.gather.q.ps.256"]
3820 fn vpgatherqps(
3821 src: __m128,
3822 slice: *const i8,
3823 offsets: i64x4,
3824 mask: __m128,
3825 scale: i8,
3826 ) -> __m128;
3827}
3828
3829#[cfg(test)]
3830mod tests {
3831
3832 use stdarch_test::simd_test;
3833
3834 use crate::core_arch::x86::*;
3835
3836 #[simd_test(enable = "avx2")]
3837 unsafe fn test_mm256_abs_epi32() {
3838 #[rustfmt::skip]
3839 let a = _mm256_setr_epi32(
3840 0, 1, -1, i32::MAX,
3841 i32::MIN, 100, -100, -32,
3842 );
3843 let r = _mm256_abs_epi32(a);
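        // abs(i32::MIN) wraps back to i32::MIN, written below as i32::MAX.wrapping_add(1).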
3844 #[rustfmt::skip]
3845 let e = _mm256_setr_epi32(
3846 0, 1, 1, i32::MAX,
3847 i32::MAX.wrapping_add(1), 100, 100, 32,
3848 );
3849 assert_eq_m256i(r, e);
3850 }
3851
3852 #[simd_test(enable = "avx2")]
3853 unsafe fn test_mm256_abs_epi16() {
3854 #[rustfmt::skip]
3855 let a = _mm256_setr_epi16(
3856 0, 1, -1, 2, -2, 3, -3, 4,
3857 -4, 5, -5, i16::MAX, i16::MIN, 100, -100, -32,
3858 );
3859 let r = _mm256_abs_epi16(a);
3860 #[rustfmt::skip]
3861 let e = _mm256_setr_epi16(
3862 0, 1, 1, 2, 2, 3, 3, 4,
3863 4, 5, 5, i16::MAX, i16::MAX.wrapping_add(1), 100, 100, 32,
3864 );
3865 assert_eq_m256i(r, e);
3866 }
3867
3868 #[simd_test(enable = "avx2")]
3869 unsafe fn test_mm256_abs_epi8() {
3870 #[rustfmt::skip]
3871 let a = _mm256_setr_epi8(
3872 0, 1, -1, 2, -2, 3, -3, 4,
3873 -4, 5, -5, i8::MAX, i8::MIN, 100, -100, -32,
3874 0, 1, -1, 2, -2, 3, -3, 4,
3875 -4, 5, -5, i8::MAX, i8::MIN, 100, -100, -32,
3876 );
3877 let r = _mm256_abs_epi8(a);
3878 #[rustfmt::skip]
3879 let e = _mm256_setr_epi8(
3880 0, 1, 1, 2, 2, 3, 3, 4,
3881 4, 5, 5, i8::MAX, i8::MAX.wrapping_add(1), 100, 100, 32,
3882 0, 1, 1, 2, 2, 3, 3, 4,
3883 4, 5, 5, i8::MAX, i8::MAX.wrapping_add(1), 100, 100, 32,
3884 );
3885 assert_eq_m256i(r, e);
3886 }
3887
3888 #[simd_test(enable = "avx2")]
3889 unsafe fn test_mm256_add_epi64() {
3890 let a = _mm256_setr_epi64x(-10, 0, 100, 1_000_000_000);
3891 let b = _mm256_setr_epi64x(-1, 0, 1, 2);
3892 let r = _mm256_add_epi64(a, b);
3893 let e = _mm256_setr_epi64x(-11, 0, 101, 1_000_000_002);
3894 assert_eq_m256i(r, e);
3895 }
3896
3897 #[simd_test(enable = "avx2")]
3898 unsafe fn test_mm256_add_epi32() {
3899 let a = _mm256_setr_epi32(-1, 0, 1, 2, 3, 4, 5, 6);
3900 let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
3901 let r = _mm256_add_epi32(a, b);
3902 let e = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14);
3903 assert_eq_m256i(r, e);
3904 }
3905
3906 #[simd_test(enable = "avx2")]
3907 unsafe fn test_mm256_add_epi16() {
3908 #[rustfmt::skip]
3909 let a = _mm256_setr_epi16(
3910 0, 1, 2, 3, 4, 5, 6, 7,
3911 8, 9, 10, 11, 12, 13, 14, 15,
3912 );
3913 #[rustfmt::skip]
3914 let b = _mm256_setr_epi16(
3915 0, 1, 2, 3, 4, 5, 6, 7,
3916 8, 9, 10, 11, 12, 13, 14, 15,
3917 );
3918 let r = _mm256_add_epi16(a, b);
3919 #[rustfmt::skip]
3920 let e = _mm256_setr_epi16(
3921 0, 2, 4, 6, 8, 10, 12, 14,
3922 16, 18, 20, 22, 24, 26, 28, 30,
3923 );
3924 assert_eq_m256i(r, e);
3925 }
3926
3927 #[simd_test(enable = "avx2")]
3928 unsafe fn test_mm256_add_epi8() {
3929 #[rustfmt::skip]
3930 let a = _mm256_setr_epi8(
3931 0, 1, 2, 3, 4, 5, 6, 7,
3932 8, 9, 10, 11, 12, 13, 14, 15,
3933 16, 17, 18, 19, 20, 21, 22, 23,
3934 24, 25, 26, 27, 28, 29, 30, 31,
3935 );
3936 #[rustfmt::skip]
3937 let b = _mm256_setr_epi8(
3938 0, 1, 2, 3, 4, 5, 6, 7,
3939 8, 9, 10, 11, 12, 13, 14, 15,
3940 16, 17, 18, 19, 20, 21, 22, 23,
3941 24, 25, 26, 27, 28, 29, 30, 31,
3942 );
3943 let r = _mm256_add_epi8(a, b);
3944 #[rustfmt::skip]
3945 let e = _mm256_setr_epi8(
3946 0, 2, 4, 6, 8, 10, 12, 14,
3947 16, 18, 20, 22, 24, 26, 28, 30,
3948 32, 34, 36, 38, 40, 42, 44, 46,
3949 48, 50, 52, 54, 56, 58, 60, 62,
3950 );
3951 assert_eq_m256i(r, e);
3952 }
3953
3954 #[simd_test(enable = "avx2")]
3955 unsafe fn test_mm256_adds_epi8() {
3956 #[rustfmt::skip]
3957 let a = _mm256_setr_epi8(
3958 0, 1, 2, 3, 4, 5, 6, 7,
3959 8, 9, 10, 11, 12, 13, 14, 15,
3960 16, 17, 18, 19, 20, 21, 22, 23,
3961 24, 25, 26, 27, 28, 29, 30, 31,
3962 );
3963 #[rustfmt::skip]
3964 let b = _mm256_setr_epi8(
3965 32, 33, 34, 35, 36, 37, 38, 39,
3966 40, 41, 42, 43, 44, 45, 46, 47,
3967 48, 49, 50, 51, 52, 53, 54, 55,
3968 56, 57, 58, 59, 60, 61, 62, 63,
3969 );
3970 let r = _mm256_adds_epi8(a, b);
3971 #[rustfmt::skip]
3972 let e = _mm256_setr_epi8(
3973 32, 34, 36, 38, 40, 42, 44, 46,
3974 48, 50, 52, 54, 56, 58, 60, 62,
3975 64, 66, 68, 70, 72, 74, 76, 78,
3976 80, 82, 84, 86, 88, 90, 92, 94,
3977 );
3978 assert_eq_m256i(r, e);
3979 }
3980
3981 #[simd_test(enable = "avx2")]
3982 unsafe fn test_mm256_adds_epi8_saturate_positive() {
3983 let a = _mm256_set1_epi8(0x7F);
3984 let b = _mm256_set1_epi8(1);
3985 let r = _mm256_adds_epi8(a, b);
3986 assert_eq_m256i(r, a);
3987 }
3988
3989 #[simd_test(enable = "avx2")]
3990 unsafe fn test_mm256_adds_epi8_saturate_negative() {
3991 let a = _mm256_set1_epi8(-0x80);
3992 let b = _mm256_set1_epi8(-1);
3993 let r = _mm256_adds_epi8(a, b);
3994 assert_eq_m256i(r, a);
3995 }
3996
3997 #[simd_test(enable = "avx2")]
3998 unsafe fn test_mm256_adds_epi16() {
3999 #[rustfmt::skip]
4000 let a = _mm256_setr_epi16(
4001 0, 1, 2, 3, 4, 5, 6, 7,
4002 8, 9, 10, 11, 12, 13, 14, 15,
4003 );
4004 #[rustfmt::skip]
4005 let b = _mm256_setr_epi16(
4006 32, 33, 34, 35, 36, 37, 38, 39,
4007 40, 41, 42, 43, 44, 45, 46, 47,
4008 );
4009 let r = _mm256_adds_epi16(a, b);
4010 #[rustfmt::skip]
4011 let e = _mm256_setr_epi16(
4012 32, 34, 36, 38, 40, 42, 44, 46,
4013 48, 50, 52, 54, 56, 58, 60, 62,
4014 );
4016 assert_eq_m256i(r, e);
4017 }
4018
4019 #[simd_test(enable = "avx2")]
4020 unsafe fn test_mm256_adds_epi16_saturate_positive() {
4021 let a = _mm256_set1_epi16(0x7FFF);
4022 let b = _mm256_set1_epi16(1);
4023 let r = _mm256_adds_epi16(a, b);
4024 assert_eq_m256i(r, a);
4025 }
4026
4027 #[simd_test(enable = "avx2")]
4028 unsafe fn test_mm256_adds_epi16_saturate_negative() {
4029 let a = _mm256_set1_epi16(-0x8000);
4030 let b = _mm256_set1_epi16(-1);
4031 let r = _mm256_adds_epi16(a, b);
4032 assert_eq_m256i(r, a);
4033 }
4034
4035 #[simd_test(enable = "avx2")]
4036 unsafe fn test_mm256_adds_epu8() {
4037 #[rustfmt::skip]
4038 let a = _mm256_setr_epi8(
4039 0, 1, 2, 3, 4, 5, 6, 7,
4040 8, 9, 10, 11, 12, 13, 14, 15,
4041 16, 17, 18, 19, 20, 21, 22, 23,
4042 24, 25, 26, 27, 28, 29, 30, 31,
4043 );
4044 #[rustfmt::skip]
4045 let b = _mm256_setr_epi8(
4046 32, 33, 34, 35, 36, 37, 38, 39,
4047 40, 41, 42, 43, 44, 45, 46, 47,
4048 48, 49, 50, 51, 52, 53, 54, 55,
4049 56, 57, 58, 59, 60, 61, 62, 63,
4050 );
4051 let r = _mm256_adds_epu8(a, b);
4052 #[rustfmt::skip]
4053 let e = _mm256_setr_epi8(
4054 32, 34, 36, 38, 40, 42, 44, 46,
4055 48, 50, 52, 54, 56, 58, 60, 62,
4056 64, 66, 68, 70, 72, 74, 76, 78,
4057 80, 82, 84, 86, 88, 90, 92, 94,
4058 );
4059 assert_eq_m256i(r, e);
4060 }
4061
4062 #[simd_test(enable = "avx2")]
4063 unsafe fn test_mm256_adds_epu8_saturate() {
4064 let a = _mm256_set1_epi8(!0);
4065 let b = _mm256_set1_epi8(1);
4066 let r = _mm256_adds_epu8(a, b);
4067 assert_eq_m256i(r, a);
4068 }
4069
4070 #[simd_test(enable = "avx2")]
4071 unsafe fn test_mm256_adds_epu16() {
4072 #[rustfmt::skip]
4073 let a = _mm256_setr_epi16(
4074 0, 1, 2, 3, 4, 5, 6, 7,
4075 8, 9, 10, 11, 12, 13, 14, 15,
4076 );
4077 #[rustfmt::skip]
4078 let b = _mm256_setr_epi16(
4079 32, 33, 34, 35, 36, 37, 38, 39,
4080 40, 41, 42, 43, 44, 45, 46, 47,
4081 );
4082 let r = _mm256_adds_epu16(a, b);
4083 #[rustfmt::skip]
4084 let e = _mm256_setr_epi16(
4085 32, 34, 36, 38, 40, 42, 44, 46,
4086 48, 50, 52, 54, 56, 58, 60, 62,
4087 );
4089 assert_eq_m256i(r, e);
4090 }
4091
4092 #[simd_test(enable = "avx2")]
4093 unsafe fn test_mm256_adds_epu16_saturate() {
4094 let a = _mm256_set1_epi16(!0);
4095 let b = _mm256_set1_epi16(1);
4096 let r = _mm256_adds_epu16(a, b);
4097 assert_eq_m256i(r, a);
4098 }
4099
4100 #[simd_test(enable = "avx2")]
4101 unsafe fn test_mm256_and_si256() {
4102 let a = _mm256_set1_epi8(5);
4103 let b = _mm256_set1_epi8(3);
4104 let got = _mm256_and_si256(a, b);
4105 assert_eq_m256i(got, _mm256_set1_epi8(1));
4106 }
4107
4108 #[simd_test(enable = "avx2")]
4109 unsafe fn test_mm256_andnot_si256() {
4110 let a = _mm256_set1_epi8(5);
4111 let b = _mm256_set1_epi8(3);
4112 let got = _mm256_andnot_si256(a, b);
4113 assert_eq_m256i(got, _mm256_set1_epi8(2));
4114 }
4115
4116 #[simd_test(enable = "avx2")]
4117 unsafe fn test_mm256_avg_epu8() {
4118 let (a, b) = (_mm256_set1_epi8(3), _mm256_set1_epi8(9));
4119 let r = _mm256_avg_epu8(a, b);
4120 assert_eq_m256i(r, _mm256_set1_epi8(6));
4121 }
4122
4123 #[simd_test(enable = "avx2")]
4124 unsafe fn test_mm256_avg_epu16() {
4125 let (a, b) = (_mm256_set1_epi16(3), _mm256_set1_epi16(9));
4126 let r = _mm256_avg_epu16(a, b);
4127 assert_eq_m256i(r, _mm256_set1_epi16(6));
4128 }
4129
4130 #[simd_test(enable = "avx2")]
4131 unsafe fn test_mm_blend_epi32() {
4132 let (a, b) = (_mm_set1_epi32(3), _mm_set1_epi32(9));
4133 let e = _mm_setr_epi32(9, 3, 3, 3);
4134 let r = _mm_blend_epi32::<0x01>(a, b);
4135 assert_eq_m128i(r, e);
4136
4137 let r = _mm_blend_epi32::<0x0E>(b, a);
4138 assert_eq_m128i(r, e);
4139 }
4140
4141 #[simd_test(enable = "avx2")]
4142 unsafe fn test_mm256_blend_epi32() {
4143 let (a, b) = (_mm256_set1_epi32(3), _mm256_set1_epi32(9));
4144 let e = _mm256_setr_epi32(9, 3, 3, 3, 3, 3, 3, 3);
4145 let r = _mm256_blend_epi32::<0x01>(a, b);
4146 assert_eq_m256i(r, e);
4147
4148 let e = _mm256_setr_epi32(3, 9, 3, 3, 3, 3, 3, 9);
4149 let r = _mm256_blend_epi32::<0x82>(a, b);
4150 assert_eq_m256i(r, e);
4151
4152 let e = _mm256_setr_epi32(3, 3, 9, 9, 9, 9, 9, 3);
4153 let r = _mm256_blend_epi32::<0x7C>(a, b);
4154 assert_eq_m256i(r, e);
4155 }
4156
4157 #[simd_test(enable = "avx2")]
4158 unsafe fn test_mm256_blend_epi16() {
4159 let (a, b) = (_mm256_set1_epi16(3), _mm256_set1_epi16(9));
4160 let e = _mm256_setr_epi16(9, 3, 3, 3, 3, 3, 3, 3, 9, 3, 3, 3, 3, 3, 3, 3);
4161 let r = _mm256_blend_epi16::<0x01>(a, b);
4162 assert_eq_m256i(r, e);
4163
4164 let r = _mm256_blend_epi16::<0xFE>(b, a);
4165 assert_eq_m256i(r, e);
4166 }
4167
4168 #[simd_test(enable = "avx2")]
4169 unsafe fn test_mm256_blendv_epi8() {
4170 let (a, b) = (_mm256_set1_epi8(4), _mm256_set1_epi8(2));
4171 let mask = _mm256_insert_epi8::<2>(_mm256_set1_epi8(0), -1);
4172 let e = _mm256_insert_epi8::<2>(_mm256_set1_epi8(4), 2);
4173 let r = _mm256_blendv_epi8(a, b, mask);
4174 assert_eq_m256i(r, e);
4175 }
4176
4177 #[simd_test(enable = "avx2")]
4178 unsafe fn test_mm_broadcastb_epi8() {
4179 let a = _mm_insert_epi8::<0>(_mm_set1_epi8(0x00), 0x2a);
4180 let res = _mm_broadcastb_epi8(a);
4181 assert_eq_m128i(res, _mm_set1_epi8(0x2a));
4182 }
4183
4184 #[simd_test(enable = "avx2")]
4185 unsafe fn test_mm256_broadcastb_epi8() {
4186 let a = _mm_insert_epi8::<0>(_mm_set1_epi8(0x00), 0x2a);
4187 let res = _mm256_broadcastb_epi8(a);
4188 assert_eq_m256i(res, _mm256_set1_epi8(0x2a));
4189 }
4190
4191 #[simd_test(enable = "avx2")]
4192 unsafe fn test_mm_broadcastd_epi32() {
4193 let a = _mm_setr_epi32(0x2a, 0x8000000, 0, 0);
4194 let res = _mm_broadcastd_epi32(a);
4195 assert_eq_m128i(res, _mm_set1_epi32(0x2a));
4196 }
4197
4198 #[simd_test(enable = "avx2")]
4199 unsafe fn test_mm256_broadcastd_epi32() {
4200 let a = _mm_setr_epi32(0x2a, 0x8000000, 0, 0);
4201 let res = _mm256_broadcastd_epi32(a);
4202 assert_eq_m256i(res, _mm256_set1_epi32(0x2a));
4203 }
4204
4205 #[simd_test(enable = "avx2")]
4206 unsafe fn test_mm_broadcastq_epi64() {
4207 let a = _mm_setr_epi64x(0x1ffffffff, 0);
4208 let res = _mm_broadcastq_epi64(a);
4209 assert_eq_m128i(res, _mm_set1_epi64x(0x1ffffffff));
4210 }
4211
4212 #[simd_test(enable = "avx2")]
4213 unsafe fn test_mm256_broadcastq_epi64() {
4214 let a = _mm_setr_epi64x(0x1ffffffff, 0);
4215 let res = _mm256_broadcastq_epi64(a);
4216 assert_eq_m256i(res, _mm256_set1_epi64x(0x1ffffffff));
4217 }
4218
4219 #[simd_test(enable = "avx2")]
4220 unsafe fn test_mm_broadcastsd_pd() {
4221 let a = _mm_setr_pd(6.88, 3.44);
4222 let res = _mm_broadcastsd_pd(a);
4223 assert_eq_m128d(res, _mm_set1_pd(6.88));
4224 }
4225
4226 #[simd_test(enable = "avx2")]
4227 unsafe fn test_mm256_broadcastsd_pd() {
4228 let a = _mm_setr_pd(6.88, 3.44);
4229 let res = _mm256_broadcastsd_pd(a);
4230 assert_eq_m256d(res, _mm256_set1_pd(6.88f64));
4231 }
4232
4233 #[simd_test(enable = "avx2")]
4234 unsafe fn test_mm_broadcastsi128_si256() {
4235 let a = _mm_setr_epi64x(0x0987654321012334, 0x5678909876543210);
4236 let res = _mm_broadcastsi128_si256(a);
4237 let retval = _mm256_setr_epi64x(
4238 0x0987654321012334,
4239 0x5678909876543210,
4240 0x0987654321012334,
4241 0x5678909876543210,
4242 );
4243 assert_eq_m256i(res, retval);
4244 }
4245
4246 #[simd_test(enable = "avx2")]
4247 unsafe fn test_mm256_broadcastsi128_si256() {
4248 let a = _mm_setr_epi64x(0x0987654321012334, 0x5678909876543210);
4249 let res = _mm256_broadcastsi128_si256(a);
4250 let retval = _mm256_setr_epi64x(
4251 0x0987654321012334,
4252 0x5678909876543210,
4253 0x0987654321012334,
4254 0x5678909876543210,
4255 );
4256 assert_eq_m256i(res, retval);
4257 }
4258
4259 #[simd_test(enable = "avx2")]
4260 unsafe fn test_mm_broadcastss_ps() {
4261 let a = _mm_setr_ps(6.88, 3.44, 0.0, 0.0);
4262 let res = _mm_broadcastss_ps(a);
4263 assert_eq_m128(res, _mm_set1_ps(6.88));
4264 }
4265
4266 #[simd_test(enable = "avx2")]
4267 unsafe fn test_mm256_broadcastss_ps() {
4268 let a = _mm_setr_ps(6.88, 3.44, 0.0, 0.0);
4269 let res = _mm256_broadcastss_ps(a);
4270 assert_eq_m256(res, _mm256_set1_ps(6.88));
4271 }
4272
4273 #[simd_test(enable = "avx2")]
4274 unsafe fn test_mm_broadcastw_epi16() {
4275 let a = _mm_insert_epi16::<0>(_mm_set1_epi16(0x2a), 0x22b);
4276 let res = _mm_broadcastw_epi16(a);
4277 assert_eq_m128i(res, _mm_set1_epi16(0x22b));
4278 }
4279
4280 #[simd_test(enable = "avx2")]
4281 unsafe fn test_mm256_broadcastw_epi16() {
4282 let a = _mm_insert_epi16::<0>(_mm_set1_epi16(0x2a), 0x22b);
4283 let res = _mm256_broadcastw_epi16(a);
4284 assert_eq_m256i(res, _mm256_set1_epi16(0x22b));
4285 }
4286
4287 #[simd_test(enable = "avx2")]
4288 unsafe fn test_mm256_cmpeq_epi8() {
4289 #[rustfmt::skip]
4290 let a = _mm256_setr_epi8(
4291 0, 1, 2, 3, 4, 5, 6, 7,
4292 8, 9, 10, 11, 12, 13, 14, 15,
4293 16, 17, 18, 19, 20, 21, 22, 23,
4294 24, 25, 26, 27, 28, 29, 30, 31,
4295 );
4296 #[rustfmt::skip]
4297 let b = _mm256_setr_epi8(
4298 31, 30, 2, 28, 27, 26, 25, 24,
4299 23, 22, 21, 20, 19, 18, 17, 16,
4300 15, 14, 13, 12, 11, 10, 9, 8,
4301 7, 6, 5, 4, 3, 2, 1, 0,
4302 );
4303 let r = _mm256_cmpeq_epi8(a, b);
4304 assert_eq_m256i(r, _mm256_insert_epi8::<2>(_mm256_set1_epi8(0), !0));
4305 }
4306
4307 #[simd_test(enable = "avx2")]
4308 unsafe fn test_mm256_cmpeq_epi16() {
4309 #[rustfmt::skip]
4310 let a = _mm256_setr_epi16(
4311 0, 1, 2, 3, 4, 5, 6, 7,
4312 8, 9, 10, 11, 12, 13, 14, 15,
4313 );
4314 #[rustfmt::skip]
4315 let b = _mm256_setr_epi16(
4316 15, 14, 2, 12, 11, 10, 9, 8,
4317 7, 6, 5, 4, 3, 2, 1, 0,
4318 );
4319 let r = _mm256_cmpeq_epi16(a, b);
4320 assert_eq_m256i(r, _mm256_insert_epi16::<2>(_mm256_set1_epi16(0), !0));
4321 }
4322
4323 #[simd_test(enable = "avx2")]
4324 unsafe fn test_mm256_cmpeq_epi32() {
4325 let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
4326 let b = _mm256_setr_epi32(7, 6, 2, 4, 3, 2, 1, 0);
4327 let r = _mm256_cmpeq_epi32(a, b);
4328 let e = _mm256_set1_epi32(0);
4329 let e = _mm256_insert_epi32::<2>(e, !0);
4330 assert_eq_m256i(r, e);
4331 }
4332
4333 #[simd_test(enable = "avx2")]
4334 unsafe fn test_mm256_cmpeq_epi64() {
4335 let a = _mm256_setr_epi64x(0, 1, 2, 3);
4336 let b = _mm256_setr_epi64x(3, 2, 2, 0);
4337 let r = _mm256_cmpeq_epi64(a, b);
4338 assert_eq_m256i(r, _mm256_insert_epi64::<2>(_mm256_set1_epi64x(0), !0));
4339 }
4340
4341 #[simd_test(enable = "avx2")]
4342 unsafe fn test_mm256_cmpgt_epi8() {
4343 let a = _mm256_insert_epi8::<0>(_mm256_set1_epi8(0), 5);
4344 let b = _mm256_set1_epi8(0);
4345 let r = _mm256_cmpgt_epi8(a, b);
4346 assert_eq_m256i(r, _mm256_insert_epi8::<0>(_mm256_set1_epi8(0), !0));
4347 }
4348
4349 #[simd_test(enable = "avx2")]
4350 unsafe fn test_mm256_cmpgt_epi16() {
4351 let a = _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), 5);
4352 let b = _mm256_set1_epi16(0);
4353 let r = _mm256_cmpgt_epi16(a, b);
4354 assert_eq_m256i(r, _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), !0));
4355 }
4356
4357 #[simd_test(enable = "avx2")]
4358 unsafe fn test_mm256_cmpgt_epi32() {
4359 let a = _mm256_insert_epi32::<0>(_mm256_set1_epi32(0), 5);
4360 let b = _mm256_set1_epi32(0);
4361 let r = _mm256_cmpgt_epi32(a, b);
4362 assert_eq_m256i(r, _mm256_insert_epi32::<0>(_mm256_set1_epi32(0), !0));
4363 }
4364
4365 #[simd_test(enable = "avx2")]
4366 unsafe fn test_mm256_cmpgt_epi64() {
4367 let a = _mm256_insert_epi64::<0>(_mm256_set1_epi64x(0), 5);
4368 let b = _mm256_set1_epi64x(0);
4369 let r = _mm256_cmpgt_epi64(a, b);
4370 assert_eq_m256i(r, _mm256_insert_epi64::<0>(_mm256_set1_epi64x(0), !0));
4371 }
4372
4373 #[simd_test(enable = "avx2")]
4374 unsafe fn test_mm256_cvtepi8_epi16() {
4375 #[rustfmt::skip]
4376 let a = _mm_setr_epi8(
4377 0, 0, -1, 1, -2, 2, -3, 3,
4378 -4, 4, -5, 5, -6, 6, -7, 7,
4379 );
4380 #[rustfmt::skip]
4381 let r = _mm256_setr_epi16(
4382 0, 0, -1, 1, -2, 2, -3, 3,
4383 -4, 4, -5, 5, -6, 6, -7, 7,
4384 );
4385 assert_eq_m256i(r, _mm256_cvtepi8_epi16(a));
4386 }
4387
4388 #[simd_test(enable = "avx2")]
4389 unsafe fn test_mm256_cvtepi8_epi32() {
4390 #[rustfmt::skip]
4391 let a = _mm_setr_epi8(
4392 0, 0, -1, 1, -2, 2, -3, 3,
4393 -4, 4, -5, 5, -6, 6, -7, 7,
4394 );
4395 let r = _mm256_setr_epi32(0, 0, -1, 1, -2, 2, -3, 3);
4396 assert_eq_m256i(r, _mm256_cvtepi8_epi32(a));
4397 }
4398
4399 #[simd_test(enable = "avx2")]
4400 unsafe fn test_mm256_cvtepi8_epi64() {
4401 #[rustfmt::skip]
4402 let a = _mm_setr_epi8(
4403 0, 0, -1, 1, -2, 2, -3, 3,
4404 -4, 4, -5, 5, -6, 6, -7, 7,
4405 );
4406 let r = _mm256_setr_epi64x(0, 0, -1, 1);
4407 assert_eq_m256i(r, _mm256_cvtepi8_epi64(a));
4408 }
4409
4410 #[simd_test(enable = "avx2")]
4411 unsafe fn test_mm256_cvtepi16_epi32() {
4412 let a = _mm_setr_epi16(0, 0, -1, 1, -2, 2, -3, 3);
4413 let r = _mm256_setr_epi32(0, 0, -1, 1, -2, 2, -3, 3);
4414 assert_eq_m256i(r, _mm256_cvtepi16_epi32(a));
4415 }
4416
4417 #[simd_test(enable = "avx2")]
4418 unsafe fn test_mm256_cvtepi16_epi64() {
4419 let a = _mm_setr_epi16(0, 0, -1, 1, -2, 2, -3, 3);
4420 let r = _mm256_setr_epi64x(0, 0, -1, 1);
4421 assert_eq_m256i(r, _mm256_cvtepi16_epi64(a));
4422 }
4423
4424 #[simd_test(enable = "avx2")]
4425 unsafe fn test_mm256_cvtepi32_epi64() {
4426 let a = _mm_setr_epi32(0, 0, -1, 1);
4427 let r = _mm256_setr_epi64x(0, 0, -1, 1);
4428 assert_eq_m256i(r, _mm256_cvtepi32_epi64(a));
4429 }
4430
4431 #[simd_test(enable = "avx2")]
4432 unsafe fn test_mm256_cvtepu16_epi32() {
4433 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4434 let r = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
4435 assert_eq_m256i(r, _mm256_cvtepu16_epi32(a));
4436 }
4437
4438 #[simd_test(enable = "avx2")]
4439 unsafe fn test_mm256_cvtepu16_epi64() {
4440 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4441 let r = _mm256_setr_epi64x(0, 1, 2, 3);
4442 assert_eq_m256i(r, _mm256_cvtepu16_epi64(a));
4443 }
4444
4445 #[simd_test(enable = "avx2")]
4446 unsafe fn test_mm256_cvtepu32_epi64() {
4447 let a = _mm_setr_epi32(0, 1, 2, 3);
4448 let r = _mm256_setr_epi64x(0, 1, 2, 3);
4449 assert_eq_m256i(r, _mm256_cvtepu32_epi64(a));
4450 }
4451
4452 #[simd_test(enable = "avx2")]
4453 unsafe fn test_mm256_cvtepu8_epi16() {
4454 #[rustfmt::skip]
4455 let a = _mm_setr_epi8(
4456 0, 1, 2, 3, 4, 5, 6, 7,
4457 8, 9, 10, 11, 12, 13, 14, 15,
4458 );
4459 #[rustfmt::skip]
4460 let r = _mm256_setr_epi16(
4461 0, 1, 2, 3, 4, 5, 6, 7,
4462 8, 9, 10, 11, 12, 13, 14, 15,
4463 );
4464 assert_eq_m256i(r, _mm256_cvtepu8_epi16(a));
4465 }
4466
4467 #[simd_test(enable = "avx2")]
4468 unsafe fn test_mm256_cvtepu8_epi32() {
4469 #[rustfmt::skip]
4470 let a = _mm_setr_epi8(
4471 0, 1, 2, 3, 4, 5, 6, 7,
4472 8, 9, 10, 11, 12, 13, 14, 15,
4473 );
4474 let r = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
4475 assert_eq_m256i(r, _mm256_cvtepu8_epi32(a));
4476 }
4477
4478 #[simd_test(enable = "avx2")]
4479 unsafe fn test_mm256_cvtepu8_epi64() {
4480 #[rustfmt::skip]
4481 let a = _mm_setr_epi8(
4482 0, 1, 2, 3, 4, 5, 6, 7,
4483 8, 9, 10, 11, 12, 13, 14, 15,
4484 );
4485 let r = _mm256_setr_epi64x(0, 1, 2, 3);
4486 assert_eq_m256i(r, _mm256_cvtepu8_epi64(a));
4487 }
4488
4489 #[simd_test(enable = "avx2")]
4490 unsafe fn test_mm256_extracti128_si256() {
4491 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4492 let r = _mm256_extracti128_si256::<1>(a);
4493 let e = _mm_setr_epi64x(3, 4);
4494 assert_eq_m128i(r, e);
4495 }
4496
4497 #[simd_test(enable = "avx2")]
4498 unsafe fn test_mm256_hadd_epi16() {
4499 let a = _mm256_set1_epi16(2);
4500 let b = _mm256_set1_epi16(4);
4501 let r = _mm256_hadd_epi16(a, b);
4502 let e = _mm256_setr_epi16(4, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 8);
4503 assert_eq_m256i(r, e);
4504 }
4505
4506 #[simd_test(enable = "avx2")]
4507 unsafe fn test_mm256_hadd_epi32() {
4508 let a = _mm256_set1_epi32(2);
4509 let b = _mm256_set1_epi32(4);
4510 let r = _mm256_hadd_epi32(a, b);
4511 let e = _mm256_setr_epi32(4, 4, 8, 8, 4, 4, 8, 8);
4512 assert_eq_m256i(r, e);
4513 }
4514
4515 #[simd_test(enable = "avx2")]
4516 unsafe fn test_mm256_hadds_epi16() {
4517 let a = _mm256_set1_epi16(2);
4518 let a = _mm256_insert_epi16::<0>(a, 0x7fff);
4519 let a = _mm256_insert_epi16::<1>(a, 1);
4520 let b = _mm256_set1_epi16(4);
4521 let r = _mm256_hadds_epi16(a, b);
4522 #[rustfmt::skip]
4523 let e = _mm256_setr_epi16(
4524 0x7FFF, 4, 4, 4, 8, 8, 8, 8,
4525 4, 4, 4, 4, 8, 8, 8, 8,
4526 );
4527 assert_eq_m256i(r, e);
4528 }
4529
4530 #[simd_test(enable = "avx2")]
4531 unsafe fn test_mm256_hsub_epi16() {
4532 let a = _mm256_set1_epi16(2);
4533 let b = _mm256_set1_epi16(4);
4534 let r = _mm256_hsub_epi16(a, b);
4535 let e = _mm256_set1_epi16(0);
4536 assert_eq_m256i(r, e);
4537 }
4538
4539 #[simd_test(enable = "avx2")]
4540 unsafe fn test_mm256_hsub_epi32() {
4541 let a = _mm256_set1_epi32(2);
4542 let b = _mm256_set1_epi32(4);
4543 let r = _mm256_hsub_epi32(a, b);
4544 let e = _mm256_set1_epi32(0);
4545 assert_eq_m256i(r, e);
4546 }
4547
4548 #[simd_test(enable = "avx2")]
4549 unsafe fn test_mm256_hsubs_epi16() {
4550 let a = _mm256_set1_epi16(2);
4551 let a = _mm256_insert_epi16::<0>(a, 0x7fff);
4552 let a = _mm256_insert_epi16::<1>(a, -1);
4553 let b = _mm256_set1_epi16(4);
4554 let r = _mm256_hsubs_epi16(a, b);
4555 let e = _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), 0x7FFF);
4556 assert_eq_m256i(r, e);
4557 }
4558
4559 #[simd_test(enable = "avx2")]
4560 unsafe fn test_mm256_madd_epi16() {
4561 let a = _mm256_set1_epi16(2);
4562 let b = _mm256_set1_epi16(4);
4563 let r = _mm256_madd_epi16(a, b);
4564 let e = _mm256_set1_epi32(16);
4565 assert_eq_m256i(r, e);
4566 }
4567
4568 #[simd_test(enable = "avx2")]
4569 unsafe fn test_mm256_inserti128_si256() {
4570 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4571 let b = _mm_setr_epi64x(7, 8);
4572 let r = _mm256_inserti128_si256::<1>(a, b);
4573 let e = _mm256_setr_epi64x(1, 2, 7, 8);
4574 assert_eq_m256i(r, e);
4575 }
4576
4577 #[simd_test(enable = "avx2")]
4578 unsafe fn test_mm256_maddubs_epi16() {
4579 let a = _mm256_set1_epi8(2);
4580 let b = _mm256_set1_epi8(4);
4581 let r = _mm256_maddubs_epi16(a, b);
4582 let e = _mm256_set1_epi16(16);
4583 assert_eq_m256i(r, e);
4584 }
4585
4586 #[simd_test(enable = "avx2")]
4587 unsafe fn test_mm_maskload_epi32() {
4588 let nums = [1, 2, 3, 4];
4589 let a = &nums as *const i32;
4590 let mask = _mm_setr_epi32(-1, 0, 0, -1);
4591 let r = _mm_maskload_epi32(a, mask);
4592 let e = _mm_setr_epi32(1, 0, 0, 4);
4593 assert_eq_m128i(r, e);
4594 }
4595
4596 #[simd_test(enable = "avx2")]
4597 unsafe fn test_mm256_maskload_epi32() {
4598 let nums = [1, 2, 3, 4, 5, 6, 7, 8];
4599 let a = &nums as *const i32;
4600 let mask = _mm256_setr_epi32(-1, 0, 0, -1, 0, -1, -1, 0);
4601 let r = _mm256_maskload_epi32(a, mask);
4602 let e = _mm256_setr_epi32(1, 0, 0, 4, 0, 6, 7, 0);
4603 assert_eq_m256i(r, e);
4604 }
4605
4606 #[simd_test(enable = "avx2")]
4607 unsafe fn test_mm_maskload_epi64() {
4608 let nums = [1_i64, 2_i64];
4609 let a = &nums as *const i64;
4610 let mask = _mm_setr_epi64x(0, -1);
4611 let r = _mm_maskload_epi64(a, mask);
4612 let e = _mm_setr_epi64x(0, 2);
4613 assert_eq_m128i(r, e);
4614 }
4615
4616 #[simd_test(enable = "avx2")]
4617 unsafe fn test_mm256_maskload_epi64() {
4618 let nums = [1_i64, 2_i64, 3_i64, 4_i64];
4619 let a = &nums as *const i64;
4620 let mask = _mm256_setr_epi64x(0, -1, -1, 0);
4621 let r = _mm256_maskload_epi64(a, mask);
4622 let e = _mm256_setr_epi64x(0, 2, 3, 0);
4623 assert_eq_m256i(r, e);
4624 }
4625
4626 #[simd_test(enable = "avx2")]
4627 unsafe fn test_mm_maskstore_epi32() {
4628 let a = _mm_setr_epi32(1, 2, 3, 4);
4629 let mut arr = [-1, -1, -1, -1];
4630 let mask = _mm_setr_epi32(-1, 0, 0, -1);
4631 _mm_maskstore_epi32(arr.as_mut_ptr(), mask, a);
        let e = [1, -1, -1, 4];
        assert_eq!(arr, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_maskstore_epi32() {
        let a = _mm256_setr_epi32(1, 0x6d726f, 3, 42, 0x777161, 6, 7, 8);
        let mut arr = [-1, -1, -1, 0x776173, -1, 0x68657265, -1, -1];
        let mask = _mm256_setr_epi32(-1, 0, 0, -1, 0, -1, -1, 0);
        _mm256_maskstore_epi32(arr.as_mut_ptr(), mask, a);
        let e = [1, -1, -1, 42, -1, 6, 7, -1];
        assert_eq!(arr, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_maskstore_epi64() {
        let a = _mm_setr_epi64x(1_i64, 2_i64);
        let mut arr = [-1_i64, -1_i64];
        let mask = _mm_setr_epi64x(0, -1);
        _mm_maskstore_epi64(arr.as_mut_ptr(), mask, a);
        let e = [-1, 2];
        assert_eq!(arr, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_maskstore_epi64() {
        let a = _mm256_setr_epi64x(1_i64, 2_i64, 3_i64, 4_i64);
        let mut arr = [-1_i64, -1_i64, -1_i64, -1_i64];
        let mask = _mm256_setr_epi64x(0, -1, -1, 0);
        _mm256_maskstore_epi64(arr.as_mut_ptr(), mask, a);
        let e = [-1, 2, 3, -1];
        assert_eq!(arr, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_max_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_max_epi16(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_max_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_max_epi32(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_max_epi8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_max_epi8(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_max_epu16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_max_epu16(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_max_epu32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_max_epu32(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_max_epu8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_max_epu8(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_min_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_min_epi16(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_min_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_min_epi32(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_min_epi8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_min_epi8(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_min_epu16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_min_epu16(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_min_epu32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_min_epu32(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_min_epu8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_min_epu8(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_movemask_epi8() {
        let a = _mm256_set1_epi8(-1);
        let r = _mm256_movemask_epi8(a);
        let e = -1;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mpsadbw_epu8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_mpsadbw_epu8::<0>(a, b);
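        // With an immediate of 0, every 16-bit result is the SAD of four byte pairs: 4 * |2 - 4| = 8.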
        let e = _mm256_set1_epi16(8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mul_epi32() {
        let a = _mm256_setr_epi32(0, 0, 0, 0, 2, 2, 2, 2);
        let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm256_mul_epi32(a, b);
        let e = _mm256_setr_epi64x(0, 0, 10, 14);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mul_epu32() {
        let a = _mm256_setr_epi32(0, 0, 0, 0, 2, 2, 2, 2);
        let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm256_mul_epu32(a, b);
        let e = _mm256_setr_epi64x(0, 0, 10, 14);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mulhi_epi16() {
        let a = _mm256_set1_epi16(6535);
        let b = _mm256_set1_epi16(6535);
        let r = _mm256_mulhi_epi16(a, b);
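        // 6535 * 6535 = 42_706_225; the high 16 bits are 42_706_225 >> 16 = 651.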
        let e = _mm256_set1_epi16(651);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mulhi_epu16() {
        let a = _mm256_set1_epi16(6535);
        let b = _mm256_set1_epi16(6535);
        let r = _mm256_mulhi_epu16(a, b);
        let e = _mm256_set1_epi16(651);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mullo_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_mullo_epi16(a, b);
        let e = _mm256_set1_epi16(8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mullo_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_mullo_epi32(a, b);
        let e = _mm256_set1_epi32(8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mulhrs_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        // Rounded high product: (((2 * 4) >> 14) + 1) >> 1 = 0 in every lane.
        let r = _mm256_mulhrs_epi16(a, b);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_or_si256() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(0);
        let r = _mm256_or_si256(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_packs_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_packs_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            2, 2, 2, 2, 2, 2, 2, 2,
            4, 4, 4, 4, 4, 4, 4, 4,
            2, 2, 2, 2, 2, 2, 2, 2,
            4, 4, 4, 4, 4, 4, 4, 4,
        );

        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_packs_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_packs_epi32(a, b);
        let e = _mm256_setr_epi16(2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4);

        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_packus_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_packus_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            2, 2, 2, 2, 2, 2, 2, 2,
            4, 4, 4, 4, 4, 4, 4, 4,
            2, 2, 2, 2, 2, 2, 2, 2,
            4, 4, 4, 4, 4, 4, 4, 4,
        );

        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_packus_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_packus_epi32(a, b);
        let e = _mm256_setr_epi16(2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4);

        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sad_epu8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_sad_epu8(a, b);
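        // Each 64-bit lane holds the sum of absolute differences of 8 bytes: 8 * |2 - 4| = 16.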
        let e = _mm256_set1_epi64x(16);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_shufflehi_epi16() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            0, 1, 2, 3, 11, 22, 33, 44,
            4, 5, 6, 7, 55, 66, 77, 88,
        );
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            0, 1, 2, 3, 44, 22, 22, 11,
            4, 5, 6, 7, 88, 66, 66, 55,
        );
        let r = _mm256_shufflehi_epi16::<0b00_01_01_11>(a);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_shufflelo_epi16() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            11, 22, 33, 44, 0, 1, 2, 3,
            55, 66, 77, 88, 4, 5, 6, 7,
        );
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            44, 22, 22, 11, 0, 1, 2, 3,
            88, 66, 66, 55, 4, 5, 6, 7,
        );
        let r = _mm256_shufflelo_epi16::<0b00_01_01_11>(a);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sign_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(-1);
        let r = _mm256_sign_epi16(a, b);
        let e = _mm256_set1_epi16(-2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sign_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(-1);
        let r = _mm256_sign_epi32(a, b);
        let e = _mm256_set1_epi32(-2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sign_epi8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(-1);
        let r = _mm256_sign_epi8(a, b);
        let e = _mm256_set1_epi8(-2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sll_epi16() {
        let a = _mm256_set1_epi16(0xFF);
        let b = _mm_insert_epi16::<0>(_mm_set1_epi16(0), 4);
        let r = _mm256_sll_epi16(a, b);
        assert_eq_m256i(r, _mm256_set1_epi16(0xFF0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sll_epi32() {
        let a = _mm256_set1_epi32(0xFFFF);
        let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 4);
        let r = _mm256_sll_epi32(a, b);
        assert_eq_m256i(r, _mm256_set1_epi32(0xFFFF0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sll_epi64() {
        let a = _mm256_set1_epi64x(0xFFFFFFFF);
        let b = _mm_insert_epi64::<0>(_mm_set1_epi64x(0), 4);
        let r = _mm256_sll_epi64(a, b);
        assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFFF0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_slli_epi16() {
        assert_eq_m256i(
            _mm256_slli_epi16::<4>(_mm256_set1_epi16(0xFF)),
            _mm256_set1_epi16(0xFF0),
        );
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_slli_epi32() {
        assert_eq_m256i(
            _mm256_slli_epi32::<4>(_mm256_set1_epi32(0xFFFF)),
            _mm256_set1_epi32(0xFFFF0),
        );
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_slli_epi64() {
        assert_eq_m256i(
            _mm256_slli_epi64::<4>(_mm256_set1_epi64x(0xFFFFFFFF)),
            _mm256_set1_epi64x(0xFFFFFFFF0),
        );
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_slli_si256() {
        let a = _mm256_set1_epi64x(0xFFFFFFFF);
        let r = _mm256_slli_si256::<3>(a);
        assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFFF000000));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_sllv_epi32() {
        let a = _mm_set1_epi32(2);
        let b = _mm_set1_epi32(1);
        let r = _mm_sllv_epi32(a, b);
        let e = _mm_set1_epi32(4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sllv_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(1);
        let r = _mm256_sllv_epi32(a, b);
        let e = _mm256_set1_epi32(4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_sllv_epi64() {
        let a = _mm_set1_epi64x(2);
        let b = _mm_set1_epi64x(1);
        let r = _mm_sllv_epi64(a, b);
        let e = _mm_set1_epi64x(4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sllv_epi64() {
        let a = _mm256_set1_epi64x(2);
        let b = _mm256_set1_epi64x(1);
        let r = _mm256_sllv_epi64(a, b);
        let e = _mm256_set1_epi64x(4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sra_epi16() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0);
        let r = _mm256_sra_epi16(a, b);
        assert_eq_m256i(r, _mm256_set1_epi16(-1));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sra_epi32() {
        let a = _mm256_set1_epi32(-1);
        let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 1);
        let r = _mm256_sra_epi32(a, b);
        assert_eq_m256i(r, _mm256_set1_epi32(-1));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srai_epi16() {
        assert_eq_m256i(
            _mm256_srai_epi16::<1>(_mm256_set1_epi16(-1)),
            _mm256_set1_epi16(-1),
        );
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srai_epi32() {
        assert_eq_m256i(
            _mm256_srai_epi32::<1>(_mm256_set1_epi32(-1)),
            _mm256_set1_epi32(-1),
        );
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_srav_epi32() {
        let a = _mm_set1_epi32(4);
        let count = _mm_set1_epi32(1);
        let r = _mm_srav_epi32(a, count);
        let e = _mm_set1_epi32(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srav_epi32() {
        let a = _mm256_set1_epi32(4);
        let count = _mm256_set1_epi32(1);
        let r = _mm256_srav_epi32(a, count);
        let e = _mm256_set1_epi32(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srli_si256() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm256_srli_si256::<3>(a);
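        // The byte shift is applied to each 128-bit lane independently.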
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            4, 5, 6, 7, 8, 9, 10, 11,
            12, 13, 14, 15, 16, 0, 0, 0,
            20, 21, 22, 23, 24, 25, 26, 27,
            28, 29, 30, 31, 32, 0, 0, 0,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srl_epi16() {
        let a = _mm256_set1_epi16(0xFF);
        let b = _mm_insert_epi16::<0>(_mm_set1_epi16(0), 4);
        let r = _mm256_srl_epi16(a, b);
        assert_eq_m256i(r, _mm256_set1_epi16(0xF));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srl_epi32() {
        let a = _mm256_set1_epi32(0xFFFF);
        let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 4);
        let r = _mm256_srl_epi32(a, b);
        assert_eq_m256i(r, _mm256_set1_epi32(0xFFF));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srl_epi64() {
        let a = _mm256_set1_epi64x(0xFFFFFFFF);
        let b = _mm_setr_epi64x(4, 0);
        let r = _mm256_srl_epi64(a, b);
        assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFF));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srli_epi16() {
        assert_eq_m256i(
            _mm256_srli_epi16::<4>(_mm256_set1_epi16(0xFF)),
            _mm256_set1_epi16(0xF),
        );
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srli_epi32() {
        assert_eq_m256i(
            _mm256_srli_epi32::<4>(_mm256_set1_epi32(0xFFFF)),
            _mm256_set1_epi32(0xFFF),
        );
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srli_epi64() {
        assert_eq_m256i(
            _mm256_srli_epi64::<4>(_mm256_set1_epi64x(0xFFFFFFFF)),
            _mm256_set1_epi64x(0xFFFFFFF),
        );
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_srlv_epi32() {
        let a = _mm_set1_epi32(2);
        let count = _mm_set1_epi32(1);
        let r = _mm_srlv_epi32(a, count);
        let e = _mm_set1_epi32(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srlv_epi32() {
        let a = _mm256_set1_epi32(2);
        let count = _mm256_set1_epi32(1);
        let r = _mm256_srlv_epi32(a, count);
        let e = _mm256_set1_epi32(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_srlv_epi64() {
        let a = _mm_set1_epi64x(2);
        let count = _mm_set1_epi64x(1);
        let r = _mm_srlv_epi64(a, count);
        let e = _mm_set1_epi64x(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srlv_epi64() {
        let a = _mm256_set1_epi64x(2);
        let count = _mm256_set1_epi64x(1);
        let r = _mm256_srlv_epi64(a, count);
        let e = _mm256_set1_epi64x(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_stream_load_si256() {
        let a = _mm256_set_epi64x(5, 6, 7, 8);
        let r = _mm256_stream_load_si256(core::ptr::addr_of!(a) as *const _);
        assert_eq_m256i(a, r);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sub_epi16() {
        let a = _mm256_set1_epi16(4);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_sub_epi16(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sub_epi32() {
        let a = _mm256_set1_epi32(4);
        let b = _mm256_set1_epi32(2);
        let r = _mm256_sub_epi32(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sub_epi64() {
        let a = _mm256_set1_epi64x(4);
        let b = _mm256_set1_epi64x(2);
        let r = _mm256_sub_epi64(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sub_epi8() {
        let a = _mm256_set1_epi8(4);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_sub_epi8(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_subs_epi16() {
        let a = _mm256_set1_epi16(4);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_subs_epi16(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_subs_epi8() {
        let a = _mm256_set1_epi8(4);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_subs_epi8(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_subs_epu16() {
        let a = _mm256_set1_epi16(4);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_subs_epu16(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_subs_epu8() {
        let a = _mm256_set1_epi8(4);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_subs_epu8(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_xor_si256() {
        let a = _mm256_set1_epi8(5);
        let b = _mm256_set1_epi8(3);
        let r = _mm256_xor_si256(a, b);
        assert_eq_m256i(r, _mm256_set1_epi8(6));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi8(
            -1, -2, -3, -4, -5, -6, -7, -8,
            -9, -10, -11, -12, -13, -14, -15, -16,
            -17, -18, -19, -20, -21, -22, -23, -24,
            -25, -26, -27, -28, -29, -30, -31, -32,
        );
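        // The byte offset applies within each 128-bit lane: 0 returns `b`, 16 returns `a`,
        // and counts of 32 or more produce all zeros.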
        let r = _mm256_alignr_epi8::<33>(a, b);
        assert_eq_m256i(r, _mm256_set1_epi8(0));

        let r = _mm256_alignr_epi8::<17>(a, b);
        #[rustfmt::skip]
        let expected = _mm256_setr_epi8(
            2, 3, 4, 5, 6, 7, 8, 9,
            10, 11, 12, 13, 14, 15, 16, 0,
            18, 19, 20, 21, 22, 23, 24, 25,
            26, 27, 28, 29, 30, 31, 32, 0,
        );
        assert_eq_m256i(r, expected);

        let r = _mm256_alignr_epi8::<4>(a, b);
        #[rustfmt::skip]
        let expected = _mm256_setr_epi8(
            -5, -6, -7, -8, -9, -10, -11, -12,
            -13, -14, -15, -16, 1, 2, 3, 4,
            -21, -22, -23, -24, -25, -26, -27, -28,
            -29, -30, -31, -32, 17, 18, 19, 20,
        );
        assert_eq_m256i(r, expected);

        let r = _mm256_alignr_epi8::<15>(a, b);
        #[rustfmt::skip]
        let expected = _mm256_setr_epi8(
            -16, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            -32, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        assert_eq_m256i(r, expected);

        let r = _mm256_alignr_epi8::<0>(a, b);
        assert_eq_m256i(r, b);

        let r = _mm256_alignr_epi8::<16>(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi8(
            4, 128u8 as i8, 4, 3, 24, 12, 6, 19,
            12, 5, 5, 10, 4, 1, 8, 0,
            4, 128u8 as i8, 4, 3, 24, 12, 6, 19,
            12, 5, 5, 10, 4, 1, 8, 0,
        );
        #[rustfmt::skip]
        let expected = _mm256_setr_epi8(
            5, 0, 5, 4, 9, 13, 7, 4,
            13, 6, 6, 11, 5, 2, 9, 1,
            21, 0, 21, 20, 25, 29, 23, 20,
            29, 22, 22, 27, 21, 18, 25, 17,
        );
        let r = _mm256_shuffle_epi8(a, b);
        assert_eq_m256i(r, expected);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_permutevar8x32_epi32() {
        let a = _mm256_setr_epi32(100, 200, 300, 400, 500, 600, 700, 800);
        let b = _mm256_setr_epi32(5, 0, 5, 1, 7, 6, 3, 4);
        let expected = _mm256_setr_epi32(600, 100, 600, 200, 800, 700, 400, 500);
        let r = _mm256_permutevar8x32_epi32(a, b);
        assert_eq_m256i(r, expected);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_permute4x64_epi64() {
        let a = _mm256_setr_epi64x(100, 200, 300, 400);
        let expected = _mm256_setr_epi64x(400, 100, 200, 100);
        let r = _mm256_permute4x64_epi64::<0b00010011>(a);
        assert_eq_m256i(r, expected);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_permute2x128_si256() {
        let a = _mm256_setr_epi64x(100, 200, 500, 600);
        let b = _mm256_setr_epi64x(300, 400, 700, 800);
        let r = _mm256_permute2x128_si256::<0b00_01_00_11>(a, b);
        let e = _mm256_setr_epi64x(700, 800, 500, 600);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_permute4x64_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let r = _mm256_permute4x64_pd::<0b00_01_00_11>(a);
        let e = _mm256_setr_pd(4., 1., 2., 1.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_permutevar8x32_ps() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm256_setr_epi32(5, 0, 5, 1, 7, 6, 3, 4);
        let r = _mm256_permutevar8x32_ps(a, b);
        let e = _mm256_setr_ps(6., 1., 6., 2., 8., 7., 4., 5.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i32gather_epi32() {
        let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
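        // With a scale of 4 bytes, index `i` reads `arr[i]`.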
        let r = _mm_i32gather_epi32::<4>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
        assert_eq_m128i(r, _mm_setr_epi32(0, 16, 32, 48));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i32gather_epi32() {
        let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
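        // Lanes whose mask sign bit is clear keep the corresponding `src` element (256).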
        let r = _mm_mask_i32gather_epi32::<4>(
            _mm_set1_epi32(256),
            arr.as_ptr(),
            _mm_setr_epi32(0, 16, 64, 96),
            _mm_setr_epi32(-1, -1, -1, 0),
        );
        assert_eq_m128i(r, _mm_setr_epi32(0, 16, 64, 256));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i32gather_epi32() {
        let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
        let r =
            _mm256_i32gather_epi32::<4>(arr.as_ptr(), _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
        assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i32gather_epi32() {
        let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
        let r = _mm256_mask_i32gather_epi32::<4>(
            _mm256_set1_epi32(256),
            arr.as_ptr(),
            _mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0),
            _mm256_setr_epi32(-1, -1, -1, 0, 0, 0, 0, 0),
        );
        assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 64, 256, 256, 256, 256, 256));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i32gather_ps() {
        let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
        let r = _mm_i32gather_ps::<4>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
        assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 32.0, 48.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i32gather_ps() {
        let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
        let r = _mm_mask_i32gather_ps::<4>(
            _mm_set1_ps(256.0),
            arr.as_ptr(),
            _mm_setr_epi32(0, 16, 64, 96),
            _mm_setr_ps(-1.0, -1.0, -1.0, 0.0),
        );
        assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 64.0, 256.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i32gather_ps() {
        let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
        let r =
            _mm256_i32gather_ps::<4>(arr.as_ptr(), _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
        assert_eq_m256(r, _mm256_setr_ps(0.0, 16.0, 32.0, 48.0, 1.0, 2.0, 3.0, 4.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i32gather_ps() {
        let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
        let r = _mm256_mask_i32gather_ps::<4>(
            _mm256_set1_ps(256.0),
            arr.as_ptr(),
            _mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0),
            _mm256_setr_ps(-1.0, -1.0, -1.0, 0.0, 0.0, 0.0, 0.0, 0.0),
        );
        assert_eq_m256(
            r,
            _mm256_setr_ps(0.0, 16.0, 64.0, 256.0, 256.0, 256.0, 256.0, 256.0),
        );
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i32gather_epi64() {
        let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
        let r = _mm_i32gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0));
        assert_eq_m128i(r, _mm_setr_epi64x(0, 16));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i32gather_epi64() {
        let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
        let r = _mm_mask_i32gather_epi64::<8>(
            _mm_set1_epi64x(256),
            arr.as_ptr(),
            _mm_setr_epi32(16, 16, 16, 16),
            _mm_setr_epi64x(-1, 0),
        );
        assert_eq_m128i(r, _mm_setr_epi64x(16, 256));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i32gather_epi64() {
        let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
        let r = _mm256_i32gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
        assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 32, 48));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i32gather_epi64() {
        let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
        let r = _mm256_mask_i32gather_epi64::<8>(
            _mm256_set1_epi64x(256),
            arr.as_ptr(),
            _mm_setr_epi32(0, 16, 64, 96),
            _mm256_setr_epi64x(-1, -1, -1, 0),
        );
        assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 64, 256));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i32gather_pd() {
        let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
        let r = _mm_i32gather_pd::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0));
        assert_eq_m128d(r, _mm_setr_pd(0.0, 16.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i32gather_pd() {
        let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
        let r = _mm_mask_i32gather_pd::<8>(
            _mm_set1_pd(256.0),
            arr.as_ptr(),
            _mm_setr_epi32(16, 16, 16, 16),
            _mm_setr_pd(-1.0, 0.0),
        );
        assert_eq_m128d(r, _mm_setr_pd(16.0, 256.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i32gather_pd() {
        let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
        let r = _mm256_i32gather_pd::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
        assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 32.0, 48.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i32gather_pd() {
        let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
        let r = _mm256_mask_i32gather_pd::<8>(
            _mm256_set1_pd(256.0),
            arr.as_ptr(),
            _mm_setr_epi32(0, 16, 64, 96),
            _mm256_setr_pd(-1.0, -1.0, -1.0, 0.0),
        );
        assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 64.0, 256.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i64gather_epi32() {
        let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
        let r = _mm_i64gather_epi32::<4>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
        assert_eq_m128i(r, _mm_setr_epi32(0, 16, 0, 0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i64gather_epi32() {
        let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
        let r = _mm_mask_i64gather_epi32::<4>(
            _mm_set1_epi32(256),
            arr.as_ptr(),
            _mm_setr_epi64x(0, 16),
            _mm_setr_epi32(-1, 0, -1, 0),
        );
        assert_eq_m128i(r, _mm_setr_epi32(0, 256, 0, 0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i64gather_epi32() {
        let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
        let r = _mm256_i64gather_epi32::<4>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
        assert_eq_m128i(r, _mm_setr_epi32(0, 16, 32, 48));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i64gather_epi32() {
        let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
        let r = _mm256_mask_i64gather_epi32::<4>(
            _mm_set1_epi32(256),
            arr.as_ptr(),
            _mm256_setr_epi64x(0, 16, 64, 96),
            _mm_setr_epi32(-1, -1, -1, 0),
        );
        assert_eq_m128i(r, _mm_setr_epi32(0, 16, 64, 256));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i64gather_ps() {
        let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
        let r = _mm_i64gather_ps::<4>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
        assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 0.0, 0.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i64gather_ps() {
        let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
        let r = _mm_mask_i64gather_ps::<4>(
            _mm_set1_ps(256.0),
            arr.as_ptr(),
            _mm_setr_epi64x(0, 16),
            _mm_setr_ps(-1.0, 0.0, -1.0, 0.0),
        );
        assert_eq_m128(r, _mm_setr_ps(0.0, 256.0, 0.0, 0.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i64gather_ps() {
        let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
        let r = _mm256_i64gather_ps::<4>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
        assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 32.0, 48.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i64gather_ps() {
        let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
        let r = _mm256_mask_i64gather_ps::<4>(
            _mm_set1_ps(256.0),
            arr.as_ptr(),
            _mm256_setr_epi64x(0, 16, 64, 96),
            _mm_setr_ps(-1.0, -1.0, -1.0, 0.0),
        );
        assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 64.0, 256.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i64gather_epi64() {
        let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
        let r = _mm_i64gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
        assert_eq_m128i(r, _mm_setr_epi64x(0, 16));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i64gather_epi64() {
        let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
        let r = _mm_mask_i64gather_epi64::<8>(
            _mm_set1_epi64x(256),
            arr.as_ptr(),
            _mm_setr_epi64x(16, 16),
            _mm_setr_epi64x(-1, 0),
        );
        assert_eq_m128i(r, _mm_setr_epi64x(16, 256));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i64gather_epi64() {
        let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
        let r = _mm256_i64gather_epi64::<8>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
        assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 32, 48));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i64gather_epi64() {
        let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
        let r = _mm256_mask_i64gather_epi64::<8>(
            _mm256_set1_epi64x(256),
            arr.as_ptr(),
            _mm256_setr_epi64x(0, 16, 64, 96),
            _mm256_setr_epi64x(-1, -1, -1, 0),
        );
        assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 64, 256));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i64gather_pd() {
        let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
        let r = _mm_i64gather_pd::<8>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
        assert_eq_m128d(r, _mm_setr_pd(0.0, 16.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i64gather_pd() {
        let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
        let r = _mm_mask_i64gather_pd::<8>(
            _mm_set1_pd(256.0),
            arr.as_ptr(),
            _mm_setr_epi64x(16, 16),
            _mm_setr_pd(-1.0, 0.0),
        );
        assert_eq_m128d(r, _mm_setr_pd(16.0, 256.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i64gather_pd() {
        let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
        let r = _mm256_i64gather_pd::<8>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
        assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 32.0, 48.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i64gather_pd() {
        let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
        let r = _mm256_mask_i64gather_pd::<8>(
            _mm256_set1_pd(256.0),
            arr.as_ptr(),
            _mm256_setr_epi64x(0, 16, 64, 96),
            _mm256_setr_pd(-1.0, -1.0, -1.0, 0.0),
        );
        assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 64.0, 256.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_extract_epi8() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            -1, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31
        );
        let r1 = _mm256_extract_epi8::<0>(a);
        let r2 = _mm256_extract_epi8::<3>(a);
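        // The extracted lane is zero-extended into the returned i32, so the -1 byte reads back as 0xFF.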
        assert_eq!(r1, 0xFF);
        assert_eq!(r2, 3);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_extract_epi16() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            -1, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        let r1 = _mm256_extract_epi16::<0>(a);
        let r2 = _mm256_extract_epi16::<3>(a);
        assert_eq!(r1, 0xFFFF);
        assert_eq!(r2, 3);
    }
}