use crate::core_arch::{simd::*, x86::*};
use crate::intrinsics::simd::*;

#[cfg(test)]
use stdarch_test::assert_instr;

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_abs_epi32(a: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i32x8();
        let r = simd_select::<m32x8, _>(simd_lt(a, i32x8::ZERO), simd_neg(a), a);
        transmute(r)
    }
}
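
// Usage sketch (illustrative only, not part of this module's API or tests;
// assumes the caller has verified AVX2 support, e.g. with
// `is_x86_feature_detected!("avx2")`):
//
//     let a = _mm256_setr_epi32(-1, 2, -3, 4, -5, 6, -7, 8);
//     let r = _mm256_abs_epi32(a);
//     // each lane holds the absolute value: 1, 2, 3, 4, 5, 6, 7, 8;
//     // i32::MIN is left unchanged, matching the wrapping behavior of `vpabsd`.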

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_abs_epi16(a: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i16x16();
        let r = simd_select::<m16x16, _>(simd_lt(a, i16x16::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_abs_epi8(a: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i8x32();
        let r = simd_select::<m8x32, _>(simd_lt(a, i8x32::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_add_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_add(a.as_i64x4(), b.as_i64x4())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_add_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_add(a.as_i32x8(), b.as_i32x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_add_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_add(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_add_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_add(a.as_i8x32(), b.as_i8x32())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_adds_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_add(a.as_i8x32(), b.as_i8x32())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_adds_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_add(a.as_i16x16(), b.as_i16x16())) }
}
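
// Sketch of the saturating behavior (illustrative only; same AVX2 caveats as
// above). Unlike `_mm256_add_epi16`, the `adds` family clamps on overflow:
//
//     let a = _mm256_set1_epi16(i16::MAX);
//     let b = _mm256_set1_epi16(1);
//     let r = _mm256_adds_epi16(a, b);
//     // every lane of `r` is i16::MAX (saturated), not i16::MIN (wrapped).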

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_adds_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_add(a.as_u8x32(), b.as_u8x32())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_adds_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_add(a.as_u16x16(), b.as_u16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 7))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_alignr_epi8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    // If palignr is shifting the pair of vectors more than the size of two
    // lanes, emit zero.
    if IMM8 >= 32 {
        return _mm256_setzero_si256();
    }
    // If palignr is shifting the pair of input vectors more than one lane,
    // but less than two lanes, convert to shifting in zeroes.
    let (a, b) = if IMM8 > 16 {
        (_mm256_setzero_si256(), a)
    } else {
        (a, b)
    };
    unsafe {
        if IMM8 == 16 {
            return transmute(a);
        }
    }
    // For each output byte, compute the shuffle index into the input pair
    // `(b, a)` (b occupies indices 0..32, a occupies 32..64), treating each
    // 128-bit lane independently and taking the shift modulo one lane.
    const fn mask(shift: u32, i: u32) -> u32 {
        let shift = shift % 16;
        let mod_i = i % 16;
        if mod_i < (16 - shift) {
            i + shift
        } else {
            i + 16 + shift
        }
    }

    unsafe {
        let r: i8x32 = simd_shuffle!(
            b.as_i8x32(),
            a.as_i8x32(),
            [
                mask(IMM8 as u32, 0),
                mask(IMM8 as u32, 1),
                mask(IMM8 as u32, 2),
                mask(IMM8 as u32, 3),
                mask(IMM8 as u32, 4),
                mask(IMM8 as u32, 5),
                mask(IMM8 as u32, 6),
                mask(IMM8 as u32, 7),
                mask(IMM8 as u32, 8),
                mask(IMM8 as u32, 9),
                mask(IMM8 as u32, 10),
                mask(IMM8 as u32, 11),
                mask(IMM8 as u32, 12),
                mask(IMM8 as u32, 13),
                mask(IMM8 as u32, 14),
                mask(IMM8 as u32, 15),
                mask(IMM8 as u32, 16),
                mask(IMM8 as u32, 17),
                mask(IMM8 as u32, 18),
                mask(IMM8 as u32, 19),
                mask(IMM8 as u32, 20),
                mask(IMM8 as u32, 21),
                mask(IMM8 as u32, 22),
                mask(IMM8 as u32, 23),
                mask(IMM8 as u32, 24),
                mask(IMM8 as u32, 25),
                mask(IMM8 as u32, 26),
                mask(IMM8 as u32, 27),
                mask(IMM8 as u32, 28),
                mask(IMM8 as u32, 29),
                mask(IMM8 as u32, 30),
                mask(IMM8 as u32, 31),
            ],
        );
        transmute(r)
    }
}
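
// Sketch of the per-lane semantics (illustrative only): within each 128-bit
// lane, the result is the low 16 bytes of `(a_lane:b_lane) >> (IMM8 * 8)`,
// i.e. result byte `i` is byte `i + IMM8` of the concatenation `[b_lane, a_lane]`:
//
//     let a = _mm256_set1_epi8(1);
//     let b = _mm256_set1_epi8(2);
//     let r = _mm256_alignr_epi8::<4>(a, b);
//     // in each lane, bytes 0..=11 come from `b` (2) and bytes 12..=15 from `a` (1).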

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_and_si256(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_and(a.as_i64x4(), b.as_i64x4())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_andnot_si256(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let all_ones = _mm256_set1_epi8(-1);
        transmute(simd_and(
            simd_xor(a.as_i64x4(), all_ones.as_i64x4()),
            b.as_i64x4(),
        ))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpavgw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_avg_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = simd_cast::<_, u32x16>(a.as_u16x16());
        let b = simd_cast::<_, u32x16>(b.as_u16x16());
        let r = simd_shr(simd_add(simd_add(a, b), u32x16::splat(1)), u32x16::splat(1));
        transmute(simd_cast::<_, u16x16>(r))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpavgb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_avg_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = simd_cast::<_, u16x32>(a.as_u8x32());
        let b = simd_cast::<_, u16x32>(b.as_u8x32());
        let r = simd_shr(simd_add(simd_add(a, b), u16x32::splat(1)), u16x32::splat(1));
        transmute(simd_cast::<_, u8x32>(r))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vblendps, IMM4 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_blend_epi32<const IMM4: i32>(a: __m128i, b: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM4, 4);
    unsafe {
        let a = a.as_i32x4();
        let b = b.as_i32x4();
        let r: i32x4 = simd_shuffle!(
            a,
            b,
            [
                [0, 4, 0, 4][IMM4 as usize & 0b11],
                [1, 1, 5, 5][IMM4 as usize & 0b11],
                [2, 6, 2, 6][(IMM4 as usize >> 2) & 0b11],
                [3, 3, 7, 7][(IMM4 as usize >> 2) & 0b11],
            ],
        );
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vblendps, IMM8 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_blend_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        let a = a.as_i32x8();
        let b = b.as_i32x8();
        let r: i32x8 = simd_shuffle!(
            a,
            b,
            [
                [0, 8, 0, 8][IMM8 as usize & 0b11],
                [1, 1, 9, 9][IMM8 as usize & 0b11],
                [2, 10, 2, 10][(IMM8 as usize >> 2) & 0b11],
                [3, 3, 11, 11][(IMM8 as usize >> 2) & 0b11],
                [4, 12, 4, 12][(IMM8 as usize >> 4) & 0b11],
                [5, 5, 13, 13][(IMM8 as usize >> 4) & 0b11],
                [6, 14, 6, 14][(IMM8 as usize >> 6) & 0b11],
                [7, 7, 15, 15][(IMM8 as usize >> 6) & 0b11],
            ],
        );
        transmute(r)
    }
}
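
// Sketch of the immediate encoding (illustrative only): bit `i` of `IMM8`
// selects lane `i` of the result from `b` (bit set) or `a` (bit clear):
//
//     let a = _mm256_set1_epi32(0);
//     let b = _mm256_set1_epi32(1);
//     let r = _mm256_blend_epi32::<0b1010_1010>(a, b);
//     // r == [0, 1, 0, 1, 0, 1, 0, 1]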

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpblendw, IMM8 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_blend_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        let a = a.as_i16x16();
        let b = b.as_i16x16();

        let r: i16x16 = simd_shuffle!(
            a,
            b,
            [
                [0, 16, 0, 16][IMM8 as usize & 0b11],
                [1, 1, 17, 17][IMM8 as usize & 0b11],
                [2, 18, 2, 18][(IMM8 as usize >> 2) & 0b11],
                [3, 3, 19, 19][(IMM8 as usize >> 2) & 0b11],
                [4, 20, 4, 20][(IMM8 as usize >> 4) & 0b11],
                [5, 5, 21, 21][(IMM8 as usize >> 4) & 0b11],
                [6, 22, 6, 22][(IMM8 as usize >> 6) & 0b11],
                [7, 7, 23, 23][(IMM8 as usize >> 6) & 0b11],
                [8, 24, 8, 24][IMM8 as usize & 0b11],
                [9, 9, 25, 25][IMM8 as usize & 0b11],
                [10, 26, 10, 26][(IMM8 as usize >> 2) & 0b11],
                [11, 11, 27, 27][(IMM8 as usize >> 2) & 0b11],
                [12, 28, 12, 28][(IMM8 as usize >> 4) & 0b11],
                [13, 13, 29, 29][(IMM8 as usize >> 4) & 0b11],
                [14, 30, 14, 30][(IMM8 as usize >> 6) & 0b11],
                [15, 15, 31, 31][(IMM8 as usize >> 6) & 0b11],
            ],
        );
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpblendvb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_blendv_epi8(a: __m256i, b: __m256i, mask: __m256i) -> __m256i {
    unsafe {
        let mask: i8x32 = simd_lt(mask.as_i8x32(), i8x32::ZERO);
        transmute(simd_select(mask, b.as_i8x32(), a.as_i8x32()))
    }
}
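
// Sketch of the mask convention (illustrative only): `_mm256_blendv_epi8`
// looks only at the sign (most significant) bit of each mask byte:
//
//     let a = _mm256_set1_epi8(1);
//     let b = _mm256_set1_epi8(2);
//     let mask = _mm256_set1_epi8(-128); // 0x80: sign bit set in every byte
//     let r = _mm256_blendv_epi8(a, b, mask);
//     // every byte of `r` comes from `b`.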

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_broadcastb_epi8(a: __m128i) -> __m128i {
    unsafe {
        let ret = simd_shuffle!(a.as_i8x16(), i8x16::ZERO, [0_u32; 16]);
        transmute::<i8x16, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcastb_epi8(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i8x16(), i8x16::ZERO, [0_u32; 32]);
        transmute::<i8x32, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_broadcastd_epi32(a: __m128i) -> __m128i {
    unsafe {
        let ret = simd_shuffle!(a.as_i32x4(), i32x4::ZERO, [0_u32; 4]);
        transmute::<i32x4, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcastd_epi32(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i32x4(), i32x4::ZERO, [0_u32; 8]);
        transmute::<i32x8, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vmovddup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_broadcastq_epi64(a: __m128i) -> __m128i {
    unsafe {
        let ret = simd_shuffle!(a.as_i64x2(), a.as_i64x2(), [0_u32; 2]);
        transmute::<i64x2, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcastq_epi64(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i64x2(), a.as_i64x2(), [0_u32; 4]);
        transmute::<i64x4, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vmovddup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_broadcastsd_pd(a: __m128d) -> __m128d {
    unsafe { simd_shuffle!(a, _mm_setzero_pd(), [0_u32; 2]) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcastsd_pd(a: __m128d) -> __m256d {
    unsafe { simd_shuffle!(a, _mm_setzero_pd(), [0_u32; 4]) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[stable(feature = "simd_x86_updates", since = "1.82.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_broadcastsi128_si256(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 1, 0, 1]);
        transmute::<i64x4, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcastsi128_si256(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 1, 0, 1]);
        transmute::<i64x4, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_broadcastss_ps(a: __m128) -> __m128 {
    unsafe { simd_shuffle!(a, _mm_setzero_ps(), [0_u32; 4]) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcastss_ps(a: __m128) -> __m256 {
    unsafe { simd_shuffle!(a, _mm_setzero_ps(), [0_u32; 8]) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_broadcastw_epi16(a: __m128i) -> __m128i {
    unsafe {
        let ret = simd_shuffle!(a.as_i16x8(), i16x8::ZERO, [0_u32; 8]);
        transmute::<i16x8, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcastw_epi16(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i16x8(), i16x8::ZERO, [0_u32; 16]);
        transmute::<i16x16, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpeq_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i64x4, _>(simd_eq(a.as_i64x4(), b.as_i64x4())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpeq_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i32x8, _>(simd_eq(a.as_i32x8(), b.as_i32x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpeq_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i16x16, _>(simd_eq(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpeq_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i8x32, _>(simd_eq(a.as_i8x32(), b.as_i8x32())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpgt_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i64x4, _>(simd_gt(a.as_i64x4(), b.as_i64x4())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpgt_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i32x8, _>(simd_gt(a.as_i32x8(), b.as_i32x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpgt_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i16x16, _>(simd_gt(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpgt_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i8x32, _>(simd_gt(a.as_i8x32(), b.as_i8x32())) }
}
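
// Sketch of the comparison results (illustrative only): the `cmpeq`/`cmpgt`
// family returns all-ones (-1) in lanes where the predicate holds and zero
// elsewhere, which composes directly with `blendv`/`movemask`:
//
//     let a = _mm256_set1_epi8(5);
//     let b = _mm256_set1_epi8(3);
//     let m = _mm256_cmpgt_epi8(a, b);
//     // every byte of `m` is 0xFF, since 5 > 3 in each lane.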

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepi16_epi32(a: __m128i) -> __m256i {
    unsafe { transmute::<i32x8, _>(simd_cast(a.as_i16x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepi16_epi64(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_i16x8();
        let v64: i16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute::<i64x4, _>(simd_cast(v64))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepi32_epi64(a: __m128i) -> __m256i {
    unsafe { transmute::<i64x4, _>(simd_cast(a.as_i32x4())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepi8_epi16(a: __m128i) -> __m256i {
    unsafe { transmute::<i16x16, _>(simd_cast(a.as_i8x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepi8_epi32(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_i8x16();
        let v64: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
        transmute::<i32x8, _>(simd_cast(v64))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepi8_epi64(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_i8x16();
        let v32: i8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute::<i64x4, _>(simd_cast(v32))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepu16_epi32(a: __m128i) -> __m256i {
    unsafe { transmute::<i32x8, _>(simd_cast(a.as_u16x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepu16_epi64(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_u16x8();
        let v64: u16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute::<i64x4, _>(simd_cast(v64))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepu32_epi64(a: __m128i) -> __m256i {
    unsafe { transmute::<i64x4, _>(simd_cast(a.as_u32x4())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepu8_epi16(a: __m128i) -> __m256i {
    unsafe { transmute::<i16x16, _>(simd_cast(a.as_u8x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepu8_epi32(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_u8x16();
        let v64: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
        transmute::<i32x8, _>(simd_cast(v64))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepu8_epi64(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_u8x16();
        let v32: u8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute::<i64x4, _>(simd_cast(v32))
    }
}
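
// Sketch contrasting sign- and zero-extension (illustrative only):
//
//     let a = _mm_set1_epi8(-1); // every byte is 0xFF
//     let s = _mm256_cvtepi8_epi16(a); // sign-extend: every i16 lane == -1
//     let z = _mm256_cvtepu8_epi16(a); // zero-extend: every i16 lane == 255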

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vextractf128, IMM1 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_extracti128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
    static_assert_uimm_bits!(IMM1, 1);
    unsafe {
        let a = a.as_i64x4();
        let b = i64x4::ZERO;
        let dst: i64x2 = simd_shuffle!(a, b, [[0, 1], [2, 3]][IMM1 as usize]);
        transmute(dst)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_hadd_epi16(a: __m256i, b: __m256i) -> __m256i {
    let a = a.as_i16x16();
    let b = b.as_i16x16();
    unsafe {
        let even: i16x16 = simd_shuffle!(
            a,
            b,
            [0, 2, 4, 6, 16, 18, 20, 22, 8, 10, 12, 14, 24, 26, 28, 30]
        );
        let odd: i16x16 = simd_shuffle!(
            a,
            b,
            [1, 3, 5, 7, 17, 19, 21, 23, 9, 11, 13, 15, 25, 27, 29, 31]
        );
        simd_add(even, odd).as_m256i()
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_hadd_epi32(a: __m256i, b: __m256i) -> __m256i {
    let a = a.as_i32x8();
    let b = b.as_i32x8();
    unsafe {
        let even: i32x8 = simd_shuffle!(a, b, [0, 2, 8, 10, 4, 6, 12, 14]);
        let odd: i32x8 = simd_shuffle!(a, b, [1, 3, 9, 11, 5, 7, 13, 15]);
        simd_add(even, odd).as_m256i()
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hadds_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(phaddsw(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_hsub_epi16(a: __m256i, b: __m256i) -> __m256i {
    let a = a.as_i16x16();
    let b = b.as_i16x16();
    unsafe {
        let even: i16x16 = simd_shuffle!(
            a,
            b,
            [0, 2, 4, 6, 16, 18, 20, 22, 8, 10, 12, 14, 24, 26, 28, 30]
        );
        let odd: i16x16 = simd_shuffle!(
            a,
            b,
            [1, 3, 5, 7, 17, 19, 21, 23, 9, 11, 13, 15, 25, 27, 29, 31]
        );
        simd_sub(even, odd).as_m256i()
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_hsub_epi32(a: __m256i, b: __m256i) -> __m256i {
    let a = a.as_i32x8();
    let b = b.as_i32x8();
    unsafe {
        let even: i32x8 = simd_shuffle!(a, b, [0, 2, 8, 10, 4, 6, 12, 14]);
        let odd: i32x8 = simd_shuffle!(a, b, [1, 3, 9, 11, 5, 7, 13, 15]);
        simd_sub(even, odd).as_m256i()
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hsubs_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(phsubsw(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_epi32<const SCALE: i32>(
    slice: *const i32,
    offsets: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero = i32x4::ZERO;
    let neg_one = _mm_set1_epi32(-1).as_i32x4();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = pgatherdd(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i32gather_epi32<const SCALE: i32>(
    src: __m128i,
    slice: *const i32,
    offsets: __m128i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x4();
    let mask = mask.as_i32x4();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = pgatherdd(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_epi32<const SCALE: i32>(
    slice: *const i32,
    offsets: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let zero = i32x8::ZERO;
    let neg_one = _mm256_set1_epi32(-1).as_i32x8();
    let offsets = offsets.as_i32x8();
    let slice = slice as *const i8;
    let r = vpgatherdd(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i32gather_epi32<const SCALE: i32>(
    src: __m256i,
    slice: *const i32,
    offsets: __m256i,
    mask: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x8();
    let mask = mask.as_i32x8();
    let offsets = offsets.as_i32x8();
    let slice = slice as *const i8;
    let r = vpgatherdd(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}
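
// Usage sketch for the gathers (illustrative only). SCALE is the byte
// multiplier applied to each offset, so SCALE = 4 treats the offsets as
// element indices into an `i32` slice. The caller must guarantee that every
// selected address is in bounds:
//
//     let data = [10i32, 11, 12, 13, 14, 15, 16, 17];
//     let idx = _mm_setr_epi32(0, 2, 4, 6);
//     let r = unsafe { _mm_i32gather_epi32::<4>(data.as_ptr(), idx) };
//     // r == [10, 12, 14, 16]; the `mask_` variants additionally take a
//     // `src`/`mask` pair and only gather lanes whose mask sign bit is set.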

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m128i) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_ps();
    let neg_one = _mm_set1_ps(-1.0);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    pgatherdps(zero, slice, offsets, neg_one, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i32gather_ps<const SCALE: i32>(
    src: __m128,
    slice: *const f32,
    offsets: __m128i,
    mask: __m128,
) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    pgatherdps(src, slice, offsets, mask, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m256i) -> __m256 {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm256_setzero_ps();
    let neg_one = _mm256_set1_ps(-1.0);
    let offsets = offsets.as_i32x8();
    let slice = slice as *const i8;
    vpgatherdps(zero, slice, offsets, neg_one, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i32gather_ps<const SCALE: i32>(
    src: __m256,
    slice: *const f32,
    offsets: __m256i,
    mask: __m256,
) -> __m256 {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i32x8();
    let slice = slice as *const i8;
    vpgatherdps(src, slice, offsets, mask, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_epi64<const SCALE: i32>(
    slice: *const i64,
    offsets: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero = i64x2::ZERO;
    let neg_one = _mm_set1_epi64x(-1).as_i64x2();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = pgatherdq(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i32gather_epi64<const SCALE: i32>(
    src: __m128i,
    slice: *const i64,
    offsets: __m128i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x2();
    let mask = mask.as_i64x2();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = pgatherdq(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_epi64<const SCALE: i32>(
    slice: *const i64,
    offsets: __m128i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let zero = i64x4::ZERO;
    let neg_one = _mm256_set1_epi64x(-1).as_i64x4();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = vpgatherdq(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i32gather_epi64<const SCALE: i32>(
    src: __m256i,
    slice: *const i64,
    offsets: __m128i,
    mask: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x4();
    let mask = mask.as_i64x4();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = vpgatherdq(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_pd<const SCALE: i32>(slice: *const f64, offsets: __m128i) -> __m128d {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_pd();
    let neg_one = _mm_set1_pd(-1.0);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    pgatherdpd(zero, slice, offsets, neg_one, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i32gather_pd<const SCALE: i32>(
    src: __m128d,
    slice: *const f64,
    offsets: __m128i,
    mask: __m128d,
) -> __m128d {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    pgatherdpd(src, slice, offsets, mask, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_pd<const SCALE: i32>(
    slice: *const f64,
    offsets: __m128i,
) -> __m256d {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm256_setzero_pd();
    let neg_one = _mm256_set1_pd(-1.0);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    vpgatherdpd(zero, slice, offsets, neg_one, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i32gather_pd<const SCALE: i32>(
    src: __m256d,
    slice: *const f64,
    offsets: __m128i,
    mask: __m256d,
) -> __m256d {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    vpgatherdpd(src, slice, offsets, mask, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_epi32<const SCALE: i32>(
    slice: *const i32,
    offsets: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero = i32x4::ZERO;
    let neg_one = _mm_set1_epi64x(-1).as_i32x4();
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    let r = pgatherqd(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i64gather_epi32<const SCALE: i32>(
    src: __m128i,
    slice: *const i32,
    offsets: __m128i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x4();
    let mask = mask.as_i32x4();
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    let r = pgatherqd(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_epi32<const SCALE: i32>(
    slice: *const i32,
    offsets: __m256i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero = i32x4::ZERO;
    let neg_one = _mm_set1_epi64x(-1).as_i32x4();
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    let r = vpgatherqd(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i64gather_epi32<const SCALE: i32>(
    src: __m128i,
    slice: *const i32,
    offsets: __m256i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x4();
    let mask = mask.as_i32x4();
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    let r = vpgatherqd(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m128i) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_ps();
    let neg_one = _mm_set1_ps(-1.0);
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    pgatherqps(zero, slice, offsets, neg_one, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i64gather_ps<const SCALE: i32>(
    src: __m128,
    slice: *const f32,
    offsets: __m128i,
    mask: __m128,
) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    pgatherqps(src, slice, offsets, mask, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m256i) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_ps();
    let neg_one = _mm_set1_ps(-1.0);
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    vpgatherqps(zero, slice, offsets, neg_one, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i64gather_ps<const SCALE: i32>(
    src: __m128,
    slice: *const f32,
    offsets: __m256i,
    mask: __m128,
) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    vpgatherqps(src, slice, offsets, mask, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_epi64<const SCALE: i32>(
    slice: *const i64,
    offsets: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero = i64x2::ZERO;
    let neg_one = _mm_set1_epi64x(-1).as_i64x2();
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x2();
    let r = pgatherqq(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i64gather_epi64<const SCALE: i32>(
    src: __m128i,
    slice: *const i64,
    offsets: __m128i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x2();
    let mask = mask.as_i64x2();
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    let r = pgatherqq(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_epi64<const SCALE: i32>(
    slice: *const i64,
    offsets: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let zero = i64x4::ZERO;
    let neg_one = _mm256_set1_epi64x(-1).as_i64x4();
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x4();
    let r = vpgatherqq(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i64gather_epi64<const SCALE: i32>(
    src: __m256i,
    slice: *const i64,
    offsets: __m256i,
    mask: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x4();
    let mask = mask.as_i64x4();
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    let r = vpgatherqq(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_pd<const SCALE: i32>(slice: *const f64, offsets: __m128i) -> __m128d {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_pd();
    let neg_one = _mm_set1_pd(-1.0);
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x2();
    pgatherqpd(zero, slice, offsets, neg_one, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i64gather_pd<const SCALE: i32>(
    src: __m128d,
    slice: *const f64,
    offsets: __m128i,
    mask: __m128d,
) -> __m128d {
    static_assert_imm8_scale!(SCALE);
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x2();
    pgatherqpd(src, slice, offsets, mask, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_pd<const SCALE: i32>(
    slice: *const f64,
    offsets: __m256i,
) -> __m256d {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm256_setzero_pd();
    let neg_one = _mm256_set1_pd(-1.0);
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x4();
    vpgatherqpd(zero, slice, offsets, neg_one, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i64gather_pd<const SCALE: i32>(
    src: __m256d,
    slice: *const f64,
    offsets: __m256i,
    mask: __m256d,
) -> __m256d {
    static_assert_imm8_scale!(SCALE);
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x4();
    vpgatherqpd(src, slice, offsets, mask, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vinsertf128, IMM1 = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_inserti128_si256<const IMM1: i32>(a: __m256i, b: __m128i) -> __m256i {
    static_assert_uimm_bits!(IMM1, 1);
    unsafe {
        let a = a.as_i64x4();
        let b = _mm256_castsi128_si256(b).as_i64x4();
        let dst: i64x4 = simd_shuffle!(a, b, [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize]);
        transmute(dst)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_madd_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(pmaddwd(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_maddubs_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(pmaddubsw(a.as_u8x32(), b.as_i8x32())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_maskload_epi32(mem_addr: *const i32, mask: __m128i) -> __m128i {
    let mask = simd_shr(mask.as_i32x4(), i32x4::splat(31));
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i32x4::ZERO).as_m128i()
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_maskload_epi32(mem_addr: *const i32, mask: __m256i) -> __m256i {
    let mask = simd_shr(mask.as_i32x8(), i32x8::splat(31));
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i32x8::ZERO).as_m256i()
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_maskload_epi64(mem_addr: *const i64, mask: __m128i) -> __m128i {
    let mask = simd_shr(mask.as_i64x2(), i64x2::splat(63));
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i64x2::ZERO).as_m128i()
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_maskload_epi64(mem_addr: *const i64, mask: __m256i) -> __m256i {
    let mask = simd_shr(mask.as_i64x4(), i64x4::splat(63));
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i64x4::ZERO).as_m256i()
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_maskstore_epi32(mem_addr: *mut i32, mask: __m128i, a: __m128i) {
    let mask = simd_shr(mask.as_i32x4(), i32x4::splat(31));
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x4())
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_maskstore_epi32(mem_addr: *mut i32, mask: __m256i, a: __m256i) {
    let mask = simd_shr(mask.as_i32x8(), i32x8::splat(31));
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x8())
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_maskstore_epi64(mem_addr: *mut i64, mask: __m128i, a: __m128i) {
    let mask = simd_shr(mask.as_i64x2(), i64x2::splat(63));
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x2())
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_maskstore_epi64(mem_addr: *mut i64, mask: __m256i, a: __m256i) {
    let mask = simd_shr(mask.as_i64x4(), i64x4::splat(63));
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x4())
}
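
// Sketch of the masked-load convention (illustrative only): only lanes whose
// mask element has its sign bit set are loaded; the rest are zeroed. The
// masked stores mirror this, leaving unselected memory untouched:
//
//     let data = [1i32, 2, 3, 4];
//     let mask = _mm_setr_epi32(-1, 0, -1, 0);
//     let r = unsafe { _mm_maskload_epi32(data.as_ptr(), mask) };
//     // r == [1, 0, 3, 0]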
1965
1966#[inline]
1971#[target_feature(enable = "avx2")]
1972#[cfg_attr(test, assert_instr(vpmaxsw))]
1973#[stable(feature = "simd_x86", since = "1.27.0")]
1974#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1975pub const fn _mm256_max_epi16(a: __m256i, b: __m256i) -> __m256i {
1976 unsafe { simd_imax(a.as_i16x16(), b.as_i16x16()).as_m256i() }
1977}
1978
1979#[inline]
1984#[target_feature(enable = "avx2")]
1985#[cfg_attr(test, assert_instr(vpmaxsd))]
1986#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_max_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { simd_imax(a.as_i32x8(), b.as_i32x8()).as_m256i() }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_max_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { simd_imax(a.as_i8x32(), b.as_i8x32()).as_m256i() }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_max_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { simd_imax(a.as_u16x16(), b.as_u16x16()).as_m256i() }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxud))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_max_epu32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { simd_imax(a.as_u32x8(), b.as_u32x8()).as_m256i() }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_max_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { simd_imax(a.as_u8x32(), b.as_u8x32()).as_m256i() }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_min_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { simd_imin(a.as_i16x16(), b.as_i16x16()).as_m256i() }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_min_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { simd_imin(a.as_i32x8(), b.as_i32x8()).as_m256i() }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_min_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { simd_imin(a.as_i8x32(), b.as_i8x32()).as_m256i() }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_min_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { simd_imin(a.as_u16x16(), b.as_u16x16()).as_m256i() }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminud))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_min_epu32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { simd_imin(a.as_u32x8(), b.as_u32x8()).as_m256i() }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_min_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { simd_imin(a.as_u8x32(), b.as_u8x32()).as_m256i() }
}

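/// Creates a 32-bit mask from the most significant bit of each 8-bit element
/// in `a`.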
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovmskb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_movemask_epi8(a: __m256i) -> i32 {
    unsafe {
        let z = i8x32::ZERO;
        let m: i8x32 = simd_lt(a.as_i8x32(), z);
        simd_bitmask::<_, u32>(m) as i32
    }
}

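/// Computes sums of absolute differences of unsigned 8-bit integers in `a`
/// and `b`; within each 128-bit lane, `IMM8` selects the 32-bit group of `b`
/// and the starting offset in `a` from which the eight SADs are computed.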
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vmpsadbw, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mpsadbw_epu8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { transmute(mpsadbw(a.as_u8x32(), b.as_u8x32(), IMM8 as i8)) }
}

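/// Multiplies the low signed 32-bit integers from each packed 64-bit element
/// in `a` and `b`, returning the signed 64-bit products. The double
/// `simd_cast` below first truncates each 64-bit lane to its low 32 bits,
/// then sign-extends it back before the full-width multiply.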
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmuldq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mul_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = simd_cast::<_, i64x4>(simd_cast::<_, i32x4>(a.as_i64x4()));
        let b = simd_cast::<_, i64x4>(simd_cast::<_, i32x4>(b.as_i64x4()));
        transmute(simd_mul(a, b))
    }
}

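/// Multiplies the low unsigned 32-bit integers from each packed 64-bit
/// element in `a` and `b`, returning the unsigned 64-bit products.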
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmuludq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mul_epu32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_u64x4();
        let b = b.as_u64x4();
        let mask = u64x4::splat(u32::MAX as u64);
        transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
    }
}

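/// Multiplies the packed signed 16-bit integers in `a` and `b`, producing
/// intermediate 32-bit products, and returns the high 16 bits of each
/// product (for example, `1000 * 1000 = 1_000_000` yields lanes of
/// `1_000_000 >> 16 = 15`).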
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmulhw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mulhi_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = simd_cast::<_, i32x16>(a.as_i16x16());
        let b = simd_cast::<_, i32x16>(b.as_i16x16());
        let r = simd_shr(simd_mul(a, b), i32x16::splat(16));
        transmute(simd_cast::<i32x16, i16x16>(r))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mulhi_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = simd_cast::<_, u32x16>(a.as_u16x16());
        let b = simd_cast::<_, u32x16>(b.as_u16x16());
        let r = simd_shr(simd_mul(a, b), u32x16::splat(16));
        transmute(simd_cast::<u32x16, u16x16>(r))
    }
}

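/// Multiplies the packed 16-bit integers in `a` and `b`, producing
/// intermediate 32-bit products, and returns the low 16 bits of each
/// product.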
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmullw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mullo_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_mul(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmulld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mullo_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_mul(a.as_i32x8(), b.as_i32x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mulhrs_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(pmulhrsw(a.as_i16x16(), b.as_i16x16())) }
}

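/// Computes the bitwise OR of 256 bits (representing integer data) in `a`
/// and `b`.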
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_or_si256(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_or(a.as_i32x8(), b.as_i32x8())) }
}

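/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
/// using signed saturation. As with the other AVX2 pack instructions, `a`
/// and `b` are interleaved per 128-bit lane rather than concatenated.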
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpacksswb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_packs_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(packsswb(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpackssdw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_packs_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(packssdw(a.as_i32x8(), b.as_i32x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpackuswb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_packus_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(packuswb(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpackusdw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_packus_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(packusdw(a.as_i32x8(), b.as_i32x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpermps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_permutevar8x32_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(permd(a.as_u32x8(), b.as_u32x8())) }
}

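/// Permutes the 64-bit integers of `a` using the four 2-bit indices packed
/// into `IMM8`; for example, `IMM8 = 0b00_01_10_11` reverses the four lanes.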
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpermpd, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_permute4x64_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        let zero = i64x4::ZERO;
        let r: i64x4 = simd_shuffle!(
            a.as_i64x4(),
            zero,
            [
                IMM8 as u32 & 0b11,
                (IMM8 as u32 >> 2) & 0b11,
                (IMM8 as u32 >> 4) & 0b11,
                (IMM8 as u32 >> 6) & 0b11,
            ],
        );
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_permute2x128_si256<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_permute2f128_si256::<IMM8>(a, b)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpermpd, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_permute4x64_pd<const IMM8: i32>(a: __m256d) -> __m256d {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        simd_shuffle!(
            a,
            _mm256_undefined_pd(),
            [
                IMM8 as u32 & 0b11,
                (IMM8 as u32 >> 2) & 0b11,
                (IMM8 as u32 >> 4) & 0b11,
                (IMM8 as u32 >> 6) & 0b11,
            ],
        )
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpermps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_permutevar8x32_ps(a: __m256, idx: __m256i) -> __m256 {
    unsafe { permps(a, idx.as_i32x8()) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsadbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sad_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(psadbw(a.as_u8x32(), b.as_u8x32())) }
}

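/// Shuffles the bytes of `a` within each 128-bit lane according to the low
/// 4 bits of the corresponding byte of `b`; a `b` byte with its high bit set
/// zeroes the output byte instead.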
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpshufb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(pshufb(a.as_u8x32(), b.as_u8x32())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_shuffle_epi32<const MASK: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(MASK, 8);
    unsafe {
        let r: i32x8 = simd_shuffle!(
            a.as_i32x8(),
            a.as_i32x8(),
            [
                MASK as u32 & 0b11,
                (MASK as u32 >> 2) & 0b11,
                (MASK as u32 >> 4) & 0b11,
                (MASK as u32 >> 6) & 0b11,
                (MASK as u32 & 0b11) + 4,
                ((MASK as u32 >> 2) & 0b11) + 4,
                ((MASK as u32 >> 4) & 0b11) + 4,
                ((MASK as u32 >> 6) & 0b11) + 4,
            ],
        );
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_shufflehi_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        let a = a.as_i16x16();
        let r: i16x16 = simd_shuffle!(
            a,
            a,
            [
                0,
                1,
                2,
                3,
                4 + (IMM8 as u32 & 0b11),
                4 + ((IMM8 as u32 >> 2) & 0b11),
                4 + ((IMM8 as u32 >> 4) & 0b11),
                4 + ((IMM8 as u32 >> 6) & 0b11),
                8,
                9,
                10,
                11,
                12 + (IMM8 as u32 & 0b11),
                12 + ((IMM8 as u32 >> 2) & 0b11),
                12 + ((IMM8 as u32 >> 4) & 0b11),
                12 + ((IMM8 as u32 >> 6) & 0b11),
            ],
        );
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_shufflelo_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        let a = a.as_i16x16();
        let r: i16x16 = simd_shuffle!(
            a,
            a,
            [
                0 + (IMM8 as u32 & 0b11),
                0 + ((IMM8 as u32 >> 2) & 0b11),
                0 + ((IMM8 as u32 >> 4) & 0b11),
                0 + ((IMM8 as u32 >> 6) & 0b11),
                4,
                5,
                6,
                7,
                8 + (IMM8 as u32 & 0b11),
                8 + ((IMM8 as u32 >> 2) & 0b11),
                8 + ((IMM8 as u32 >> 4) & 0b11),
                8 + ((IMM8 as u32 >> 6) & 0b11),
                12,
                13,
                14,
                15,
            ],
        );
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsignw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sign_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(psignw(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsignd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sign_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(psignd(a.as_i32x8(), b.as_i32x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsignb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sign_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(psignb(a.as_i8x32(), b.as_i8x32())) }
}

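/// Shifts packed 16-bit integers in `a` left by the 64-bit count in the low
/// half of `count`; counts of 16 or more zero the result.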
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sll_epi16(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psllw(a.as_i16x16(), count.as_i16x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpslld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sll_epi32(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(pslld(a.as_i32x8(), count.as_i32x4())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sll_epi64(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psllq(a.as_i64x4(), count.as_i64x2())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_slli_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 16 {
            _mm256_setzero_si256()
        } else {
            transmute(simd_shl(a.as_u16x16(), u16x16::splat(IMM8 as u16)))
        }
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpslld, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_slli_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 32 {
            _mm256_setzero_si256()
        } else {
            transmute(simd_shl(a.as_u32x8(), u32x8::splat(IMM8 as u32)))
        }
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_slli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 64 {
            _mm256_setzero_si256()
        } else {
            transmute(simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64)))
        }
    }
}

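/// Shifts each 128-bit lane of `a` left by `IMM8` bytes while shifting in
/// zeros; bytes never cross the lane boundary. This is an alias for
/// [`_mm256_bslli_epi128`].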
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_slli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_bslli_epi128::<IMM8>(a)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_bslli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    const fn mask(shift: i32, i: u32) -> u32 {
        let shift = shift as u32 & 0xff;
        if shift > 15 || i % 16 < shift {
            0
        } else {
            32 + (i - shift)
        }
    }
    unsafe {
        let a = a.as_i8x32();
        let r: i8x32 = simd_shuffle!(
            i8x32::ZERO,
            a,
            [
                mask(IMM8, 0),
                mask(IMM8, 1),
                mask(IMM8, 2),
                mask(IMM8, 3),
                mask(IMM8, 4),
                mask(IMM8, 5),
                mask(IMM8, 6),
                mask(IMM8, 7),
                mask(IMM8, 8),
                mask(IMM8, 9),
                mask(IMM8, 10),
                mask(IMM8, 11),
                mask(IMM8, 12),
                mask(IMM8, 13),
                mask(IMM8, 14),
                mask(IMM8, 15),
                mask(IMM8, 16),
                mask(IMM8, 17),
                mask(IMM8, 18),
                mask(IMM8, 19),
                mask(IMM8, 20),
                mask(IMM8, 21),
                mask(IMM8, 22),
                mask(IMM8, 23),
                mask(IMM8, 24),
                mask(IMM8, 25),
                mask(IMM8, 26),
                mask(IMM8, 27),
                mask(IMM8, 28),
                mask(IMM8, 29),
                mask(IMM8, 30),
                mask(IMM8, 31),
            ],
        );
        transmute(r)
    }
}

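/// Shifts each packed 32-bit integer in `a` left by the amount specified in
/// the corresponding lane of `count`; lanes whose count is 32 or more are
/// zeroed.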
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllvd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_sllv_epi32(a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let count = count.as_u32x4();
        let no_overflow: u32x4 = simd_lt(count, u32x4::splat(u32::BITS));
        let count = simd_select(no_overflow, count, u32x4::ZERO);
        simd_select(no_overflow, simd_shl(a.as_u32x4(), count), u32x4::ZERO).as_m128i()
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllvd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_sllv_epi32(a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let count = count.as_u32x8();
        let no_overflow: u32x8 = simd_lt(count, u32x8::splat(u32::BITS));
        let count = simd_select(no_overflow, count, u32x8::ZERO);
        simd_select(no_overflow, simd_shl(a.as_u32x8(), count), u32x8::ZERO).as_m256i()
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllvq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_sllv_epi64(a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let count = count.as_u64x2();
        let no_overflow: u64x2 = simd_lt(count, u64x2::splat(u64::BITS as u64));
        let count = simd_select(no_overflow, count, u64x2::ZERO);
        simd_select(no_overflow, simd_shl(a.as_u64x2(), count), u64x2::ZERO).as_m128i()
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllvq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_sllv_epi64(a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let count = count.as_u64x4();
        let no_overflow: u64x4 = simd_lt(count, u64x4::splat(u64::BITS as u64));
        let count = simd_select(no_overflow, count, u64x4::ZERO);
        simd_select(no_overflow, simd_shl(a.as_u64x4(), count), u64x4::ZERO).as_m256i()
    }
}

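/// Shifts packed 16-bit integers in `a` right by the 64-bit count in the low
/// half of `count` while shifting in sign bits.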
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsraw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sra_epi16(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psraw(a.as_i16x16(), count.as_i16x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrad))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sra_epi32(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psrad(a.as_i32x8(), count.as_i32x4())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srai_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { transmute(simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16))) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srai_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { transmute(simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31)))) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsravd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_srav_epi32(a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let count = count.as_u32x4();
        let no_overflow: u32x4 = simd_lt(count, u32x4::splat(u32::BITS));
        let count = simd_select(no_overflow, transmute(count), i32x4::splat(31));
        simd_shr(a.as_i32x4(), count).as_m128i()
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsravd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srav_epi32(a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let count = count.as_u32x8();
        let no_overflow: u32x8 = simd_lt(count, u32x8::splat(u32::BITS));
        let count = simd_select(no_overflow, transmute(count), i32x8::splat(31));
        simd_shr(a.as_i32x8(), count).as_m256i()
    }
}

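/// Shifts each 128-bit lane of `a` right by `IMM8` bytes while shifting in
/// zeros. This is an alias for [`_mm256_bsrli_epi128`].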
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_bsrli_epi128::<IMM8>(a)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_bsrli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    const fn mask(shift: i32, i: u32) -> u32 {
        let shift = shift as u32 & 0xff;
        if shift > 15 || (15 - (i % 16)) < shift {
            0
        } else {
            32 + (i + shift)
        }
    }
    unsafe {
        let a = a.as_i8x32();
        let r: i8x32 = simd_shuffle!(
            i8x32::ZERO,
            a,
            [
                mask(IMM8, 0),
                mask(IMM8, 1),
                mask(IMM8, 2),
                mask(IMM8, 3),
                mask(IMM8, 4),
                mask(IMM8, 5),
                mask(IMM8, 6),
                mask(IMM8, 7),
                mask(IMM8, 8),
                mask(IMM8, 9),
                mask(IMM8, 10),
                mask(IMM8, 11),
                mask(IMM8, 12),
                mask(IMM8, 13),
                mask(IMM8, 14),
                mask(IMM8, 15),
                mask(IMM8, 16),
                mask(IMM8, 17),
                mask(IMM8, 18),
                mask(IMM8, 19),
                mask(IMM8, 20),
                mask(IMM8, 21),
                mask(IMM8, 22),
                mask(IMM8, 23),
                mask(IMM8, 24),
                mask(IMM8, 25),
                mask(IMM8, 26),
                mask(IMM8, 27),
                mask(IMM8, 28),
                mask(IMM8, 29),
                mask(IMM8, 30),
                mask(IMM8, 31),
            ],
        );
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srl_epi16(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psrlw(a.as_i16x16(), count.as_i16x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srl_epi32(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psrld(a.as_i32x8(), count.as_i32x4())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srl_epi64(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psrlq(a.as_i64x4(), count.as_i64x2())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srli_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 16 {
            _mm256_setzero_si256()
        } else {
            transmute(simd_shr(a.as_u16x16(), u16x16::splat(IMM8 as u16)))
        }
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srli_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 32 {
            _mm256_setzero_si256()
        } else {
            transmute(simd_shr(a.as_u32x8(), u32x8::splat(IMM8 as u32)))
        }
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 64 {
            _mm256_setzero_si256()
        } else {
            transmute(simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64)))
        }
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_srlv_epi32(a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let count = count.as_u32x4();
        let no_overflow: u32x4 = simd_lt(count, u32x4::splat(u32::BITS));
        let count = simd_select(no_overflow, count, u32x4::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u32x4(), count), u32x4::ZERO).as_m128i()
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srlv_epi32(a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let count = count.as_u32x8();
        let no_overflow: u32x8 = simd_lt(count, u32x8::splat(u32::BITS));
        let count = simd_select(no_overflow, count, u32x8::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u32x8(), count), u32x8::ZERO).as_m256i()
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_srlv_epi64(a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let count = count.as_u64x2();
        let no_overflow: u64x2 = simd_lt(count, u64x2::splat(u64::BITS as u64));
        let count = simd_select(no_overflow, count, u64x2::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u64x2(), count), u64x2::ZERO).as_m128i()
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srlv_epi64(a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let count = count.as_u64x4();
        let no_overflow: u64x4 = simd_lt(count, u64x4::splat(u64::BITS as u64));
        let count = simd_select(no_overflow, count, u64x4::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u64x4(), count), u64x4::ZERO).as_m256i()
    }
}

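/// Loads 256 bits of integer data from memory using a non-temporal hint to
/// minimize cache pollution.
///
/// # Safety
///
/// `mem_addr` must be valid for reads and aligned to a 32-byte boundary.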
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vmovntdqa))]
#[stable(feature = "simd_x86_updates", since = "1.82.0")]
pub unsafe fn _mm256_stream_load_si256(mem_addr: *const __m256i) -> __m256i {
    let dst: __m256i;
    crate::arch::asm!(
        vpl!("vmovntdqa {a}"),
        a = out(ymm_reg) dst,
        p = in(reg) mem_addr,
        options(pure, readonly, nostack, preserves_flags),
    );
    dst
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_sub_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_sub(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_sub_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_sub(a.as_i32x8(), b.as_i32x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_sub_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_sub(a.as_i64x4(), b.as_i64x4())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_sub_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_sub(a.as_i8x32(), b.as_i8x32())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_subs_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_sub(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_subs_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_sub(a.as_i8x32(), b.as_i8x32())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_subs_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_sub(a.as_u16x16(), b.as_u16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_subs_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_sub(a.as_u8x32(), b.as_u8x32())) }
}

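/// Interleaves the upper eight 8-bit integers from each 128-bit lane of `a`
/// and `b`.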
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_unpackhi_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        #[rustfmt::skip]
        let r: i8x32 = simd_shuffle!(a.as_i8x32(), b.as_i8x32(), [
            8, 40, 9, 41, 10, 42, 11, 43,
            12, 44, 13, 45, 14, 46, 15, 47,
            24, 56, 25, 57, 26, 58, 27, 59,
            28, 60, 29, 61, 30, 62, 31, 63,
        ]);
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_unpacklo_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        #[rustfmt::skip]
        let r: i8x32 = simd_shuffle!(a.as_i8x32(), b.as_i8x32(), [
            0, 32, 1, 33, 2, 34, 3, 35,
            4, 36, 5, 37, 6, 38, 7, 39,
            16, 48, 17, 49, 18, 50, 19, 51,
            20, 52, 21, 53, 22, 54, 23, 55,
        ]);
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpckhwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_unpackhi_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let r: i16x16 = simd_shuffle!(
            a.as_i16x16(),
            b.as_i16x16(),
            [4, 20, 5, 21, 6, 22, 7, 23, 12, 28, 13, 29, 14, 30, 15, 31],
        );
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_unpacklo_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let r: i16x16 = simd_shuffle!(
            a.as_i16x16(),
            b.as_i16x16(),
            [0, 16, 1, 17, 2, 18, 3, 19, 8, 24, 9, 25, 10, 26, 11, 27],
        );
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vunpckhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let r: i32x8 = simd_shuffle!(a.as_i32x8(), b.as_i32x8(), [2, 10, 3, 11, 6, 14, 7, 15]);
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vunpcklps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let r: i32x8 = simd_shuffle!(a.as_i32x8(), b.as_i32x8(), [0, 8, 1, 9, 4, 12, 5, 13]);
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_unpackhi_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let r: i64x4 = simd_shuffle!(a.as_i64x4(), b.as_i64x4(), [1, 5, 3, 7]);
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_unpacklo_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let r: i64x4 = simd_shuffle!(a.as_i64x4(), b.as_i64x4(), [0, 4, 2, 6]);
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_xor_si256(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_xor(a.as_i64x4(), b.as_i64x4())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_extract_epi8<const INDEX: i32>(a: __m256i) -> i32 {
    static_assert_uimm_bits!(INDEX, 5);
    unsafe { simd_extract!(a.as_u8x32(), INDEX as u32, u8) as i32 }
}

#[inline]
#[target_feature(enable = "avx2")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_extract_epi16<const INDEX: i32>(a: __m256i) -> i32 {
    static_assert_uimm_bits!(INDEX, 4);
    unsafe { simd_extract!(a.as_u16x16(), INDEX as u32, u16) as i32 }
}

#[allow(improper_ctypes)]
unsafe extern "C" {
    #[link_name = "llvm.x86.avx2.phadd.sw"]
    fn phaddsw(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx2.phsub.sw"]
    fn phsubsw(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx2.pmadd.wd"]
    fn pmaddwd(a: i16x16, b: i16x16) -> i32x8;
    #[link_name = "llvm.x86.avx2.pmadd.ub.sw"]
    fn pmaddubsw(a: u8x32, b: i8x32) -> i16x16;
    #[link_name = "llvm.x86.avx2.mpsadbw"]
    fn mpsadbw(a: u8x32, b: u8x32, imm8: i8) -> u16x16;
    #[link_name = "llvm.x86.avx2.pmul.hr.sw"]
    fn pmulhrsw(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx2.packsswb"]
    fn packsswb(a: i16x16, b: i16x16) -> i8x32;
    #[link_name = "llvm.x86.avx2.packssdw"]
    fn packssdw(a: i32x8, b: i32x8) -> i16x16;
    #[link_name = "llvm.x86.avx2.packuswb"]
    fn packuswb(a: i16x16, b: i16x16) -> u8x32;
    #[link_name = "llvm.x86.avx2.packusdw"]
    fn packusdw(a: i32x8, b: i32x8) -> u16x16;
    #[link_name = "llvm.x86.avx2.psad.bw"]
    fn psadbw(a: u8x32, b: u8x32) -> u64x4;
    #[link_name = "llvm.x86.avx2.psign.b"]
    fn psignb(a: i8x32, b: i8x32) -> i8x32;
    #[link_name = "llvm.x86.avx2.psign.w"]
    fn psignw(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx2.psign.d"]
    fn psignd(a: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx2.psll.w"]
    fn psllw(a: i16x16, count: i16x8) -> i16x16;
    #[link_name = "llvm.x86.avx2.psll.d"]
    fn pslld(a: i32x8, count: i32x4) -> i32x8;
    #[link_name = "llvm.x86.avx2.psll.q"]
    fn psllq(a: i64x4, count: i64x2) -> i64x4;
    #[link_name = "llvm.x86.avx2.psra.w"]
    fn psraw(a: i16x16, count: i16x8) -> i16x16;
    #[link_name = "llvm.x86.avx2.psra.d"]
    fn psrad(a: i32x8, count: i32x4) -> i32x8;
    #[link_name = "llvm.x86.avx2.psrl.w"]
    fn psrlw(a: i16x16, count: i16x8) -> i16x16;
    #[link_name = "llvm.x86.avx2.psrl.d"]
    fn psrld(a: i32x8, count: i32x4) -> i32x8;
    #[link_name = "llvm.x86.avx2.psrl.q"]
    fn psrlq(a: i64x4, count: i64x2) -> i64x4;
    #[link_name = "llvm.x86.avx2.pshuf.b"]
    fn pshufb(a: u8x32, b: u8x32) -> u8x32;
    #[link_name = "llvm.x86.avx2.permd"]
    fn permd(a: u32x8, b: u32x8) -> u32x8;
    #[link_name = "llvm.x86.avx2.permps"]
    fn permps(a: __m256, b: i32x8) -> __m256;
    #[link_name = "llvm.x86.avx2.gather.d.d"]
    fn pgatherdd(src: i32x4, slice: *const i8, offsets: i32x4, mask: i32x4, scale: i8) -> i32x4;
    #[link_name = "llvm.x86.avx2.gather.d.d.256"]
    fn vpgatherdd(src: i32x8, slice: *const i8, offsets: i32x8, mask: i32x8, scale: i8) -> i32x8;
    #[link_name = "llvm.x86.avx2.gather.d.q"]
    fn pgatherdq(src: i64x2, slice: *const i8, offsets: i32x4, mask: i64x2, scale: i8) -> i64x2;
    #[link_name = "llvm.x86.avx2.gather.d.q.256"]
    fn vpgatherdq(src: i64x4, slice: *const i8, offsets: i32x4, mask: i64x4, scale: i8) -> i64x4;
    #[link_name = "llvm.x86.avx2.gather.q.d"]
    fn pgatherqd(src: i32x4, slice: *const i8, offsets: i64x2, mask: i32x4, scale: i8) -> i32x4;
    #[link_name = "llvm.x86.avx2.gather.q.d.256"]
    fn vpgatherqd(src: i32x4, slice: *const i8, offsets: i64x4, mask: i32x4, scale: i8) -> i32x4;
    #[link_name = "llvm.x86.avx2.gather.q.q"]
    fn pgatherqq(src: i64x2, slice: *const i8, offsets: i64x2, mask: i64x2, scale: i8) -> i64x2;
    #[link_name = "llvm.x86.avx2.gather.q.q.256"]
    fn vpgatherqq(src: i64x4, slice: *const i8, offsets: i64x4, mask: i64x4, scale: i8) -> i64x4;
    #[link_name = "llvm.x86.avx2.gather.d.pd"]
    fn pgatherdpd(
        src: __m128d,
        slice: *const i8,
        offsets: i32x4,
        mask: __m128d,
        scale: i8,
    ) -> __m128d;
    #[link_name = "llvm.x86.avx2.gather.d.pd.256"]
    fn vpgatherdpd(
        src: __m256d,
        slice: *const i8,
        offsets: i32x4,
        mask: __m256d,
        scale: i8,
    ) -> __m256d;
    #[link_name = "llvm.x86.avx2.gather.q.pd"]
    fn pgatherqpd(
        src: __m128d,
        slice: *const i8,
        offsets: i64x2,
        mask: __m128d,
        scale: i8,
    ) -> __m128d;
    #[link_name = "llvm.x86.avx2.gather.q.pd.256"]
    fn vpgatherqpd(
        src: __m256d,
        slice: *const i8,
        offsets: i64x4,
        mask: __m256d,
        scale: i8,
    ) -> __m256d;
    #[link_name = "llvm.x86.avx2.gather.d.ps"]
    fn pgatherdps(src: __m128, slice: *const i8, offsets: i32x4, mask: __m128, scale: i8)
    -> __m128;
    #[link_name = "llvm.x86.avx2.gather.d.ps.256"]
    fn vpgatherdps(
        src: __m256,
        slice: *const i8,
        offsets: i32x8,
        mask: __m256,
        scale: i8,
    ) -> __m256;
    #[link_name = "llvm.x86.avx2.gather.q.ps"]
    fn pgatherqps(src: __m128, slice: *const i8, offsets: i64x2, mask: __m128, scale: i8)
    -> __m128;
    #[link_name = "llvm.x86.avx2.gather.q.ps.256"]
    fn vpgatherqps(
        src: __m128,
        slice: *const i8,
        offsets: i64x4,
        mask: __m128,
        scale: i8,
    ) -> __m128;
}

#[cfg(test)]
mod tests {
    use crate::core_arch::assert_eq_const as assert_eq;

    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    #[simd_test(enable = "avx2")]
    const fn test_mm256_abs_epi32() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let r = _mm256_abs_epi32(a);
        #[rustfmt::skip]
        let e = _mm256_setr_epi32(
            0, 1, 1, i32::MAX,
            i32::MAX.wrapping_add(1), 100, 100, 32,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_abs_epi16() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            0, 1, -1, 2, -2, 3, -3, 4,
            -4, 5, -5, i16::MAX, i16::MIN, 100, -100, -32,
        );
        let r = _mm256_abs_epi16(a);
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            0, 1, 1, 2, 2, 3, 3, 4,
            4, 5, 5, i16::MAX, i16::MAX.wrapping_add(1), 100, 100, 32,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_abs_epi8() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            0, 1, -1, 2, -2, 3, -3, 4,
            -4, 5, -5, i8::MAX, i8::MIN, 100, -100, -32,
            0, 1, -1, 2, -2, 3, -3, 4,
            -4, 5, -5, i8::MAX, i8::MIN, 100, -100, -32,
        );
        let r = _mm256_abs_epi8(a);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            0, 1, 1, 2, 2, 3, 3, 4,
            4, 5, 5, i8::MAX, i8::MAX.wrapping_add(1), 100, 100, 32,
            0, 1, 1, 2, 2, 3, 3, 4,
            4, 5, 5, i8::MAX, i8::MAX.wrapping_add(1), 100, 100, 32,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_add_epi64() {
        let a = _mm256_setr_epi64x(-10, 0, 100, 1_000_000_000);
        let b = _mm256_setr_epi64x(-1, 0, 1, 2);
        let r = _mm256_add_epi64(a, b);
        let e = _mm256_setr_epi64x(-11, 0, 101, 1_000_000_002);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_add_epi32() {
        let a = _mm256_setr_epi32(-1, 0, 1, 2, 3, 4, 5, 6);
        let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm256_add_epi32(a, b);
        let e = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_add_epi16() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi16(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        let r = _mm256_add_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            0, 2, 4, 6, 8, 10, 12, 14,
            16, 18, 20, 22, 24, 26, 28, 30,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_add_epi8() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm256_add_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            0, 2, 4, 6, 8, 10, 12, 14,
            16, 18, 20, 22, 24, 26, 28, 30,
            32, 34, 36, 38, 40, 42, 44, 46,
            48, 50, 52, 54, 56, 58, 60, 62,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_adds_epi8() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi8(
            32, 33, 34, 35, 36, 37, 38, 39,
            40, 41, 42, 43, 44, 45, 46, 47,
            48, 49, 50, 51, 52, 53, 54, 55,
            56, 57, 58, 59, 60, 61, 62, 63,
        );
        let r = _mm256_adds_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            32, 34, 36, 38, 40, 42, 44, 46,
            48, 50, 52, 54, 56, 58, 60, 62,
            64, 66, 68, 70, 72, 74, 76, 78,
            80, 82, 84, 86, 88, 90, 92, 94,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_adds_epi8_saturate_positive() {
        let a = _mm256_set1_epi8(0x7F);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_adds_epi8(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_adds_epi8_saturate_negative() {
        let a = _mm256_set1_epi8(-0x80);
        let b = _mm256_set1_epi8(-1);
        let r = _mm256_adds_epi8(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_adds_epi16() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi16(
            32, 33, 34, 35, 36, 37, 38, 39,
            40, 41, 42, 43, 44, 45, 46, 47,
        );
        let r = _mm256_adds_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            32, 34, 36, 38, 40, 42, 44, 46,
            48, 50, 52, 54, 56, 58, 60, 62,
        );

        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_adds_epi16_saturate_positive() {
        let a = _mm256_set1_epi16(0x7FFF);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_adds_epi16(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_adds_epi16_saturate_negative() {
        let a = _mm256_set1_epi16(-0x8000);
        let b = _mm256_set1_epi16(-1);
        let r = _mm256_adds_epi16(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_adds_epu8() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi8(
            32, 33, 34, 35, 36, 37, 38, 39,
            40, 41, 42, 43, 44, 45, 46, 47,
            48, 49, 50, 51, 52, 53, 54, 55,
            56, 57, 58, 59, 60, 61, 62, 63,
        );
        let r = _mm256_adds_epu8(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            32, 34, 36, 38, 40, 42, 44, 46,
            48, 50, 52, 54, 56, 58, 60, 62,
            64, 66, 68, 70, 72, 74, 76, 78,
            80, 82, 84, 86, 88, 90, 92, 94,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_adds_epu8_saturate() {
        let a = _mm256_set1_epi8(!0);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_adds_epu8(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_adds_epu16() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi16(
            32, 33, 34, 35, 36, 37, 38, 39,
            40, 41, 42, 43, 44, 45, 46, 47,
        );
        let r = _mm256_adds_epu16(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            32, 34, 36, 38, 40, 42, 44, 46,
            48, 50, 52, 54, 56, 58, 60, 62,
        );

        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_adds_epu16_saturate() {
        let a = _mm256_set1_epi16(!0);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_adds_epu16(a, b);
        assert_eq_m256i(r, a);
    }

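    // Illustrative worked example added alongside the original suite (not an
    // upstream test): the high half of the 32-bit product
    // 1000 * 1000 = 1_000_000 is 1_000_000 >> 16 = 15 in every lane.
    #[simd_test(enable = "avx2")]
    fn test_mm256_mulhi_epi16_worked_example() {
        let a = _mm256_set1_epi16(1000);
        let b = _mm256_set1_epi16(1000);
        let r = _mm256_mulhi_epi16(a, b);
        assert_eq_m256i(r, _mm256_set1_epi16(15));
    }
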
    #[simd_test(enable = "avx2")]
    const fn test_mm256_and_si256() {
        let a = _mm256_set1_epi8(5);
        let b = _mm256_set1_epi8(3);
        let got = _mm256_and_si256(a, b);
        assert_eq_m256i(got, _mm256_set1_epi8(1));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_andnot_si256() {
        let a = _mm256_set1_epi8(5);
        let b = _mm256_set1_epi8(3);
        let got = _mm256_andnot_si256(a, b);
        assert_eq_m256i(got, _mm256_set1_epi8(2));
    }

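    // Illustrative worked example added alongside the original suite (not an
    // upstream test): byte shifts in _mm256_bslli_epi128 never cross the
    // 128-bit lane boundary, so byte 0 of each lane is zero-filled.
    #[simd_test(enable = "avx2")]
    fn test_mm256_bslli_epi128_lane_boundary() {
        let a = _mm256_set1_epi8(1);
        let r = _mm256_bslli_epi128::<1>(a);
        let e = _mm256_insert_epi8::<16>(_mm256_insert_epi8::<0>(_mm256_set1_epi8(1), 0), 0);
        assert_eq_m256i(r, e);
    }
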
4204 #[simd_test(enable = "avx2")]
4205 const fn test_mm256_avg_epu8() {
4206 let (a, b) = (_mm256_set1_epi8(3), _mm256_set1_epi8(9));
4207 let r = _mm256_avg_epu8(a, b);
4208 assert_eq_m256i(r, _mm256_set1_epi8(6));
4209 }
4210
4211 #[simd_test(enable = "avx2")]
4212 const fn test_mm256_avg_epu16() {
4213 let (a, b) = (_mm256_set1_epi16(3), _mm256_set1_epi16(9));
4214 let r = _mm256_avg_epu16(a, b);
4215 assert_eq_m256i(r, _mm256_set1_epi16(6));
4216 }
4217
4218 #[simd_test(enable = "avx2")]
4219 const fn test_mm_blend_epi32() {
4220 let (a, b) = (_mm_set1_epi32(3), _mm_set1_epi32(9));
4221 let e = _mm_setr_epi32(9, 3, 3, 3);
4222 let r = _mm_blend_epi32::<0x01>(a, b);
4223 assert_eq_m128i(r, e);
4224
4225 let r = _mm_blend_epi32::<0x0E>(b, a);
4226 assert_eq_m128i(r, e);
4227 }
4228
4229 #[simd_test(enable = "avx2")]
4230 const fn test_mm256_blend_epi32() {
4231 let (a, b) = (_mm256_set1_epi32(3), _mm256_set1_epi32(9));
4232 let e = _mm256_setr_epi32(9, 3, 3, 3, 3, 3, 3, 3);
4233 let r = _mm256_blend_epi32::<0x01>(a, b);
4234 assert_eq_m256i(r, e);
4235
4236 let e = _mm256_setr_epi32(3, 9, 3, 3, 3, 3, 3, 9);
4237 let r = _mm256_blend_epi32::<0x82>(a, b);
4238 assert_eq_m256i(r, e);
4239
4240 let e = _mm256_setr_epi32(3, 3, 9, 9, 9, 9, 9, 3);
4241 let r = _mm256_blend_epi32::<0x7C>(a, b);
4242 assert_eq_m256i(r, e);
4243 }
4244
4245 #[simd_test(enable = "avx2")]
4246 const fn test_mm256_blend_epi16() {
4247 let (a, b) = (_mm256_set1_epi16(3), _mm256_set1_epi16(9));
4248 let e = _mm256_setr_epi16(9, 3, 3, 3, 3, 3, 3, 3, 9, 3, 3, 3, 3, 3, 3, 3);
4249 let r = _mm256_blend_epi16::<0x01>(a, b);
4250 assert_eq_m256i(r, e);
4251
4252 let r = _mm256_blend_epi16::<0xFE>(b, a);
4253 assert_eq_m256i(r, e);
4254 }
4255
4256 #[simd_test(enable = "avx2")]
4257 const fn test_mm256_blendv_epi8() {
4258 let (a, b) = (_mm256_set1_epi8(4), _mm256_set1_epi8(2));
4259 let mask = _mm256_insert_epi8::<2>(_mm256_set1_epi8(0), -1);
4260 let e = _mm256_insert_epi8::<2>(_mm256_set1_epi8(4), 2);
4261 let r = _mm256_blendv_epi8(a, b, mask);
4262 assert_eq_m256i(r, e);
4263 }
4264
4265 #[simd_test(enable = "avx2")]
4266 const fn test_mm_broadcastb_epi8() {
4267 let a = _mm_insert_epi8::<0>(_mm_set1_epi8(0x00), 0x2a);
4268 let res = _mm_broadcastb_epi8(a);
4269 assert_eq_m128i(res, _mm_set1_epi8(0x2a));
4270 }
4271
4272 #[simd_test(enable = "avx2")]
4273 const fn test_mm256_broadcastb_epi8() {
4274 let a = _mm_insert_epi8::<0>(_mm_set1_epi8(0x00), 0x2a);
4275 let res = _mm256_broadcastb_epi8(a);
4276 assert_eq_m256i(res, _mm256_set1_epi8(0x2a));
4277 }
4278
4279 #[simd_test(enable = "avx2")]
4280 const fn test_mm_broadcastd_epi32() {
4281 let a = _mm_setr_epi32(0x2a, 0x8000000, 0, 0);
4282 let res = _mm_broadcastd_epi32(a);
4283 assert_eq_m128i(res, _mm_set1_epi32(0x2a));
4284 }
4285
4286 #[simd_test(enable = "avx2")]
4287 const fn test_mm256_broadcastd_epi32() {
4288 let a = _mm_setr_epi32(0x2a, 0x8000000, 0, 0);
4289 let res = _mm256_broadcastd_epi32(a);
4290 assert_eq_m256i(res, _mm256_set1_epi32(0x2a));
4291 }
4292
4293 #[simd_test(enable = "avx2")]
4294 const fn test_mm_broadcastq_epi64() {
4295 let a = _mm_setr_epi64x(0x1ffffffff, 0);
4296 let res = _mm_broadcastq_epi64(a);
4297 assert_eq_m128i(res, _mm_set1_epi64x(0x1ffffffff));
4298 }
4299
4300 #[simd_test(enable = "avx2")]
4301 const fn test_mm256_broadcastq_epi64() {
4302 let a = _mm_setr_epi64x(0x1ffffffff, 0);
4303 let res = _mm256_broadcastq_epi64(a);
4304 assert_eq_m256i(res, _mm256_set1_epi64x(0x1ffffffff));
4305 }
4306
4307 #[simd_test(enable = "avx2")]
4308 const fn test_mm_broadcastsd_pd() {
4309 let a = _mm_setr_pd(6.88, 3.44);
4310 let res = _mm_broadcastsd_pd(a);
4311 assert_eq_m128d(res, _mm_set1_pd(6.88));
4312 }
4313
4314 #[simd_test(enable = "avx2")]
4315 const fn test_mm256_broadcastsd_pd() {
4316 let a = _mm_setr_pd(6.88, 3.44);
4317 let res = _mm256_broadcastsd_pd(a);
4318 assert_eq_m256d(res, _mm256_set1_pd(6.88f64));
4319 }
4320
4321 #[simd_test(enable = "avx2")]
4322 const fn test_mm_broadcastsi128_si256() {
4323 let a = _mm_setr_epi64x(0x0987654321012334, 0x5678909876543210);
4324 let res = _mm_broadcastsi128_si256(a);
4325 let retval = _mm256_setr_epi64x(
4326 0x0987654321012334,
4327 0x5678909876543210,
4328 0x0987654321012334,
4329 0x5678909876543210,
4330 );
4331 assert_eq_m256i(res, retval);
4332 }
4333
4334 #[simd_test(enable = "avx2")]
4335 const fn test_mm256_broadcastsi128_si256() {
4336 let a = _mm_setr_epi64x(0x0987654321012334, 0x5678909876543210);
4337 let res = _mm256_broadcastsi128_si256(a);
4338 let retval = _mm256_setr_epi64x(
4339 0x0987654321012334,
4340 0x5678909876543210,
4341 0x0987654321012334,
4342 0x5678909876543210,
4343 );
4344 assert_eq_m256i(res, retval);
4345 }
4346
4347 #[simd_test(enable = "avx2")]
4348 const fn test_mm_broadcastss_ps() {
4349 let a = _mm_setr_ps(6.88, 3.44, 0.0, 0.0);
4350 let res = _mm_broadcastss_ps(a);
4351 assert_eq_m128(res, _mm_set1_ps(6.88));
4352 }
4353
4354 #[simd_test(enable = "avx2")]
4355 const fn test_mm256_broadcastss_ps() {
4356 let a = _mm_setr_ps(6.88, 3.44, 0.0, 0.0);
4357 let res = _mm256_broadcastss_ps(a);
4358 assert_eq_m256(res, _mm256_set1_ps(6.88));
4359 }
4360
4361 #[simd_test(enable = "avx2")]
4362 const fn test_mm_broadcastw_epi16() {
4363 let a = _mm_insert_epi16::<0>(_mm_set1_epi16(0x2a), 0x22b);
4364 let res = _mm_broadcastw_epi16(a);
4365 assert_eq_m128i(res, _mm_set1_epi16(0x22b));
4366 }
4367
4368 #[simd_test(enable = "avx2")]
4369 const fn test_mm256_broadcastw_epi16() {
4370 let a = _mm_insert_epi16::<0>(_mm_set1_epi16(0x2a), 0x22b);
4371 let res = _mm256_broadcastw_epi16(a);
4372 assert_eq_m256i(res, _mm256_set1_epi16(0x22b));
4373 }
4374
    #[simd_test(enable = "avx2")]
    const fn test_mm256_cmpeq_epi8() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi8(
            31, 30, 2, 28, 27, 26, 25, 24,
            23, 22, 21, 20, 19, 18, 17, 16,
            15, 14, 13, 12, 11, 10, 9, 8,
            7, 6, 5, 4, 3, 2, 1, 0,
        );
        let r = _mm256_cmpeq_epi8(a, b);
        assert_eq_m256i(r, _mm256_insert_epi8::<2>(_mm256_set1_epi8(0), !0));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_cmpeq_epi16() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi16(
            15, 14, 2, 12, 11, 10, 9, 8,
            7, 6, 5, 4, 3, 2, 1, 0,
        );
        let r = _mm256_cmpeq_epi16(a, b);
        assert_eq_m256i(r, _mm256_insert_epi16::<2>(_mm256_set1_epi16(0), !0));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_cmpeq_epi32() {
        let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm256_setr_epi32(7, 6, 2, 4, 3, 2, 1, 0);
        let r = _mm256_cmpeq_epi32(a, b);
        let e = _mm256_set1_epi32(0);
        let e = _mm256_insert_epi32::<2>(e, !0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_cmpeq_epi64() {
        let a = _mm256_setr_epi64x(0, 1, 2, 3);
        let b = _mm256_setr_epi64x(3, 2, 2, 0);
        let r = _mm256_cmpeq_epi64(a, b);
        assert_eq_m256i(r, _mm256_insert_epi64::<2>(_mm256_set1_epi64x(0), !0));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_cmpgt_epi8() {
        let a = _mm256_insert_epi8::<0>(_mm256_set1_epi8(0), 5);
        let b = _mm256_set1_epi8(0);
        let r = _mm256_cmpgt_epi8(a, b);
        assert_eq_m256i(r, _mm256_insert_epi8::<0>(_mm256_set1_epi8(0), !0));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_cmpgt_epi16() {
        let a = _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), 5);
        let b = _mm256_set1_epi16(0);
        let r = _mm256_cmpgt_epi16(a, b);
        assert_eq_m256i(r, _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), !0));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_cmpgt_epi32() {
        let a = _mm256_insert_epi32::<0>(_mm256_set1_epi32(0), 5);
        let b = _mm256_set1_epi32(0);
        let r = _mm256_cmpgt_epi32(a, b);
        assert_eq_m256i(r, _mm256_insert_epi32::<0>(_mm256_set1_epi32(0), !0));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_cmpgt_epi64() {
        let a = _mm256_insert_epi64::<0>(_mm256_set1_epi64x(0), 5);
        let b = _mm256_set1_epi64x(0);
        let r = _mm256_cmpgt_epi64(a, b);
        assert_eq_m256i(r, _mm256_insert_epi64::<0>(_mm256_set1_epi64x(0), !0));
    }

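    // The _mm256_cvtepi8/16/32_* tests check sign extension: negative
    // inputs such as -1 must remain negative in the wider element type.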
    #[simd_test(enable = "avx2")]
    const fn test_mm256_cvtepi8_epi16() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 0, -1, 1, -2, 2, -3, 3,
            -4, 4, -5, 5, -6, 6, -7, 7,
        );
        #[rustfmt::skip]
        let r = _mm256_setr_epi16(
            0, 0, -1, 1, -2, 2, -3, 3,
            -4, 4, -5, 5, -6, 6, -7, 7,
        );
        assert_eq_m256i(r, _mm256_cvtepi8_epi16(a));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_cvtepi8_epi32() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 0, -1, 1, -2, 2, -3, 3,
            -4, 4, -5, 5, -6, 6, -7, 7,
        );
        let r = _mm256_setr_epi32(0, 0, -1, 1, -2, 2, -3, 3);
        assert_eq_m256i(r, _mm256_cvtepi8_epi32(a));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_cvtepi8_epi64() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 0, -1, 1, -2, 2, -3, 3,
            -4, 4, -5, 5, -6, 6, -7, 7,
        );
        let r = _mm256_setr_epi64x(0, 0, -1, 1);
        assert_eq_m256i(r, _mm256_cvtepi8_epi64(a));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_cvtepi16_epi32() {
        let a = _mm_setr_epi16(0, 0, -1, 1, -2, 2, -3, 3);
        let r = _mm256_setr_epi32(0, 0, -1, 1, -2, 2, -3, 3);
        assert_eq_m256i(r, _mm256_cvtepi16_epi32(a));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_cvtepi16_epi64() {
        let a = _mm_setr_epi16(0, 0, -1, 1, -2, 2, -3, 3);
        let r = _mm256_setr_epi64x(0, 0, -1, 1);
        assert_eq_m256i(r, _mm256_cvtepi16_epi64(a));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_cvtepi32_epi64() {
        let a = _mm_setr_epi32(0, 0, -1, 1);
        let r = _mm256_setr_epi64x(0, 0, -1, 1);
        assert_eq_m256i(r, _mm256_cvtepi32_epi64(a));
    }

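    // The _mm256_cvtepu* tests check zero extension; only as many low
    // elements of `a` as fit in the destination vector are converted.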
    #[simd_test(enable = "avx2")]
    const fn test_mm256_cvtepu16_epi32() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m256i(r, _mm256_cvtepu16_epi32(a));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_cvtepu16_epi64() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_setr_epi64x(0, 1, 2, 3);
        assert_eq_m256i(r, _mm256_cvtepu16_epi64(a));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_cvtepu32_epi64() {
        let a = _mm_setr_epi32(0, 1, 2, 3);
        let r = _mm256_setr_epi64x(0, 1, 2, 3);
        assert_eq_m256i(r, _mm256_cvtepu32_epi64(a));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_cvtepu8_epi16() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let r = _mm256_setr_epi16(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        assert_eq_m256i(r, _mm256_cvtepu8_epi16(a));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_cvtepu8_epi32() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        let r = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m256i(r, _mm256_cvtepu8_epi32(a));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_cvtepu8_epi64() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        let r = _mm256_setr_epi64x(0, 1, 2, 3);
        assert_eq_m256i(r, _mm256_cvtepu8_epi64(a));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_extracti128_si256() {
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let r = _mm256_extracti128_si256::<1>(a);
        let e = _mm_setr_epi64x(3, 4);
        assert_eq_m128i(r, e);
    }

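    // Horizontal adds operate within each 128-bit lane: the low half of a
    // lane holds pairwise sums from `a` (2 + 2 = 4) and the high half
    // pairwise sums from `b` (4 + 4 = 8), giving the 4/8 pattern below.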
    #[simd_test(enable = "avx2")]
    const fn test_mm256_hadd_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_hadd_epi16(a, b);
        let e = _mm256_setr_epi16(4, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_hadd_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_hadd_epi32(a, b);
        let e = _mm256_setr_epi32(4, 4, 8, 8, 4, 4, 8, 8);
        assert_eq_m256i(r, e);
    }

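    // Saturating horizontal add: the pair (0x7fff, 1) would wrap to a
    // negative value with ordinary addition, but vphaddsw clamps the sum
    // to i16::MAX (0x7FFF) in lane 0.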
    #[simd_test(enable = "avx2")]
    fn test_mm256_hadds_epi16() {
        let a = _mm256_set1_epi16(2);
        let a = _mm256_insert_epi16::<0>(a, 0x7fff);
        let a = _mm256_insert_epi16::<1>(a, 1);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_hadds_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            0x7FFF, 4, 4, 4, 8, 8, 8, 8,
            4, 4, 4, 4, 8, 8, 8, 8,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_hsub_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_hsub_epi16(a, b);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_hsub_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_hsub_epi32(a, b);
        let e = _mm256_set1_epi32(0);
        assert_eq_m256i(r, e);
    }

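    // Saturating horizontal subtract: 0x7fff - (-1) exceeds i16::MAX and
    // is clamped to 0x7FFF in lane 0; every other pair subtracts to 0.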
    #[simd_test(enable = "avx2")]
    fn test_mm256_hsubs_epi16() {
        let a = _mm256_set1_epi16(2);
        let a = _mm256_insert_epi16::<0>(a, 0x7fff);
        let a = _mm256_insert_epi16::<1>(a, -1);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_hsubs_epi16(a, b);
        let e = _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), 0x7FFF);
        assert_eq_m256i(r, e);
    }

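    // vpmaddwd multiplies adjacent i16 pairs and sums each pair into one
    // i32: 2 * 4 + 2 * 4 = 16 per element.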
    #[simd_test(enable = "avx2")]
    fn test_mm256_madd_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_madd_epi16(a, b);
        let e = _mm256_set1_epi32(16);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_inserti128_si256() {
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let b = _mm_setr_epi64x(7, 8);
        let r = _mm256_inserti128_si256::<1>(a, b);
        let e = _mm256_setr_epi64x(1, 2, 7, 8);
        assert_eq_m256i(r, e);
    }

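    // vpmaddubsw multiplies unsigned bytes from `a` by signed bytes from
    // `b` and sums adjacent products into one i16: 2 * 4 + 2 * 4 = 16.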
    #[simd_test(enable = "avx2")]
    fn test_mm256_maddubs_epi16() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_maddubs_epi16(a, b);
        let e = _mm256_set1_epi16(16);
        assert_eq_m256i(r, e);
    }

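    // Mask loads read an element only where the mask element's sign bit
    // is set (-1) and yield 0 elsewhere; the tests mix -1 and 0 lanes to
    // cover both cases.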
    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm_maskload_epi32() {
        let nums = [1, 2, 3, 4];
        let a = &nums as *const i32;
        let mask = _mm_setr_epi32(-1, 0, 0, -1);
        let r = _mm_maskload_epi32(a, mask);
        let e = _mm_setr_epi32(1, 0, 0, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_maskload_epi32() {
        let nums = [1, 2, 3, 4, 5, 6, 7, 8];
        let a = &nums as *const i32;
        let mask = _mm256_setr_epi32(-1, 0, 0, -1, 0, -1, -1, 0);
        let r = _mm256_maskload_epi32(a, mask);
        let e = _mm256_setr_epi32(1, 0, 0, 4, 0, 6, 7, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm_maskload_epi64() {
        let nums = [1_i64, 2_i64];
        let a = &nums as *const i64;
        let mask = _mm_setr_epi64x(0, -1);
        let r = _mm_maskload_epi64(a, mask);
        let e = _mm_setr_epi64x(0, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_maskload_epi64() {
        let nums = [1_i64, 2_i64, 3_i64, 4_i64];
        let a = &nums as *const i64;
        let mask = _mm256_setr_epi64x(0, -1, -1, 0);
        let r = _mm256_maskload_epi64(a, mask);
        let e = _mm256_setr_epi64x(0, 2, 3, 0);
        assert_eq_m256i(r, e);
    }

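    // Mask stores are the mirror image: an element is written back only
    // where the mask's sign bit is set, so the sentinel values already in
    // `arr` survive in the unmasked slots.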
    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm_maskstore_epi32() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let mut arr = [-1, -1, -1, -1];
        let mask = _mm_setr_epi32(-1, 0, 0, -1);
        _mm_maskstore_epi32(arr.as_mut_ptr(), mask, a);
        let e = [1, -1, -1, 4];
        assert_eq!(arr, e);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_maskstore_epi32() {
        let a = _mm256_setr_epi32(1, 0x6d726f, 3, 42, 0x777161, 6, 7, 8);
        let mut arr = [-1, -1, -1, 0x776173, -1, 0x68657265, -1, -1];
        let mask = _mm256_setr_epi32(-1, 0, 0, -1, 0, -1, -1, 0);
        _mm256_maskstore_epi32(arr.as_mut_ptr(), mask, a);
        let e = [1, -1, -1, 42, -1, 6, 7, -1];
        assert_eq!(arr, e);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm_maskstore_epi64() {
        let a = _mm_setr_epi64x(1_i64, 2_i64);
        let mut arr = [-1_i64, -1_i64];
        let mask = _mm_setr_epi64x(0, -1);
        _mm_maskstore_epi64(arr.as_mut_ptr(), mask, a);
        let e = [-1, 2];
        assert_eq!(arr, e);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_maskstore_epi64() {
        let a = _mm256_setr_epi64x(1_i64, 2_i64, 3_i64, 4_i64);
        let mut arr = [-1_i64, -1_i64, -1_i64, -1_i64];
        let mask = _mm256_setr_epi64x(0, -1, -1, 0);
        _mm256_maskstore_epi64(arr.as_mut_ptr(), mask, a);
        let e = [-1, 2, 3, -1];
        assert_eq!(arr, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_max_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_max_epi16(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_max_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_max_epi32(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_max_epi8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_max_epi8(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_max_epu16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_max_epu16(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_max_epu32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_max_epu32(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_max_epu8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_max_epu8(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_min_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_min_epi16(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_min_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_min_epi32(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_min_epi8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_min_epi8(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_min_epu16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_min_epu16(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_min_epu32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_min_epu32(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_min_epu8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_min_epu8(a, b);
        assert_eq_m256i(r, a);
    }

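    // With the sign bit set in every byte, vpmovmskb packs 32 one-bits
    // into the i32 result, i.e. -1.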
    #[simd_test(enable = "avx2")]
    const fn test_mm256_movemask_epi8() {
        let a = _mm256_set1_epi8(-1);
        let r = _mm256_movemask_epi8(a);
        let e = -1;
        assert_eq!(r, e);
    }

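    // With a zero offset immediate, each output word is the sum of
    // absolute differences over a 4-byte window: |2 - 4| * 4 = 8.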
    #[simd_test(enable = "avx2")]
    fn test_mm256_mpsadbw_epu8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_mpsadbw_epu8::<0>(a, b);
        let e = _mm256_set1_epi16(8);
        assert_eq_m256i(r, e);
    }

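    // vpmuldq/vpmuludq multiply only the even-indexed 32-bit elements
    // into 64-bit results: 0 * 1, 0 * 3, 2 * 5, 2 * 7 = (0, 0, 10, 14).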
    #[simd_test(enable = "avx2")]
    const fn test_mm256_mul_epi32() {
        let a = _mm256_setr_epi32(0, 0, 0, 0, 2, 2, 2, 2);
        let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm256_mul_epi32(a, b);
        let e = _mm256_setr_epi64x(0, 0, 10, 14);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_mul_epu32() {
        let a = _mm256_setr_epi32(0, 0, 0, 0, 2, 2, 2, 2);
        let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm256_mul_epu32(a, b);
        let e = _mm256_setr_epi64x(0, 0, 10, 14);
        assert_eq_m256i(r, e);
    }

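    // 6535 * 6535 = 42_706_225 = 0x028B_A531, so the high 16 bits are
    // 0x028B = 651; the signed and unsigned variants agree for these
    // small positive inputs.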
    #[simd_test(enable = "avx2")]
    const fn test_mm256_mulhi_epi16() {
        let a = _mm256_set1_epi16(6535);
        let b = _mm256_set1_epi16(6535);
        let r = _mm256_mulhi_epi16(a, b);
        let e = _mm256_set1_epi16(651);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_mulhi_epu16() {
        let a = _mm256_set1_epi16(6535);
        let b = _mm256_set1_epi16(6535);
        let r = _mm256_mulhi_epu16(a, b);
        let e = _mm256_set1_epi16(651);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_mullo_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_mullo_epi16(a, b);
        let e = _mm256_set1_epi16(8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_mullo_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_mullo_epi32(a, b);
        let e = _mm256_set1_epi32(8);
        assert_eq_m256i(r, e);
    }

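    // vpmulhrsw computes (((a * b) >> 14) + 1) >> 1 per lane; for the
    // product 2 * 4 = 8 the rounded high part is 0.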
    #[simd_test(enable = "avx2")]
    fn test_mm256_mulhrs_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        // The original test called _mm256_mullo_epi16 here, so the
        // intrinsic under test was never exercised.
        let r = _mm256_mulhrs_epi16(a, b);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_or_si256() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(0);
        let r = _mm256_or_si256(a, b);
        assert_eq_m256i(r, a);
    }

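    // Packing narrows within each 128-bit lane: eight elements from `a`,
    // then eight from `b`, repeated for the upper lane, which produces
    // the alternating runs of 2s and 4s below.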
    #[simd_test(enable = "avx2")]
    fn test_mm256_packs_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_packs_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            2, 2, 2, 2, 2, 2, 2, 2,
            4, 4, 4, 4, 4, 4, 4, 4,
            2, 2, 2, 2, 2, 2, 2, 2,
            4, 4, 4, 4, 4, 4, 4, 4,
        );

        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_packs_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_packs_epi32(a, b);
        let e = _mm256_setr_epi16(2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4);

        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_packus_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_packus_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            2, 2, 2, 2, 2, 2, 2, 2,
            4, 4, 4, 4, 4, 4, 4, 4,
            2, 2, 2, 2, 2, 2, 2, 2,
            4, 4, 4, 4, 4, 4, 4, 4,
        );

        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_packus_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_packus_epi32(a, b);
        let e = _mm256_setr_epi16(2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4);

        assert_eq_m256i(r, e);
    }

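    // vpsadbw sums |2 - 4| over each group of eight bytes and stores
    // 2 * 8 = 16 in the corresponding 64-bit element.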
    #[simd_test(enable = "avx2")]
    fn test_mm256_sad_epu8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_sad_epu8(a, b);
        let e = _mm256_set1_epi64x(16);
        assert_eq_m256i(r, e);
    }

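    // The shuffle immediate 0b00_01_01_11 selects source words (3, 1, 1, 0)
    // within the affected half of each 128-bit lane, e.g. (44, 22, 22, 11)
    // from (11, 22, 33, 44); the other half is passed through unchanged.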
    #[simd_test(enable = "avx2")]
    const fn test_mm256_shufflehi_epi16() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            0, 1, 2, 3, 11, 22, 33, 44,
            4, 5, 6, 7, 55, 66, 77, 88,
        );
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            0, 1, 2, 3, 44, 22, 22, 11,
            4, 5, 6, 7, 88, 66, 66, 55,
        );
        let r = _mm256_shufflehi_epi16::<0b00_01_01_11>(a);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_shufflelo_epi16() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            11, 22, 33, 44, 0, 1, 2, 3,
            55, 66, 77, 88, 4, 5, 6, 7,
        );
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            44, 22, 22, 11, 0, 1, 2, 3,
            88, 66, 66, 55, 4, 5, 6, 7,
        );
        let r = _mm256_shufflelo_epi16::<0b00_01_01_11>(a);
        assert_eq_m256i(r, e);
    }

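    // vpsign* negates each element of `a` where the matching element of
    // `b` is negative (and would zero it where `b` is zero).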
    #[simd_test(enable = "avx2")]
    fn test_mm256_sign_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(-1);
        let r = _mm256_sign_epi16(a, b);
        let e = _mm256_set1_epi16(-2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_sign_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(-1);
        let r = _mm256_sign_epi32(a, b);
        let e = _mm256_set1_epi32(-2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_sign_epi8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(-1);
        let r = _mm256_sign_epi8(a, b);
        let e = _mm256_set1_epi8(-2);
        assert_eq_m256i(r, e);
    }

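    // The sll/srl/sra variants without a `v` suffix take a single shift
    // count from the low 64 bits of the second operand; the remaining
    // elements of the count vector are ignored.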
    #[simd_test(enable = "avx2")]
    fn test_mm256_sll_epi16() {
        let a = _mm256_set1_epi16(0xFF);
        let b = _mm_insert_epi16::<0>(_mm_set1_epi16(0), 4);
        let r = _mm256_sll_epi16(a, b);
        assert_eq_m256i(r, _mm256_set1_epi16(0xFF0));
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_sll_epi32() {
        let a = _mm256_set1_epi32(0xFFFF);
        let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 4);
        let r = _mm256_sll_epi32(a, b);
        assert_eq_m256i(r, _mm256_set1_epi32(0xFFFF0));
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_sll_epi64() {
        let a = _mm256_set1_epi64x(0xFFFFFFFF);
        let b = _mm_insert_epi64::<0>(_mm_set1_epi64x(0), 4);
        let r = _mm256_sll_epi64(a, b);
        assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFFF0));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_slli_epi16() {
        assert_eq_m256i(
            _mm256_slli_epi16::<4>(_mm256_set1_epi16(0xFF)),
            _mm256_set1_epi16(0xFF0),
        );
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_slli_epi32() {
        assert_eq_m256i(
            _mm256_slli_epi32::<4>(_mm256_set1_epi32(0xFFFF)),
            _mm256_set1_epi32(0xFFFF0),
        );
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_slli_epi64() {
        assert_eq_m256i(
            _mm256_slli_epi64::<4>(_mm256_set1_epi64x(0xFFFFFFFF)),
            _mm256_set1_epi64x(0xFFFFFFF0),
        );
    }

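    // vpslldq shifts each 128-bit lane left by whole bytes independently:
    // 0xFFFFFFFF shifted by 3 bytes becomes 0xFFFFFFFF000000 in each
    // 64-bit element.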
    #[simd_test(enable = "avx2")]
    const fn test_mm256_slli_si256() {
        let a = _mm256_set1_epi64x(0xFFFFFFFF);
        let r = _mm256_slli_si256::<3>(a);
        assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFFF000000));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm_sllv_epi32() {
        let a = _mm_set1_epi32(2);
        let b = _mm_set1_epi32(1);
        let r = _mm_sllv_epi32(a, b);
        let e = _mm_set1_epi32(4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_sllv_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(1);
        let r = _mm256_sllv_epi32(a, b);
        let e = _mm256_set1_epi32(4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm_sllv_epi64() {
        let a = _mm_set1_epi64x(2);
        let b = _mm_set1_epi64x(1);
        let r = _mm_sllv_epi64(a, b);
        let e = _mm_set1_epi64x(4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_sllv_epi64() {
        let a = _mm256_set1_epi64x(2);
        let b = _mm256_set1_epi64x(1);
        let r = _mm256_sllv_epi64(a, b);
        let e = _mm256_set1_epi64x(4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_sra_epi16() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0);
        let r = _mm256_sra_epi16(a, b);
        assert_eq_m256i(r, _mm256_set1_epi16(-1));
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_sra_epi32() {
        let a = _mm256_set1_epi32(-1);
        let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 1);
        let r = _mm256_sra_epi32(a, b);
        assert_eq_m256i(r, _mm256_set1_epi32(-1));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_srai_epi16() {
        assert_eq_m256i(
            _mm256_srai_epi16::<1>(_mm256_set1_epi16(-1)),
            _mm256_set1_epi16(-1),
        );
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_srai_epi32() {
        assert_eq_m256i(
            _mm256_srai_epi32::<1>(_mm256_set1_epi32(-1)),
            _mm256_set1_epi32(-1),
        );
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm_srav_epi32() {
        let a = _mm_set1_epi32(4);
        let count = _mm_set1_epi32(1);
        let r = _mm_srav_epi32(a, count);
        let e = _mm_set1_epi32(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_srav_epi32() {
        let a = _mm256_set1_epi32(4);
        let count = _mm256_set1_epi32(1);
        let r = _mm256_srav_epi32(a, count);
        let e = _mm256_set1_epi32(2);
        assert_eq_m256i(r, e);
    }

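    // vpsrldq also works per 128-bit lane: bytes shifted out of one lane
    // never move into the other, hence the trailing zeros in each lane of
    // the expected value.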
    #[simd_test(enable = "avx2")]
    const fn test_mm256_srli_si256() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm256_srli_si256::<3>(a);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            4, 5, 6, 7, 8, 9, 10, 11,
            12, 13, 14, 15, 16, 0, 0, 0,
            20, 21, 22, 23, 24, 25, 26, 27,
            28, 29, 30, 31, 32, 0, 0, 0,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_srl_epi16() {
        let a = _mm256_set1_epi16(0xFF);
        let b = _mm_insert_epi16::<0>(_mm_set1_epi16(0), 4);
        let r = _mm256_srl_epi16(a, b);
        assert_eq_m256i(r, _mm256_set1_epi16(0xF));
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_srl_epi32() {
        let a = _mm256_set1_epi32(0xFFFF);
        let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 4);
        let r = _mm256_srl_epi32(a, b);
        assert_eq_m256i(r, _mm256_set1_epi32(0xFFF));
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_srl_epi64() {
        let a = _mm256_set1_epi64x(0xFFFFFFFF);
        let b = _mm_setr_epi64x(4, 0);
        let r = _mm256_srl_epi64(a, b);
        assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFF));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_srli_epi16() {
        assert_eq_m256i(
            _mm256_srli_epi16::<4>(_mm256_set1_epi16(0xFF)),
            _mm256_set1_epi16(0xF),
        );
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_srli_epi32() {
        assert_eq_m256i(
            _mm256_srli_epi32::<4>(_mm256_set1_epi32(0xFFFF)),
            _mm256_set1_epi32(0xFFF),
        );
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_srli_epi64() {
        assert_eq_m256i(
            _mm256_srli_epi64::<4>(_mm256_set1_epi64x(0xFFFFFFFF)),
            _mm256_set1_epi64x(0xFFFFFFF),
        );
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm_srlv_epi32() {
        let a = _mm_set1_epi32(2);
        let count = _mm_set1_epi32(1);
        let r = _mm_srlv_epi32(a, count);
        let e = _mm_set1_epi32(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_srlv_epi32() {
        let a = _mm256_set1_epi32(2);
        let count = _mm256_set1_epi32(1);
        let r = _mm256_srlv_epi32(a, count);
        let e = _mm256_set1_epi32(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm_srlv_epi64() {
        let a = _mm_set1_epi64x(2);
        let count = _mm_set1_epi64x(1);
        let r = _mm_srlv_epi64(a, count);
        let e = _mm_set1_epi64x(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_srlv_epi64() {
        let a = _mm256_set1_epi64x(2);
        let count = _mm256_set1_epi64x(1);
        let r = _mm256_srlv_epi64(a, count);
        let e = _mm256_set1_epi64x(1);
        assert_eq_m256i(r, e);
    }

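    // _mm256_stream_load_si256 is a non-temporal load and requires a
    // 32-byte aligned source; a __m256i local provides that alignment.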
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_stream_load_si256() {
        let a = _mm256_set_epi64x(5, 6, 7, 8);
        let r = _mm256_stream_load_si256(core::ptr::addr_of!(a) as *const _);
        assert_eq_m256i(a, r);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_sub_epi16() {
        let a = _mm256_set1_epi16(4);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_sub_epi16(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_sub_epi32() {
        let a = _mm256_set1_epi32(4);
        let b = _mm256_set1_epi32(2);
        let r = _mm256_sub_epi32(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_sub_epi64() {
        let a = _mm256_set1_epi64x(4);
        let b = _mm256_set1_epi64x(2);
        let r = _mm256_sub_epi64(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_sub_epi8() {
        let a = _mm256_set1_epi8(4);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_sub_epi8(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_subs_epi16() {
        let a = _mm256_set1_epi16(4);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_subs_epi16(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_subs_epi8() {
        let a = _mm256_set1_epi8(4);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_subs_epi8(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_subs_epu16() {
        let a = _mm256_set1_epi16(4);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_subs_epu16(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_subs_epu8() {
        let a = _mm256_set1_epi8(4);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_subs_epu8(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_xor_si256() {
        let a = _mm256_set1_epi8(5);
        let b = _mm256_set1_epi8(3);
        let r = _mm256_xor_si256(a, b);
        assert_eq_m256i(r, _mm256_set1_epi8(6));
    }

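    // vpalignr concatenates corresponding 128-bit lanes of `a` (high) and
    // `b` (low) and extracts a byte-shifted window: a shift of 32 or more
    // yields zero, 16 returns `a`, and 0 returns `b`.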
    #[simd_test(enable = "avx2")]
    const fn test_mm256_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi8(
            -1, -2, -3, -4, -5, -6, -7, -8,
            -9, -10, -11, -12, -13, -14, -15, -16,
            -17, -18, -19, -20, -21, -22, -23, -24,
            -25, -26, -27, -28, -29, -30, -31, -32,
        );
        let r = _mm256_alignr_epi8::<33>(a, b);
        assert_eq_m256i(r, _mm256_set1_epi8(0));

        let r = _mm256_alignr_epi8::<17>(a, b);
        #[rustfmt::skip]
        let expected = _mm256_setr_epi8(
            2, 3, 4, 5, 6, 7, 8, 9,
            10, 11, 12, 13, 14, 15, 16, 0,
            18, 19, 20, 21, 22, 23, 24, 25,
            26, 27, 28, 29, 30, 31, 32, 0,
        );
        assert_eq_m256i(r, expected);

        let r = _mm256_alignr_epi8::<4>(a, b);
        #[rustfmt::skip]
        let expected = _mm256_setr_epi8(
            -5, -6, -7, -8, -9, -10, -11, -12,
            -13, -14, -15, -16, 1, 2, 3, 4,
            -21, -22, -23, -24, -25, -26, -27, -28,
            -29, -30, -31, -32, 17, 18, 19, 20,
        );
        assert_eq_m256i(r, expected);

        let r = _mm256_alignr_epi8::<15>(a, b);
        #[rustfmt::skip]
        let expected = _mm256_setr_epi8(
            -16, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            -32, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        assert_eq_m256i(r, expected);

        let r = _mm256_alignr_epi8::<0>(a, b);
        assert_eq_m256i(r, b);

        let r = _mm256_alignr_epi8::<16>(a, b);
        assert_eq_m256i(r, a);
    }

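    // vpshufb indexes within each 128-bit lane; an index byte with its
    // high bit set (such as 128) zeroes the destination byte.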
    #[simd_test(enable = "avx2")]
    fn test_mm256_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi8(
            4, 128u8 as i8, 4, 3, 24, 12, 6, 19,
            12, 5, 5, 10, 4, 1, 8, 0,
            4, 128u8 as i8, 4, 3, 24, 12, 6, 19,
            12, 5, 5, 10, 4, 1, 8, 0,
        );
        #[rustfmt::skip]
        let expected = _mm256_setr_epi8(
            5, 0, 5, 4, 9, 13, 7, 4,
            13, 6, 6, 11, 5, 2, 9, 1,
            21, 0, 21, 20, 25, 29, 23, 20,
            29, 22, 22, 27, 21, 18, 25, 17,
        );
        let r = _mm256_shuffle_epi8(a, b);
        assert_eq_m256i(r, expected);
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_permutevar8x32_epi32() {
        let a = _mm256_setr_epi32(100, 200, 300, 400, 500, 600, 700, 800);
        let b = _mm256_setr_epi32(5, 0, 5, 1, 7, 6, 3, 4);
        let expected = _mm256_setr_epi32(600, 100, 600, 200, 800, 700, 400, 500);
        let r = _mm256_permutevar8x32_epi32(a, b);
        assert_eq_m256i(r, expected);
    }

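    // The immediate 0b00_01_00_11 selects source elements (3, 0, 1, 0),
    // reading one 2-bit field per destination element from the low bits up.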
    #[simd_test(enable = "avx2")]
    const fn test_mm256_permute4x64_epi64() {
        let a = _mm256_setr_epi64x(100, 200, 300, 400);
        let expected = _mm256_setr_epi64x(400, 100, 200, 100);
        let r = _mm256_permute4x64_epi64::<0b00010011>(a);
        assert_eq_m256i(r, expected);
    }

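    // Each nibble of the immediate picks one 128-bit lane from the (a, b)
    // pair: 0b0011 selects b's high lane (700, 800) and 0b0001 selects
    // a's high lane (500, 600).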
    #[simd_test(enable = "avx2")]
    const fn test_mm256_permute2x128_si256() {
        let a = _mm256_setr_epi64x(100, 200, 500, 600);
        let b = _mm256_setr_epi64x(300, 400, 700, 800);
        let r = _mm256_permute2x128_si256::<0b00_01_00_11>(a, b);
        let e = _mm256_setr_epi64x(700, 800, 500, 600);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_permute4x64_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let r = _mm256_permute4x64_pd::<0b00_01_00_11>(a);
        let e = _mm256_setr_pd(4., 1., 2., 1.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_permutevar8x32_ps() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm256_setr_epi32(5, 0, 5, 1, 7, 6, 3, 4);
        let r = _mm256_permutevar8x32_ps(a, b);
        let e = _mm256_setr_ps(6., 1., 6., 2., 8., 7., 4., 5.);
        assert_eq_m256(r, e);
    }

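    // The gather tests fill an array with arr[i] == i; with the scale
    // equal to the element size, each gathered value equals its index.
    // The masked variants fall back to the `src` value (256) wherever the
    // mask element's sign bit is clear.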
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i32gather_epi32() {
        let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
        let r = _mm_i32gather_epi32::<4>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
        assert_eq_m128i(r, _mm_setr_epi32(0, 16, 32, 48));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i32gather_epi32() {
        let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
        let r = _mm_mask_i32gather_epi32::<4>(
            _mm_set1_epi32(256),
            arr.as_ptr(),
            _mm_setr_epi32(0, 16, 64, 96),
            _mm_setr_epi32(-1, -1, -1, 0),
        );
        assert_eq_m128i(r, _mm_setr_epi32(0, 16, 64, 256));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i32gather_epi32() {
        let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
        let r =
            _mm256_i32gather_epi32::<4>(arr.as_ptr(), _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
        assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i32gather_epi32() {
        let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
        let r = _mm256_mask_i32gather_epi32::<4>(
            _mm256_set1_epi32(256),
            arr.as_ptr(),
            _mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0),
            _mm256_setr_epi32(-1, -1, -1, 0, 0, 0, 0, 0),
        );
        assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 64, 256, 256, 256, 256, 256));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i32gather_ps() {
        let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
        let r = _mm_i32gather_ps::<4>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
        assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 32.0, 48.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i32gather_ps() {
        let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
        let r = _mm_mask_i32gather_ps::<4>(
            _mm_set1_ps(256.0),
            arr.as_ptr(),
            _mm_setr_epi32(0, 16, 64, 96),
            _mm_setr_ps(-1.0, -1.0, -1.0, 0.0),
        );
        assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 64.0, 256.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i32gather_ps() {
        let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
        let r =
            _mm256_i32gather_ps::<4>(arr.as_ptr(), _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
        assert_eq_m256(r, _mm256_setr_ps(0.0, 16.0, 32.0, 48.0, 1.0, 2.0, 3.0, 4.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i32gather_ps() {
        let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
        let r = _mm256_mask_i32gather_ps::<4>(
            _mm256_set1_ps(256.0),
            arr.as_ptr(),
            _mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0),
            _mm256_setr_ps(-1.0, -1.0, -1.0, 0.0, 0.0, 0.0, 0.0, 0.0),
        );
        assert_eq_m256(
            r,
            _mm256_setr_ps(0.0, 16.0, 64.0, 256.0, 256.0, 256.0, 256.0, 256.0),
        );
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i32gather_epi64() {
        let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
        let r = _mm_i32gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0));
        assert_eq_m128i(r, _mm_setr_epi64x(0, 16));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i32gather_epi64() {
        let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
        let r = _mm_mask_i32gather_epi64::<8>(
            _mm_set1_epi64x(256),
            arr.as_ptr(),
            _mm_setr_epi32(16, 16, 16, 16),
            _mm_setr_epi64x(-1, 0),
        );
        assert_eq_m128i(r, _mm_setr_epi64x(16, 256));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i32gather_epi64() {
        let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
        let r = _mm256_i32gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
        assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 32, 48));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i32gather_epi64() {
        let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
        let r = _mm256_mask_i32gather_epi64::<8>(
            _mm256_set1_epi64x(256),
            arr.as_ptr(),
            _mm_setr_epi32(0, 16, 64, 96),
            _mm256_setr_epi64x(-1, -1, -1, 0),
        );
        assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 64, 256));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i32gather_pd() {
        let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
        let r = _mm_i32gather_pd::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0));
        assert_eq_m128d(r, _mm_setr_pd(0.0, 16.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i32gather_pd() {
        let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
        let r = _mm_mask_i32gather_pd::<8>(
            _mm_set1_pd(256.0),
            arr.as_ptr(),
            _mm_setr_epi32(16, 16, 16, 16),
            _mm_setr_pd(-1.0, 0.0),
        );
        assert_eq_m128d(r, _mm_setr_pd(16.0, 256.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i32gather_pd() {
        let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
        let r = _mm256_i32gather_pd::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
        assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 32.0, 48.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i32gather_pd() {
        let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
        let r = _mm256_mask_i32gather_pd::<8>(
            _mm256_set1_pd(256.0),
            arr.as_ptr(),
            _mm_setr_epi32(0, 16, 64, 96),
            _mm256_setr_pd(-1.0, -1.0, -1.0, 0.0),
        );
        assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 64.0, 256.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i64gather_epi32() {
        let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
        let r = _mm_i64gather_epi32::<4>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
        assert_eq_m128i(r, _mm_setr_epi32(0, 16, 0, 0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i64gather_epi32() {
        let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
        let r = _mm_mask_i64gather_epi32::<4>(
            _mm_set1_epi32(256),
            arr.as_ptr(),
            _mm_setr_epi64x(0, 16),
            _mm_setr_epi32(-1, 0, -1, 0),
        );
        assert_eq_m128i(r, _mm_setr_epi32(0, 256, 0, 0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i64gather_epi32() {
        let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
        let r = _mm256_i64gather_epi32::<4>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
        assert_eq_m128i(r, _mm_setr_epi32(0, 16, 32, 48));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i64gather_epi32() {
        let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
        let r = _mm256_mask_i64gather_epi32::<4>(
            _mm_set1_epi32(256),
            arr.as_ptr(),
            _mm256_setr_epi64x(0, 16, 64, 96),
            _mm_setr_epi32(-1, -1, -1, 0),
        );
        assert_eq_m128i(r, _mm_setr_epi32(0, 16, 64, 256));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i64gather_ps() {
        let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
        let r = _mm_i64gather_ps::<4>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
        assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 0.0, 0.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i64gather_ps() {
        let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
        let r = _mm_mask_i64gather_ps::<4>(
            _mm_set1_ps(256.0),
            arr.as_ptr(),
            _mm_setr_epi64x(0, 16),
            _mm_setr_ps(-1.0, 0.0, -1.0, 0.0),
        );
        assert_eq_m128(r, _mm_setr_ps(0.0, 256.0, 0.0, 0.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i64gather_ps() {
        let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
        let r = _mm256_i64gather_ps::<4>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
        assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 32.0, 48.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i64gather_ps() {
        let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
        let r = _mm256_mask_i64gather_ps::<4>(
            _mm_set1_ps(256.0),
            arr.as_ptr(),
            _mm256_setr_epi64x(0, 16, 64, 96),
            _mm_setr_ps(-1.0, -1.0, -1.0, 0.0),
        );
        assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 64.0, 256.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i64gather_epi64() {
        let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
        let r = _mm_i64gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
        assert_eq_m128i(r, _mm_setr_epi64x(0, 16));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i64gather_epi64() {
        let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
        let r = _mm_mask_i64gather_epi64::<8>(
            _mm_set1_epi64x(256),
            arr.as_ptr(),
            _mm_setr_epi64x(16, 16),
            _mm_setr_epi64x(-1, 0),
        );
        assert_eq_m128i(r, _mm_setr_epi64x(16, 256));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i64gather_epi64() {
        let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
        let r = _mm256_i64gather_epi64::<8>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
        assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 32, 48));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i64gather_epi64() {
        let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
        let r = _mm256_mask_i64gather_epi64::<8>(
            _mm256_set1_epi64x(256),
            arr.as_ptr(),
            _mm256_setr_epi64x(0, 16, 64, 96),
            _mm256_setr_epi64x(-1, -1, -1, 0),
        );
        assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 64, 256));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i64gather_pd() {
        let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
        let r = _mm_i64gather_pd::<8>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
        assert_eq_m128d(r, _mm_setr_pd(0.0, 16.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i64gather_pd() {
        let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
        let r = _mm_mask_i64gather_pd::<8>(
            _mm_set1_pd(256.0),
            arr.as_ptr(),
            _mm_setr_epi64x(16, 16),
            _mm_setr_pd(-1.0, 0.0),
        );
        assert_eq_m128d(r, _mm_setr_pd(16.0, 256.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i64gather_pd() {
        let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
        let r = _mm256_i64gather_pd::<8>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
        assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 32.0, 48.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i64gather_pd() {
        let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
        let r = _mm256_mask_i64gather_pd::<8>(
            _mm256_set1_pd(256.0),
            arr.as_ptr(),
            _mm256_setr_epi64x(0, 16, 64, 96),
            _mm256_setr_pd(-1.0, -1.0, -1.0, 0.0),
        );
        assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 64.0, 256.0));
    }

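    // The extract intrinsics return the element zero-extended to i32, so
    // the -1 in element 0 reads back as 0xFF (epi8) or 0xFFFF (epi16).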
    #[simd_test(enable = "avx2")]
    const fn test_mm256_extract_epi8() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            -1, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31
        );
        let r1 = _mm256_extract_epi8::<0>(a);
        let r2 = _mm256_extract_epi8::<3>(a);
        assert_eq!(r1, 0xFF);
        assert_eq!(r2, 3);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_extract_epi16() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            -1, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        let r1 = _mm256_extract_epi16::<0>(a);
        let r2 = _mm256_extract_epi16::<3>(a);
        assert_eq!(r1, 0xFFFF);
        assert_eq!(r2, 3);
    }
}