// core/stdarch/crates/core_arch/src/x86/avx512bw.rs

use crate::{
    core_arch::{simd::*, x86::*},
    intrinsics::simd::*,
    ptr,
};

#[cfg(test)]
use stdarch_test::assert_instr;

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi16&expand=30)
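///
/// # Examples
///
/// An illustrative sketch (marked `ignore`: it needs AVX-512 hardware and
/// assumes `avx512bw` support was already verified, e.g. with
/// `is_x86_feature_detected!("avx512bw")`):
///
/// ```ignore
/// // Safety: only sound after runtime detection of `avx512bw`.
/// unsafe {
///     let a = _mm512_set1_epi16(-7);
///     let r = _mm512_abs_epi16(a);
///     // every 16-bit lane of `r` now holds 7
/// }
/// ```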
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_abs_epi16(a: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i16x32();
        let cmp: i16x32 = simd_gt(a, i16x32::ZERO);
        transmute(simd_select(cmp, a, simd_neg(a)))
    }
}

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi16&expand=31)
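///
/// # Examples
///
/// A sketch of the writemask behavior (`ignore`d doctest; assumes `avx512bw`
/// was detected at runtime):
///
/// ```ignore
/// unsafe {
///     let src = _mm512_set1_epi16(42);
///     let a = _mm512_set1_epi16(-7);
///     // Only mask bit 0 is set: lane 0 becomes |-7| = 7,
///     // lanes 1..32 are copied from `src` and stay 42.
///     let r = _mm512_mask_abs_epi16(src, 0b1, a);
/// }
/// ```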
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_abs_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        let abs = _mm512_abs_epi16(a).as_i16x32();
        transmute(simd_select_bitmask(k, abs, src.as_i16x32()))
    }
}

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi16&expand=32)
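///
/// # Examples
///
/// A sketch of the zeromask behavior (`ignore`d doctest; assumes `avx512bw`
/// was detected at runtime):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_epi16(-7);
///     // Odd mask bits set: odd lanes become 7, even lanes are zeroed.
///     let r = _mm512_maskz_abs_epi16(0xAAAA_AAAA, a);
/// }
/// ```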
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_abs_epi16(k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        let abs = _mm512_abs_epi16(a).as_i16x32();
        transmute(simd_select_bitmask(k, abs, i16x32::ZERO))
    }
}

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi16&expand=28)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_abs_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        let abs = _mm256_abs_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, abs, src.as_i16x16()))
    }
}

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi16&expand=29)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_abs_epi16(k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        let abs = _mm256_abs_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, abs, i16x16::ZERO))
    }
}

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi16&expand=25)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_abs_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs = _mm_abs_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, abs, src.as_i16x8()))
    }
}

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi16&expand=26)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_abs_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs = _mm_abs_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, abs, i16x8::ZERO))
    }
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi8&expand=57)
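///
/// # Examples
///
/// An illustrative sketch of the edge case (`ignore`d doctest; assumes
/// `avx512bw` was detected at runtime):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_epi8(i8::MIN);
///     // |-128| does not fit in i8: each lane holds 0x80,
///     // which is 128 when read as an unsigned result.
///     let r = _mm512_abs_epi8(a);
/// }
/// ```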
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_abs_epi8(a: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i8x64();
        let cmp: i8x64 = simd_gt(a, i8x64::ZERO);
        transmute(simd_select(cmp, a, simd_neg(a)))
    }
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi8&expand=58)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_abs_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
    unsafe {
        let abs = _mm512_abs_epi8(a).as_i8x64();
        transmute(simd_select_bitmask(k, abs, src.as_i8x64()))
    }
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi8&expand=59)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_abs_epi8(k: __mmask64, a: __m512i) -> __m512i {
    unsafe {
        let abs = _mm512_abs_epi8(a).as_i8x64();
        transmute(simd_select_bitmask(k, abs, i8x64::ZERO))
    }
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi8&expand=55)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_abs_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
    unsafe {
        let abs = _mm256_abs_epi8(a).as_i8x32();
        transmute(simd_select_bitmask(k, abs, src.as_i8x32()))
    }
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi8&expand=56)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_abs_epi8(k: __mmask32, a: __m256i) -> __m256i {
    unsafe {
        let abs = _mm256_abs_epi8(a).as_i8x32();
        transmute(simd_select_bitmask(k, abs, i8x32::ZERO))
    }
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi8&expand=52)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_abs_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
    unsafe {
        let abs = _mm_abs_epi8(a).as_i8x16();
        transmute(simd_select_bitmask(k, abs, src.as_i8x16()))
    }
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi8&expand=53)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_abs_epi8(k: __mmask16, a: __m128i) -> __m128i {
    unsafe {
        let abs = _mm_abs_epi8(a).as_i8x16();
        transmute(simd_select_bitmask(k, abs, i8x16::ZERO))
    }
}

/// Add packed 16-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi16&expand=91)
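///
/// # Examples
///
/// A sketch of the wrapping behavior (`ignore`d doctest; assumes `avx512bw`
/// was detected at runtime):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_epi16(i16::MAX);
///     let b = _mm512_set1_epi16(1);
///     // Lane-wise wrapping add: i16::MAX + 1 wraps to i16::MIN.
///     let r = _mm512_add_epi16(a, b);
/// }
/// ```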
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_add_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_add(a.as_i16x32(), b.as_i16x32())) }
}

/// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi16&expand=92)
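///
/// # Examples
///
/// A sketch of masked addition (`ignore`d doctest; assumes `avx512bw` was
/// detected at runtime):
///
/// ```ignore
/// unsafe {
///     let src = _mm512_set1_epi16(0);
///     let a = _mm512_set1_epi16(10);
///     let b = _mm512_set1_epi16(20);
///     // Low 16 lanes become 30; high 16 lanes are copied from `src`.
///     let r = _mm512_mask_add_epi16(src, 0x0000_FFFF, a, b);
/// }
/// ```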
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_add_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_add_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, add, src.as_i16x32()))
    }
}

/// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi16&expand=93)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_add_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_add_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, add, i16x32::ZERO))
    }
}

/// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi16&expand=89)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_add_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_add_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, add, src.as_i16x16()))
    }
}

/// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi16&expand=90)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_add_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_add_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, add, i16x16::ZERO))
    }
}

/// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi16&expand=86)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_add_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_add_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, add, src.as_i16x8()))
    }
}

/// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi16&expand=87)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_add_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_add_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, add, i16x8::ZERO))
    }
}

/// Add packed 8-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi8&expand=118)
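///
/// # Examples
///
/// A sketch of 8-bit wrapping (`ignore`d doctest; assumes `avx512bw` was
/// detected at runtime):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_epi8(100);
///     let b = _mm512_set1_epi8(100);
///     // 100 + 100 = 200 overflows i8 and wraps to -56 in every lane.
///     let r = _mm512_add_epi8(a, b);
/// }
/// ```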
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_add_epi8(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_add(a.as_i8x64(), b.as_i8x64())) }
}

/// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi8&expand=119)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_add_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_add_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, add, src.as_i8x64()))
    }
}

/// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi8&expand=120)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_add_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_add_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, add, i8x64::ZERO))
    }
}

/// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi8&expand=116)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_add_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_add_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, add, src.as_i8x32()))
    }
}

/// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi8&expand=117)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_add_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_add_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, add, i8x32::ZERO))
    }
}

/// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi8&expand=113)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_add_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_add_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, add, src.as_i8x16()))
    }
}

/// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi8&expand=114)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_add_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_add_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, add, i8x16::ZERO))
    }
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epu16&expand=197)
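///
/// # Examples
///
/// A sketch of unsigned saturation (`ignore`d doctest; assumes `avx512bw`
/// was detected at runtime):
///
/// ```ignore
/// unsafe {
///     // The all-ones bit pattern is u16::MAX in every unsigned lane.
///     let a = _mm512_set1_epi16(-1);
///     let b = _mm512_set1_epi16(1);
///     // u16::MAX + 1 saturates to u16::MAX instead of wrapping to 0.
///     let r = _mm512_adds_epu16(a, b);
/// }
/// ```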
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddusw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_adds_epu16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_saturating_add(a.as_u16x32(), b.as_u16x32())) }
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epu16&expand=198)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddusw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_adds_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_adds_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, add, src.as_u16x32()))
    }
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epu16&expand=199)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddusw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_adds_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_adds_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, add, u16x32::ZERO))
    }
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epu16&expand=195)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddusw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_adds_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_adds_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, add, src.as_u16x16()))
    }
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epu16&expand=196)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddusw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_adds_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_adds_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, add, u16x16::ZERO))
    }
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epu16&expand=192)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddusw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_adds_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_adds_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, add, src.as_u16x8()))
    }
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epu16&expand=193)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddusw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_adds_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_adds_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, add, u16x8::ZERO))
    }
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epu8&expand=206)
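///
/// # Examples
///
/// A sketch of unsigned 8-bit saturation (`ignore`d doctest; assumes
/// `avx512bw` was detected at runtime):
///
/// ```ignore
/// unsafe {
///     // 0xC8 is 200 when the lanes are read as unsigned bytes.
///     let a = _mm512_set1_epi8(-56);
///     let b = _mm512_set1_epi8(100);
///     // 200 + 100 exceeds u8::MAX, so every lane clamps to 255.
///     let r = _mm512_adds_epu8(a, b);
/// }
/// ```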
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddusb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_adds_epu8(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_saturating_add(a.as_u8x64(), b.as_u8x64())) }
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epu8&expand=207)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddusb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_adds_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_adds_epu8(a, b).as_u8x64();
        transmute(simd_select_bitmask(k, add, src.as_u8x64()))
    }
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epu8&expand=208)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddusb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_adds_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_adds_epu8(a, b).as_u8x64();
        transmute(simd_select_bitmask(k, add, u8x64::ZERO))
    }
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epu8&expand=204)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddusb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_adds_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_adds_epu8(a, b).as_u8x32();
        transmute(simd_select_bitmask(k, add, src.as_u8x32()))
    }
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epu8&expand=205)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddusb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_adds_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_adds_epu8(a, b).as_u8x32();
        transmute(simd_select_bitmask(k, add, u8x32::ZERO))
    }
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epu8&expand=201)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddusb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_adds_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_adds_epu8(a, b).as_u8x16();
        transmute(simd_select_bitmask(k, add, src.as_u8x16()))
    }
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epu8&expand=202)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddusb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_adds_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_adds_epu8(a, b).as_u8x16();
        transmute(simd_select_bitmask(k, add, u8x16::ZERO))
    }
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epi16&expand=179)
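///
/// # Examples
///
/// A sketch of signed saturation (`ignore`d doctest; assumes `avx512bw` was
/// detected at runtime):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_epi16(i16::MAX);
///     let b = _mm512_set1_epi16(1);
///     // i16::MAX + 1 clamps to i16::MAX; contrast with _mm512_add_epi16,
///     // which would wrap to i16::MIN.
///     let r = _mm512_adds_epi16(a, b);
/// }
/// ```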
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_adds_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_saturating_add(a.as_i16x32(), b.as_i16x32())) }
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epi16&expand=180)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_adds_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_adds_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, add, src.as_i16x32()))
    }
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epi16&expand=181)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_adds_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_adds_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, add, i16x32::ZERO))
    }
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epi16&expand=177)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_adds_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_adds_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, add, src.as_i16x16()))
    }
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epi16&expand=178)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_adds_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_adds_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, add, i16x16::ZERO))
    }
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epi16&expand=174)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_adds_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_adds_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, add, src.as_i16x8()))
    }
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epi16&expand=175)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_adds_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_adds_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, add, i16x8::ZERO))
    }
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epi8&expand=188)
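///
/// # Examples
///
/// A sketch of signed 8-bit saturation (`ignore`d doctest; assumes
/// `avx512bw` was detected at runtime):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_epi8(-100);
///     let b = _mm512_set1_epi8(-100);
///     // -200 underflows i8, so every lane clamps to i8::MIN (-128).
///     let r = _mm512_adds_epi8(a, b);
/// }
/// ```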
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_adds_epi8(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_saturating_add(a.as_i8x64(), b.as_i8x64())) }
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epi8&expand=189)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_adds_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_adds_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, add, src.as_i8x64()))
    }
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epi8&expand=190)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_adds_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_adds_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, add, i8x64::ZERO))
    }
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epi8&expand=186)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_adds_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_adds_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, add, src.as_i8x32()))
    }
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epi8&expand=187)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_adds_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_adds_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, add, i8x32::ZERO))
    }
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epi8&expand=183)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_adds_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_adds_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, add, src.as_i8x16()))
    }
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epi8&expand=184)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_adds_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_adds_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, add, i8x16::ZERO))
    }
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi16&expand=5685)
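///
/// # Examples
///
/// A sketch of wrapping subtraction (`ignore`d doctest; assumes `avx512bw`
/// was detected at runtime):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_epi16(i16::MIN);
///     let b = _mm512_set1_epi16(1);
///     // Lane-wise wrapping subtract: i16::MIN - 1 wraps to i16::MAX.
///     let r = _mm512_sub_epi16(a, b);
/// }
/// ```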
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_sub_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_sub(a.as_i16x32(), b.as_i16x32())) }
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi16&expand=5683)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_sub_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_sub_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, sub, src.as_i16x32()))
    }
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi16&expand=5684)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_sub_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_sub_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, sub, i16x32::ZERO))
    }
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi16&expand=5680)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_sub_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_sub_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, sub, src.as_i16x16()))
    }
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi16&expand=5681)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_sub_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_sub_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, sub, i16x16::ZERO))
    }
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi16&expand=5677)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_sub_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_sub_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, sub, src.as_i16x8()))
    }
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi16&expand=5678)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_sub_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_sub_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, sub, i16x8::ZERO))
    }
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi8&expand=5712)
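///
/// # Examples
///
/// A sketch of 8-bit wrapping subtraction (`ignore`d doctest; assumes
/// `avx512bw` was detected at runtime):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_epi8(0);
///     let b = _mm512_set1_epi8(1);
///     // 0 - 1 wraps to -1 (bit pattern 0xFF) in every 8-bit lane.
///     let r = _mm512_sub_epi8(a, b);
/// }
/// ```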
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_sub_epi8(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_sub(a.as_i8x64(), b.as_i8x64())) }
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi8&expand=5710)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_sub_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_sub_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, sub, src.as_i8x64()))
    }
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi8&expand=5711)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_sub_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_sub_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, sub, i8x64::ZERO))
    }
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi8&expand=5707)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_sub_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_sub_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, sub, src.as_i8x32()))
    }
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi8&expand=5708)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_sub_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_sub_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, sub, i8x32::ZERO))
    }
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi8&expand=5704)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_sub_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_sub_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, sub, src.as_i8x16()))
    }
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi8&expand=5705)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_sub_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_sub_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, sub, i8x16::ZERO))
    }
}

/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epu16&expand=5793)
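///
/// # Examples
///
/// A sketch of unsigned saturating subtraction (`ignore`d doctest; assumes
/// `avx512bw` was detected at runtime):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_epi16(1);
///     let b = _mm512_set1_epi16(2);
///     // 1 - 2 clamps to 0 rather than wrapping to u16::MAX.
///     let r = _mm512_subs_epu16(a, b);
/// }
/// ```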
1041#[inline]
1042#[target_feature(enable = "avx512bw")]
1043#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1044#[cfg_attr(test, assert_instr(vpsubusw))]
1045#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1046pub const fn _mm512_subs_epu16(a: __m512i, b: __m512i) -> __m512i {
1047    unsafe { transmute(simd_saturating_sub(a.as_u16x32(), b.as_u16x32())) }
1048}

/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epu16&expand=5791)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubusw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_subs_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_subs_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, sub, src.as_u16x32()))
    }
}

/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epu16&expand=5792)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubusw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_subs_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_subs_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, sub, u16x32::ZERO))
    }
}

/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epu16&expand=5788)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubusw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_subs_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_subs_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, sub, src.as_u16x16()))
    }
}

/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epu16&expand=5789)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubusw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_subs_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_subs_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, sub, u16x16::ZERO))
    }
}

/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epu16&expand=5785)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubusw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_subs_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_subs_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, sub, src.as_u16x8()))
    }
}

/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epu16&expand=5786)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubusw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_subs_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_subs_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, sub, u16x8::ZERO))
    }
}

/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epu8&expand=5802)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubusb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_subs_epu8(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_saturating_sub(a.as_u8x64(), b.as_u8x64())) }
}

/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epu8&expand=5800)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubusb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_subs_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_subs_epu8(a, b).as_u8x64();
        transmute(simd_select_bitmask(k, sub, src.as_u8x64()))
    }
}

/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epu8&expand=5801)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubusb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_subs_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_subs_epu8(a, b).as_u8x64();
        transmute(simd_select_bitmask(k, sub, u8x64::ZERO))
    }
}

/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epu8&expand=5797)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubusb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_subs_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_subs_epu8(a, b).as_u8x32();
        transmute(simd_select_bitmask(k, sub, src.as_u8x32()))
    }
}

/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epu8&expand=5798)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubusb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_subs_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_subs_epu8(a, b).as_u8x32();
        transmute(simd_select_bitmask(k, sub, u8x32::ZERO))
    }
}

/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epu8&expand=5794)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubusb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_subs_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_subs_epu8(a, b).as_u8x16();
        transmute(simd_select_bitmask(k, sub, src.as_u8x16()))
    }
}

/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epu8&expand=5795)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubusb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_subs_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_subs_epu8(a, b).as_u8x16();
        transmute(simd_select_bitmask(k, sub, u8x16::ZERO))
    }
}

/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epi16&expand=5775)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_subs_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_saturating_sub(a.as_i16x32(), b.as_i16x32())) }
}
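
// Illustrative sketch (hypothetical helper test, not upstream code): unlike
// the unsigned forms above, signed saturation clamps to the i16 range at both
// ends, matching `i16::saturating_sub` per lane.
#[cfg(test)]
mod subs_epi16_scalar_model_example {
    #[test]
    fn clamps_at_both_ends() {
        assert_eq!(i16::MIN.saturating_sub(1), i16::MIN); // clamps low instead of wrapping
        assert_eq!(i16::MAX.saturating_sub(-1), i16::MAX); // clamps high
        assert_eq!(100i16.saturating_sub(30), 70); // in-range is plain subtraction
    }
}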

/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epi16&expand=5773)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_subs_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_subs_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, sub, src.as_i16x32()))
    }
}

/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epi16&expand=5774)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_subs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_subs_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, sub, i16x32::ZERO))
    }
}

/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epi16&expand=5770)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_subs_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_subs_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, sub, src.as_i16x16()))
    }
}

/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epi16&expand=5771)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_subs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_subs_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, sub, i16x16::ZERO))
    }
}

/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epi16&expand=5767)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_subs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_subs_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, sub, src.as_i16x8()))
    }
}

/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epi16&expand=5768)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_subs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_subs_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, sub, i16x8::ZERO))
    }
}

/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epi8&expand=5784)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_subs_epi8(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_saturating_sub(a.as_i8x64(), b.as_i8x64())) }
}

/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epi8&expand=5782)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_subs_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_subs_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, sub, src.as_i8x64()))
    }
}

/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epi8&expand=5783)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_subs_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_subs_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, sub, i8x64::ZERO))
    }
}

/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epi8&expand=5779)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_subs_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_subs_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, sub, src.as_i8x32()))
    }
}

/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epi8&expand=5780)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_subs_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_subs_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, sub, i8x32::ZERO))
    }
}

/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epi8&expand=5776)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_subs_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_subs_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, sub, src.as_i8x16()))
    }
}

/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epi8&expand=5777)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_subs_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_subs_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, sub, i8x16::ZERO))
    }
}

/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mulhi_epu16&expand=3973)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mulhi_epu16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = simd_cast::<_, u32x32>(a.as_u16x32());
        let b = simd_cast::<_, u32x32>(b.as_u16x32());
        let r = simd_shr(simd_mul(a, b), u32x32::splat(16));
        transmute(simd_cast::<u32x32, u16x32>(r))
    }
}
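
// Illustrative sketch (hypothetical, mirroring the widening arithmetic
// above): each lane widens to 32 bits, multiplies, then keeps only the high
// 16 bits of the product.
#[cfg(test)]
mod mulhi_epu16_scalar_model_example {
    #[test]
    fn keeps_high_half() {
        let (a, b) = (0x8000u16, 0x0002u16);
        let wide = (a as u32) * (b as u32); // 0x0001_0000
        assert_eq!((wide >> 16) as u16, 1);
    }
}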

/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mulhi_epu16&expand=3971)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_mulhi_epu16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        let mul = _mm512_mulhi_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, mul, src.as_u16x32()))
    }
}

/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mulhi_epu16&expand=3972)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_mulhi_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul = _mm512_mulhi_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, mul, u16x32::ZERO))
    }
}

/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mulhi_epu16&expand=3968)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_mulhi_epu16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        let mul = _mm256_mulhi_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, mul, src.as_u16x16()))
    }
}

/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mulhi_epu16&expand=3969)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_mulhi_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul = _mm256_mulhi_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, mul, u16x16::ZERO))
    }
}

/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mulhi_epu16&expand=3965)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_mulhi_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mulhi_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, mul, src.as_u16x8()))
    }
}

/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mulhi_epu16&expand=3966)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_mulhi_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mulhi_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, mul, u16x8::ZERO))
    }
}

/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mulhi_epi16&expand=3962)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mulhi_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = simd_cast::<_, i32x32>(a.as_i16x32());
        let b = simd_cast::<_, i32x32>(b.as_i16x32());
        let r = simd_shr(simd_mul(a, b), i32x32::splat(16));
        transmute(simd_cast::<i32x32, i16x32>(r))
    }
}
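
// Illustrative sketch (hypothetical): the signed variant widens to i32, so
// the shift by 16 is arithmetic and the high half keeps the product's sign.
#[cfg(test)]
mod mulhi_epi16_scalar_model_example {
    #[test]
    fn high_half_is_sign_extended() {
        let (a, b) = (-1i16, 2i16);
        let wide = (a as i32) * (b as i32); // -2, i.e. 0xFFFF_FFFE
        assert_eq!((wide >> 16) as i16, -1); // high 16 bits are all ones
    }
}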

/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mulhi_epi16&expand=3960)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_mulhi_epi16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        let mul = _mm512_mulhi_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, mul, src.as_i16x32()))
    }
}

/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mulhi_epi16&expand=3961)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_mulhi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul = _mm512_mulhi_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, mul, i16x32::ZERO))
    }
}

/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mulhi_epi16&expand=3957)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_mulhi_epi16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        let mul = _mm256_mulhi_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, mul, src.as_i16x16()))
    }
}

/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mulhi_epi16&expand=3958)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_mulhi_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul = _mm256_mulhi_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, mul, i16x16::ZERO))
    }
}

/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mulhi_epi16&expand=3954)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_mulhi_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mulhi_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, mul, src.as_i16x8()))
    }
}

/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mulhi_epi16&expand=3955)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_mulhi_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mulhi_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, mul, i16x8::ZERO))
    }
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mulhrs_epi16&expand=3986)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub fn _mm512_mulhrs_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpmulhrsw(a.as_i16x32(), b.as_i16x32())) }
}
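
// Illustrative sketch of the rounding rule described in the doc comment
// above (a hypothetical helper, not upstream code): widen, multiply, shift
// right by 14, add the rounding 1, then keep bits [16:1], i.e. shift right
// once more.
#[cfg(test)]
mod mulhrs_epi16_scalar_model_example {
    fn mulhrs(a: i16, b: i16) -> i16 {
        let tmp = ((a as i32) * (b as i32)) >> 14;
        ((tmp + 1) >> 1) as i16
    }

    #[test]
    fn rounds_to_nearest() {
        // 0x4000 is 0.5 in Q15 fixed point; 0.5 * 0.5 rounds to 0.25 = 0x2000.
        assert_eq!(mulhrs(0x4000, 0x4000), 0x2000);
    }
}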

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mulhrs_epi16&expand=3984)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub fn _mm512_mask_mulhrs_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul = _mm512_mulhrs_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, mul, src.as_i16x32()))
    }
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mulhrs_epi16&expand=3985)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub fn _mm512_maskz_mulhrs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul = _mm512_mulhrs_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, mul, i16x32::ZERO))
    }
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mulhrs_epi16&expand=3981)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub fn _mm256_mask_mulhrs_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul = _mm256_mulhrs_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, mul, src.as_i16x16()))
    }
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mulhrs_epi16&expand=3982)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub fn _mm256_maskz_mulhrs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul = _mm256_mulhrs_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, mul, i16x16::ZERO))
    }
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mulhrs_epi16&expand=3978)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub fn _mm_mask_mulhrs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mulhrs_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, mul, src.as_i16x8()))
    }
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mulhrs_epi16&expand=3979)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub fn _mm_maskz_mulhrs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mulhrs_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, mul, i16x8::ZERO))
    }
}

/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullo_epi16&expand=3996)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmullw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mullo_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_mul(a.as_i16x32(), b.as_i16x32())) }
}
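
// Illustrative sketch (hypothetical): only the low 16 bits of the 32-bit
// intermediate product survive, which is exactly wrapping multiplication on
// each lane.
#[cfg(test)]
mod mullo_epi16_scalar_model_example {
    #[test]
    fn keeps_low_half() {
        let (a, b) = (0x0101i16, 0x0100i16);
        let wide = (a as i32) * (b as i32); // 0x0001_0100
        assert_eq!((wide & 0xFFFF) as i16, a.wrapping_mul(b));
        assert_eq!(a.wrapping_mul(b), 0x0100);
    }
}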

/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullo_epi16&expand=3994)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmullw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_mullo_epi16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        let mul = _mm512_mullo_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, mul, src.as_i16x32()))
    }
}

/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mullo_epi16&expand=3995)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmullw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_mullo_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul = _mm512_mullo_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, mul, i16x32::ZERO))
    }
}

/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mullo_epi16&expand=3991)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmullw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_mullo_epi16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        let mul = _mm256_mullo_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, mul, src.as_i16x16()))
    }
}

/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mullo_epi16&expand=3992)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmullw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_mullo_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul = _mm256_mullo_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, mul, i16x16::ZERO))
    }
}

/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mullo_epi16&expand=3988)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmullw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_mullo_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mullo_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, mul, src.as_i16x8()))
    }
}

/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mullo_epi16&expand=3989)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmullw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_mullo_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mullo_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, mul, i16x8::ZERO))
    }
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu16&expand=3609)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_max_epu16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { simd_imax(a.as_u16x32(), b.as_u16x32()).as_m512i() }
}
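
// Illustrative sketch (hypothetical): the same bit pattern orders differently
// under unsigned and signed comparison, which is why the epu/epi max variants
// are distinct instructions.
#[cfg(test)]
mod max_epu16_scalar_model_example {
    #[test]
    fn unsigned_order_differs_from_signed() {
        let x = 0xFFFFu16; // 65535 unsigned, but -1 when viewed as i16
        assert_eq!(x.max(1), x); // unsigned max keeps 0xFFFF
        assert_eq!((x as i16).max(1), 1); // signed max prefers 1 over -1
    }
}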
1898
1899/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1900///
1901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu16&expand=3607)
1902#[inline]
1903#[target_feature(enable = "avx512bw")]
1904#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1905#[cfg_attr(test, assert_instr(vpmaxuw))]
1906#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1907pub const fn _mm512_mask_max_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1908    unsafe {
1909        let max = _mm512_max_epu16(a, b).as_u16x32();
1910        transmute(simd_select_bitmask(k, max, src.as_u16x32()))
1911    }
1912}
1913
1914/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1915///
1916/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu16&expand=3608)
1917#[inline]
1918#[target_feature(enable = "avx512bw")]
1919#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1920#[cfg_attr(test, assert_instr(vpmaxuw))]
1921#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1922pub const fn _mm512_maskz_max_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1923    unsafe {
1924        let max = _mm512_max_epu16(a, b).as_u16x32();
1925        transmute(simd_select_bitmask(k, max, u16x32::ZERO))
1926    }
1927}
1928
1929/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1930///
1931/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu16&expand=3604)
1932#[inline]
1933#[target_feature(enable = "avx512bw,avx512vl")]
1934#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1935#[cfg_attr(test, assert_instr(vpmaxuw))]
1936#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1937pub const fn _mm256_mask_max_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1938    unsafe {
1939        let max = _mm256_max_epu16(a, b).as_u16x16();
1940        transmute(simd_select_bitmask(k, max, src.as_u16x16()))
1941    }
1942}
1943
1944/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1945///
1946/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu16&expand=3605)
1947#[inline]
1948#[target_feature(enable = "avx512bw,avx512vl")]
1949#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1950#[cfg_attr(test, assert_instr(vpmaxuw))]
1951#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1952pub const fn _mm256_maskz_max_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1953    unsafe {
1954        let max = _mm256_max_epu16(a, b).as_u16x16();
1955        transmute(simd_select_bitmask(k, max, u16x16::ZERO))
1956    }
1957}
1958
1959/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1960///
1961/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu16&expand=3601)
1962#[inline]
1963#[target_feature(enable = "avx512bw,avx512vl")]
1964#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1965#[cfg_attr(test, assert_instr(vpmaxuw))]
1966#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1967pub const fn _mm_mask_max_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1968    unsafe {
1969        let max = _mm_max_epu16(a, b).as_u16x8();
1970        transmute(simd_select_bitmask(k, max, src.as_u16x8()))
1971    }
1972}
1973
1974/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1975///
1976/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu16&expand=3602)
1977#[inline]
1978#[target_feature(enable = "avx512bw,avx512vl")]
1979#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1980#[cfg_attr(test, assert_instr(vpmaxuw))]
1981#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1982pub const fn _mm_maskz_max_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1983    unsafe {
1984        let max = _mm_max_epu16(a, b).as_u16x8();
1985        transmute(simd_select_bitmask(k, max, u16x8::ZERO))
1986    }
1987}

/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu8&expand=3636)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxub))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_max_epu8(a: __m512i, b: __m512i) -> __m512i {
    unsafe { simd_imax(a.as_u8x64(), b.as_u8x64()).as_m512i() }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu8&expand=3634)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxub))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_max_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epu8(a, b).as_u8x64();
        transmute(simd_select_bitmask(k, max, src.as_u8x64()))
    }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu8&expand=3635)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxub))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_max_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epu8(a, b).as_u8x64();
        transmute(simd_select_bitmask(k, max, u8x64::ZERO))
    }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu8&expand=3631)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxub))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_max_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epu8(a, b).as_u8x32();
        transmute(simd_select_bitmask(k, max, src.as_u8x32()))
    }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu8&expand=3632)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxub))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_max_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epu8(a, b).as_u8x32();
        transmute(simd_select_bitmask(k, max, u8x32::ZERO))
    }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu8&expand=3628)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxub))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_max_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epu8(a, b).as_u8x16();
        transmute(simd_select_bitmask(k, max, src.as_u8x16()))
    }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu8&expand=3629)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxub))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_max_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epu8(a, b).as_u8x16();
        transmute(simd_select_bitmask(k, max, u8x16::ZERO))
    }
}
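
// Usage sketch (illustrative only, not part of the crate's test suite):
// `_mm512_max_epu8` compares bytes as unsigned, so 0xC8 (200) beats 100 even
// though it is negative when reinterpreted as an i8. Expected values are
// hand-computed.
#[cfg(test)]
mod max_epu8_sketch {
    use crate::core_arch::x86::*;
    use crate::mem::transmute;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512bw")]
    unsafe fn unsigned_byte_max() {
        let a = _mm512_set1_epi8(200u8 as i8);
        let b = _mm512_set1_epi8(100);
        let r: [u8; 64] = transmute(_mm512_max_epu8(a, b));
        assert_eq!(r, [200u8; 64]);
    }
}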

/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi16&expand=3573)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_max_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { simd_imax(a.as_i16x32(), b.as_i16x32()).as_m512i() }
}

/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi16&expand=3571)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_max_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, max, src.as_i16x32()))
    }
}

/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi16&expand=3572)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_max_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, max, i16x32::ZERO))
    }
}

/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi16&expand=3568)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_max_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, max, src.as_i16x16()))
    }
}

/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi16&expand=3569)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_max_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, max, i16x16::ZERO))
    }
}

/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi16&expand=3565)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_max_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, max, src.as_i16x8()))
    }
}

/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi16&expand=3566)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_max_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, max, i16x8::ZERO))
    }
}
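
// Usage sketch (illustrative only, not part of the crate's test suite): the
// write-masked variant copies unselected lanes from `src`, while the
// zero-masked variant clears them. Mask 0b0000_1111 selects the four low
// lanes. Expected values are hand-computed.
#[cfg(test)]
mod mask_max_epi16_sketch {
    use crate::core_arch::x86::*;
    use crate::mem::transmute;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn writemask_vs_zeromask() {
        let src = _mm_set1_epi16(-1);
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(5);
        let r: [i16; 8] = transmute(_mm_mask_max_epi16(src, 0b0000_1111, a, b));
        assert_eq!(r, [5, 5, 5, 5, -1, -1, -1, -1]);
        let r: [i16; 8] = transmute(_mm_maskz_max_epi16(0b0000_1111, a, b));
        assert_eq!(r, [5, 5, 5, 5, 0, 0, 0, 0]);
    }
}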

/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi8&expand=3600)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_max_epi8(a: __m512i, b: __m512i) -> __m512i {
    unsafe { simd_imax(a.as_i8x64(), b.as_i8x64()).as_m512i() }
}

/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi8&expand=3598)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_max_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, max, src.as_i8x64()))
    }
}

/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi8&expand=3599)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_max_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, max, i8x64::ZERO))
    }
}

/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi8&expand=3595)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_max_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, max, src.as_i8x32()))
    }
}

/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi8&expand=3596)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_max_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, max, i8x32::ZERO))
    }
}

/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi8&expand=3592)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_max_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, max, src.as_i8x16()))
    }
}

/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi8&expand=3593)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_max_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, max, i8x16::ZERO))
    }
}
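
// Usage sketch (illustrative only, not part of the crate's test suite):
// `_mm512_max_epi8` compares bytes as signed, so max(-1, 1) is 1, while the
// unsigned counterpart picks the -1 bit pattern (0xFF = 255). Expected values
// are hand-computed.
#[cfg(test)]
mod max_epi8_sketch {
    use crate::core_arch::x86::*;
    use crate::mem::transmute;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512bw")]
    unsafe fn signed_byte_max() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(1);
        let signed: [i8; 64] = transmute(_mm512_max_epi8(a, b));
        assert_eq!(signed, [1i8; 64]);
        let unsigned: [u8; 64] = transmute(_mm512_max_epu8(a, b));
        assert_eq!(unsigned, [0xFFu8; 64]);
    }
}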

/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu16&expand=3723)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminuw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_min_epu16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { simd_imin(a.as_u16x32(), b.as_u16x32()).as_m512i() }
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu16&expand=3721)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminuw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_min_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, min, src.as_u16x32()))
    }
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu16&expand=3722)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminuw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_min_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, min, u16x32::ZERO))
    }
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu16&expand=3718)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminuw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_min_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, min, src.as_u16x16()))
    }
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu16&expand=3719)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminuw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_min_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, min, u16x16::ZERO))
    }
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu16&expand=3715)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminuw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_min_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, min, src.as_u16x8()))
    }
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu16&expand=3716)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminuw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_min_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, min, u16x8::ZERO))
    }
}
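
// Usage sketch (illustrative only, not part of the crate's test suite):
// unsigned 16-bit minimum under a writemask. Only the lanes selected by `k`
// receive min(a, b); the rest come from `src`. Expected values are
// hand-computed.
#[cfg(test)]
mod mask_min_epu16_sketch {
    use crate::core_arch::x86::*;
    use crate::mem::transmute;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn writemasked_unsigned_min() {
        let src = _mm_set1_epi16(9);
        let a = _mm_set1_epi16(3);
        // As a signed value 60_000 wraps negative and would win a signed min;
        // the unsigned min correctly picks 3.
        let b = _mm_set1_epi16(60_000u16 as i16);
        let r: [u16; 8] = transmute(_mm_mask_min_epu16(src, 0b1111_0000, a, b));
        assert_eq!(r, [9, 9, 9, 9, 3, 3, 3, 3]);
    }
}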

/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu8&expand=3750)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminub))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_min_epu8(a: __m512i, b: __m512i) -> __m512i {
    unsafe { simd_imin(a.as_u8x64(), b.as_u8x64()).as_m512i() }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu8&expand=3748)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminub))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_min_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epu8(a, b).as_u8x64();
        transmute(simd_select_bitmask(k, min, src.as_u8x64()))
    }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu8&expand=3749)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminub))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_min_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epu8(a, b).as_u8x64();
        transmute(simd_select_bitmask(k, min, u8x64::ZERO))
    }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu8&expand=3745)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminub))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_min_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epu8(a, b).as_u8x32();
        transmute(simd_select_bitmask(k, min, src.as_u8x32()))
    }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu8&expand=3746)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminub))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_min_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epu8(a, b).as_u8x32();
        transmute(simd_select_bitmask(k, min, u8x32::ZERO))
    }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu8&expand=3742)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminub))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_min_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epu8(a, b).as_u8x16();
        transmute(simd_select_bitmask(k, min, src.as_u8x16()))
    }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu8&expand=3743)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminub))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_min_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epu8(a, b).as_u8x16();
        transmute(simd_select_bitmask(k, min, u8x16::ZERO))
    }
}
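
// Usage sketch (illustrative only, not part of the crate's test suite): as
// unsigned bytes, 0xFF is the largest value, so min(0xFF, 1) is 1; interpreted
// as signed, the same bit pattern is -1 and would win instead.
#[cfg(test)]
mod min_epu8_sketch {
    use crate::core_arch::x86::*;
    use crate::mem::transmute;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512bw")]
    unsafe fn unsigned_byte_min() {
        let a = _mm512_set1_epi8(0xFFu8 as i8);
        let b = _mm512_set1_epi8(1);
        let r: [u8; 64] = transmute(_mm512_min_epu8(a, b));
        assert_eq!(r, [1u8; 64]);
    }
}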

/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi16&expand=3687)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_min_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { simd_imin(a.as_i16x32(), b.as_i16x32()).as_m512i() }
}

/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi16&expand=3685)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_min_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, min, src.as_i16x32()))
    }
}

/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi16&expand=3686)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_min_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, min, i16x32::ZERO))
    }
}

/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi16&expand=3682)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_min_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, min, src.as_i16x16()))
    }
}

/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi16&expand=3683)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_min_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, min, i16x16::ZERO))
    }
}

/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi16&expand=3679)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_min_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, min, src.as_i16x8()))
    }
}

/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi16&expand=3680)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_min_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, min, i16x8::ZERO))
    }
}
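
// Usage sketch (illustrative only, not part of the crate's test suite):
// signed 16-bit minimum with a zeromask. Clear mask bits zero the lane
// instead of copying from a source vector. Expected values are hand-computed.
#[cfg(test)]
mod maskz_min_epi16_sketch {
    use crate::core_arch::x86::*;
    use crate::mem::transmute;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn zeromasked_signed_min() {
        let a = _mm_set1_epi16(-8);
        let b = _mm_set1_epi16(5);
        let r: [i16; 8] = transmute(_mm_maskz_min_epi16(0b0000_0011, a, b));
        assert_eq!(r, [-8, -8, 0, 0, 0, 0, 0, 0]);
    }
}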

/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi8&expand=3714)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_min_epi8(a: __m512i, b: __m512i) -> __m512i {
    unsafe { simd_imin(a.as_i8x64(), b.as_i8x64()).as_m512i() }
}

/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi8&expand=3712)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_min_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, min, src.as_i8x64()))
    }
}

/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi8&expand=3713)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_min_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, min, i8x64::ZERO))
    }
}

/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi8&expand=3709)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_min_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, min, src.as_i8x32()))
    }
}

/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi8&expand=3710)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_min_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, min, i8x32::ZERO))
    }
}

/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi8&expand=3706)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_min_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, min, src.as_i8x16()))
    }
}

/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi8&expand=3707)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_min_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, min, i8x16::ZERO))
    }
}
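
// Usage sketch (illustrative only, not part of the crate's test suite): the
// signed and unsigned byte minimums disagree whenever the top bit differs:
// min_epi8 picks -128 while min_epu8 picks 1 for the same inputs.
#[cfg(test)]
mod min_epi8_sketch {
    use crate::core_arch::x86::*;
    use crate::mem::transmute;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512bw")]
    unsafe fn signed_vs_unsigned_byte_min() {
        let a = _mm512_set1_epi8(-128);
        let b = _mm512_set1_epi8(1);
        let signed: [i8; 64] = transmute(_mm512_min_epi8(a, b));
        assert_eq!(signed, [-128i8; 64]);
        let unsigned: [u8; 64] = transmute(_mm512_min_epu8(a, b));
        assert_eq!(unsigned, [1u8; 64]);
    }
}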

/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu16_mask&expand=1050)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmplt_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe { simd_bitmask::<u16x32, _>(simd_lt(a.as_u16x32(), b.as_u16x32())) }
}

/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu16_mask&expand=1051)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmplt_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu16_mask&expand=1050)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmplt_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    unsafe { simd_bitmask::<u16x16, _>(simd_lt(a.as_u16x16(), b.as_u16x16())) }
}

/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu16_mask&expand=1049)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmplt_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu16_mask&expand=1018)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmplt_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<u16x8, _>(simd_lt(a.as_u16x8(), b.as_u16x8())) }
}

/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu16_mask&expand=1019)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmplt_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(k1, a, b)
}
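
// Usage sketch (illustrative only, not part of the crate's test suite): the
// compare intrinsics return a bitmask with one bit per lane (bit 0 = lane 0).
// The masked form additionally ANDs the result with `k1`, so lanes excluded
// by `k1` always report 0. Expected values are hand-computed.
#[cfg(test)]
mod cmplt_epu16_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn bitmask_per_lane() {
        let a = _mm_setr_epi16(0, 9, 0, 9, 0, 9, 0, 9);
        let b = _mm_set1_epi16(5);
        assert_eq!(_mm_cmplt_epu16_mask(a, b), 0b0101_0101);
        assert_eq!(_mm_mask_cmplt_epu16_mask(0b0000_1111, a, b), 0b0000_0101);
    }
}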

/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu8_mask&expand=1068)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmplt_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    unsafe { simd_bitmask::<u8x64, _>(simd_lt(a.as_u8x64(), b.as_u8x64())) }
}

/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu8_mask&expand=1069)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmplt_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu8_mask&expand=1066)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmplt_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe { simd_bitmask::<u8x32, _>(simd_lt(a.as_u8x32(), b.as_u8x32())) }
}

/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu8_mask&expand=1067)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmplt_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu8_mask&expand=1064)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmplt_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    unsafe { simd_bitmask::<u8x16, _>(simd_lt(a.as_u8x16(), b.as_u8x16())) }
}

/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu8_mask&expand=1065)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmplt_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(k1, a, b)
}
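
// Usage sketch (illustrative only, not part of the crate's test suite): byte
// comparisons are unsigned here, so 0x80 (128) is *not* less than 1, while
// the signed interpretation (-128) would be. For splatted inputs all 64 mask
// bits agree.
#[cfg(test)]
mod cmplt_epu8_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512bw")]
    unsafe fn unsigned_byte_compare() {
        let a = _mm512_set1_epi8(-128); // 0x80 = 128 as unsigned
        let b = _mm512_set1_epi8(1);
        assert_eq!(_mm512_cmplt_epu8_mask(a, b), 0);
        assert_eq!(_mm512_cmplt_epu8_mask(b, a), u64::MAX);
    }
}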

/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi16_mask&expand=1022)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmplt_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe { simd_bitmask::<i16x32, _>(simd_lt(a.as_i16x32(), b.as_i16x32())) }
}

/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi16_mask&expand=1023)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmplt_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi16_mask&expand=1020)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmplt_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    unsafe { simd_bitmask::<i16x16, _>(simd_lt(a.as_i16x16(), b.as_i16x16())) }
}

/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi16_mask&expand=1021)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmplt_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi16_mask&expand=1018)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmplt_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<i16x8, _>(simd_lt(a.as_i16x8(), b.as_i16x8())) }
}

/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi16_mask&expand=1019)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmplt_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(k1, a, b)
}
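
// Usage sketch (illustrative only, not part of the crate's test suite): the
// signed compare sees -1 as less than 0, whereas the unsigned compare treats
// the same bit pattern as 0xFFFF and reports the opposite ordering.
#[cfg(test)]
mod cmplt_epi16_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn signed_vs_unsigned_compare() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(0);
        assert_eq!(_mm_cmplt_epi16_mask(a, b), 0xFF);
        assert_eq!(_mm_cmplt_epu16_mask(a, b), 0);
    }
}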

/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi8_mask&expand=1044)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmplt_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    unsafe { simd_bitmask::<i8x64, _>(simd_lt(a.as_i8x64(), b.as_i8x64())) }
}

/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi8_mask&expand=1045)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmplt_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi8_mask&expand=1042)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmplt_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe { simd_bitmask::<i8x32, _>(simd_lt(a.as_i8x32(), b.as_i8x32())) }
}

/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi8_mask&expand=1043)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmplt_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi8_mask&expand=1040)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmplt_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    unsafe { simd_bitmask::<i8x16, _>(simd_lt(a.as_i8x16(), b.as_i8x16())) }
}

/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi8_mask&expand=1041)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmplt_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(k1, a, b)
}
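
// Usage sketch (illustrative only, not part of the crate's test suite): `k1`
// acts as a zeromask on the comparison result, which narrows a previous
// predicate without a separate AND instruction.
#[cfg(test)]
mod cmplt_epi8_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn chained_predicates() {
        let a = _mm_set1_epi8(-3);
        let b = _mm_set1_epi8(0);
        let first: __mmask16 = 0x00FF; // keep only the low eight lanes
        // Every lane satisfies a < b, but only the lanes kept by `first` report it.
        assert_eq!(_mm_mask_cmplt_epi8_mask(first, a, b), 0x00FF);
    }
}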

/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu16_mask&expand=927)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpgt_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe { simd_bitmask::<u16x32, _>(simd_gt(a.as_u16x32(), b.as_u16x32())) }
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu16_mask&expand=928)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpgt_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu16_mask&expand=925)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpgt_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    unsafe { simd_bitmask::<u16x16, _>(simd_gt(a.as_u16x16(), b.as_u16x16())) }
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu16_mask&expand=926)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpgt_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu16_mask&expand=923)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpgt_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<u16x8, _>(simd_gt(a.as_u16x8(), b.as_u16x8())) }
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu16_mask&expand=924)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpgt_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
3062
3063/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k.
3064///
3065/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu8_mask&expand=945)
3066#[inline]
3067#[target_feature(enable = "avx512bw")]
3068#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3069#[cfg_attr(test, assert_instr(vpcmp))]
3070#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3071pub const fn _mm512_cmpgt_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
3072    unsafe { simd_bitmask::<u8x64, _>(simd_gt(a.as_u8x64(), b.as_u8x64())) }
3073}
3074
3075/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3076///
3077/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu8_mask&expand=946)
3078#[inline]
3079#[target_feature(enable = "avx512bw")]
3080#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3081#[cfg_attr(test, assert_instr(vpcmp))]
3082#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3083pub const fn _mm512_mask_cmpgt_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
3084    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_NLE>(k1, a, b)
3085}
3086
3087/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k.
3088///
3089/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu8_mask&expand=943)
3090#[inline]
3091#[target_feature(enable = "avx512bw,avx512vl")]
3092#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3093#[cfg_attr(test, assert_instr(vpcmp))]
3094#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3095pub const fn _mm256_cmpgt_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
3096    unsafe { simd_bitmask::<u8x32, _>(simd_gt(a.as_u8x32(), b.as_u8x32())) }
3097}
3098
3099/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3100///
3101/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu8_mask&expand=944)
3102#[inline]
3103#[target_feature(enable = "avx512bw,avx512vl")]
3104#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3105#[cfg_attr(test, assert_instr(vpcmp))]
3106#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3107pub const fn _mm256_mask_cmpgt_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
3108    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_NLE>(k1, a, b)
3109}
3110
3111/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k.
3112///
3113/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu8_mask&expand=941)
3114#[inline]
3115#[target_feature(enable = "avx512bw,avx512vl")]
3116#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3117#[cfg_attr(test, assert_instr(vpcmp))]
3118#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3119pub const fn _mm_cmpgt_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
3120    unsafe { simd_bitmask::<u8x16, _>(simd_gt(a.as_u8x16(), b.as_u8x16())) }
3121}
3122
3123/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3124///
3125/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu8_mask&expand=942)
3126#[inline]
3127#[target_feature(enable = "avx512bw,avx512vl")]
3128#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3129#[cfg_attr(test, assert_instr(vpcmp))]
3130#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3131pub const fn _mm_mask_cmpgt_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
3132    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_NLE>(k1, a, b)
3133}
3134
3135/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k.
3136///
3137/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi16_mask&expand=897)
3138#[inline]
3139#[target_feature(enable = "avx512bw")]
3140#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3141#[cfg_attr(test, assert_instr(vpcmp))]
3142#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3143pub const fn _mm512_cmpgt_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
3144    unsafe { simd_bitmask::<i16x32, _>(simd_gt(a.as_i16x32(), b.as_i16x32())) }
3145}
3146
3147/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3148///
3149/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi16_mask&expand=898)
3150#[inline]
3151#[target_feature(enable = "avx512bw")]
3152#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3153#[cfg_attr(test, assert_instr(vpcmp))]
3154#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3155pub const fn _mm512_mask_cmpgt_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
3156    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_NLE>(k1, a, b)
3157}
3158
3159/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k.
3160///
3161/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi16_mask&expand=895)
3162#[inline]
3163#[target_feature(enable = "avx512bw,avx512vl")]
3164#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3165#[cfg_attr(test, assert_instr(vpcmp))]
3166#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3167pub const fn _mm256_cmpgt_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
3168    unsafe { simd_bitmask::<i16x16, _>(simd_gt(a.as_i16x16(), b.as_i16x16())) }
3169}
3170
3171/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3172///
3173/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi16_mask&expand=896)
3174#[inline]
3175#[target_feature(enable = "avx512bw,avx512vl")]
3176#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3177#[cfg_attr(test, assert_instr(vpcmp))]
3178#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3179pub const fn _mm256_mask_cmpgt_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
3180    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_NLE>(k1, a, b)
3181}
3182
3183/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k.
3184///
3185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi16_mask&expand=893)
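///
/// # Examples
///
/// An illustrative sketch (not part of Intel's documentation) showing that
/// bit `i` of the returned mask corresponds to lane `i` of the inputs:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// fn main() {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512bw") && is_x86_feature_detected!("avx512vl") {
///         // SAFETY: the runtime checks above guarantee AVX-512BW and AVX-512VL.
///         unsafe {
///             // Lanes 0, 2, 4 and 6 hold 2; the odd lanes hold 0.
///             let a = _mm_setr_epi16(2, 0, 2, 0, 2, 0, 2, 0);
///             let b = _mm_set1_epi16(1);
///             // Exactly the even-numbered lanes compare greater.
///             assert_eq!(_mm_cmpgt_epi16_mask(a, b), 0b0101_0101);
///         }
///     }
/// }
/// #[cfg(not(target_arch = "x86_64"))]
/// fn main() {}
/// ```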
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpgt_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<i16x8, _>(simd_gt(a.as_i16x8(), b.as_i16x8())) }
}

/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi16_mask&expand=894)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpgt_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi8_mask&expand=921)
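///
/// # Examples
///
/// An illustrative sketch (not part of Intel's documentation) contrasting the
/// signed comparison with its unsigned counterpart `_mm512_cmpgt_epu8_mask`:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// fn main() {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512bw") {
///         // SAFETY: the runtime check above guarantees AVX-512BW support.
///         unsafe {
///             let a = _mm512_set1_epi8(-1); // 0xFF: -1 signed, 255 unsigned
///             let b = _mm512_set1_epi8(0);
///             // Signed: -1 > 0 is false in every lane.
///             assert_eq!(_mm512_cmpgt_epi8_mask(a, b), 0);
///             // Unsigned: 255 > 0 is true in every lane.
///             assert_eq!(_mm512_cmpgt_epu8_mask(a, b), u64::MAX);
///         }
///     }
/// }
/// #[cfg(not(target_arch = "x86_64"))]
/// fn main() {}
/// ```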
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpgt_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    unsafe { simd_bitmask::<i8x64, _>(simd_gt(a.as_i8x64(), b.as_i8x64())) }
}

/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi8_mask&expand=922)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpgt_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi8_mask&expand=919)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpgt_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe { simd_bitmask::<i8x32, _>(simd_gt(a.as_i8x32(), b.as_i8x32())) }
}

/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi8_mask&expand=920)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpgt_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi8_mask&expand=917)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpgt_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    unsafe { simd_bitmask::<i8x16, _>(simd_gt(a.as_i8x16(), b.as_i8x16())) }
}

/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi8_mask&expand=918)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpgt_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu16_mask&expand=989)
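///
/// # Examples
///
/// An illustrative sketch (not part of Intel's documentation); note that
/// equal lanes satisfy less-than-or-equal:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// fn main() {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512bw") {
///         // SAFETY: the runtime check above guarantees AVX-512BW support.
///         unsafe {
///             let a = _mm512_set1_epi16(7);
///             // Comparing a vector against itself sets every mask bit,
///             // because less-than-or-equal includes equality.
///             assert_eq!(_mm512_cmple_epu16_mask(a, a), u32::MAX);
///         }
///     }
/// }
/// #[cfg(not(target_arch = "x86_64"))]
/// fn main() {}
/// ```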
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmple_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe { simd_bitmask::<u16x32, _>(simd_le(a.as_u16x32(), b.as_u16x32())) }
}

/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu16_mask&expand=990)
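///
/// # Examples
///
/// The masked comparison is a thin wrapper over the generic
/// `_mm512_mask_cmp_epu16_mask` with the matching `_MM_CMPINT_*` immediate;
/// an illustrative sketch (not part of Intel's documentation) of that
/// equivalence:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// fn main() {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512bw") {
///         // SAFETY: the runtime check above guarantees AVX-512BW support.
///         unsafe {
///             let a = _mm512_set1_epi16(1);
///             let b = _mm512_set1_epi16(2);
///             let k1: __mmask32 = 0xFFFF;
///             // Both calls perform the same masked unsigned comparison.
///             assert_eq!(
///                 _mm512_mask_cmple_epu16_mask(k1, a, b),
///                 _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LE>(k1, a, b),
///             );
///         }
///     }
/// }
/// #[cfg(not(target_arch = "x86_64"))]
/// fn main() {}
/// ```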
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmple_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu16_mask&expand=987)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmple_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    unsafe { simd_bitmask::<u16x16, _>(simd_le(a.as_u16x16(), b.as_u16x16())) }
}

/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu16_mask&expand=988)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmple_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu16_mask&expand=985)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmple_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<u16x8, _>(simd_le(a.as_u16x8(), b.as_u16x8())) }
}

/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu16_mask&expand=986)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmple_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu8_mask&expand=1007)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmple_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    unsafe { simd_bitmask::<u8x64, _>(simd_le(a.as_u8x64(), b.as_u8x64())) }
}

/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu8_mask&expand=1008)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmple_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu8_mask&expand=1005)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmple_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe { simd_bitmask::<u8x32, _>(simd_le(a.as_u8x32(), b.as_u8x32())) }
}

/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu8_mask&expand=1006)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmple_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu8_mask&expand=1003)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmple_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    unsafe { simd_bitmask::<u8x16, _>(simd_le(a.as_u8x16(), b.as_u8x16())) }
}

/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu8_mask&expand=1004)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmple_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi16_mask&expand=965)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmple_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe { simd_bitmask::<i16x32, _>(simd_le(a.as_i16x32(), b.as_i16x32())) }
}

/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi16_mask&expand=966)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmple_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi16_mask&expand=963)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmple_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    unsafe { simd_bitmask::<i16x16, _>(simd_le(a.as_i16x16(), b.as_i16x16())) }
}

/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi16_mask&expand=964)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmple_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi16_mask&expand=961)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmple_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<i16x8, _>(simd_le(a.as_i16x8(), b.as_i16x8())) }
}

/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi16_mask&expand=962)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmple_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi8_mask&expand=983)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmple_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    unsafe { simd_bitmask::<i8x64, _>(simd_le(a.as_i8x64(), b.as_i8x64())) }
}

/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi8_mask&expand=984)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmple_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi8_mask&expand=981)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmple_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe { simd_bitmask::<i8x32, _>(simd_le(a.as_i8x32(), b.as_i8x32())) }
}

/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi8_mask&expand=982)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmple_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi8_mask&expand=979)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmple_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    unsafe { simd_bitmask::<i8x16, _>(simd_le(a.as_i8x16(), b.as_i8x16())) }
}

/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi8_mask&expand=980)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmple_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu16_mask&expand=867)
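///
/// # Examples
///
/// An illustrative sketch (not part of Intel's documentation); equal lanes
/// satisfy greater-than-or-equal, strictly smaller ones do not:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// fn main() {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512bw") {
///         // SAFETY: the runtime check above guarantees AVX-512BW support.
///         unsafe {
///             let b = _mm512_set1_epi16(5);
///             // 5 >= 5 holds in every lane...
///             assert_eq!(_mm512_cmpge_epu16_mask(b, b), u32::MAX);
///             // ...while 0 >= 5 holds in none.
///             assert_eq!(_mm512_cmpge_epu16_mask(_mm512_setzero_si512(), b), 0);
///         }
///     }
/// }
/// #[cfg(not(target_arch = "x86_64"))]
/// fn main() {}
/// ```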
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpge_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe { simd_bitmask::<u16x32, _>(simd_ge(a.as_u16x32(), b.as_u16x32())) }
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu16_mask&expand=868)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpge_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu16_mask&expand=865)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpge_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    unsafe { simd_bitmask::<u16x16, _>(simd_ge(a.as_u16x16(), b.as_u16x16())) }
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu16_mask&expand=866)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpge_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu16_mask&expand=863)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpge_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<u16x8, _>(simd_ge(a.as_u16x8(), b.as_u16x8())) }
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu16_mask&expand=864)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpge_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu8_mask&expand=885)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpge_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    unsafe { simd_bitmask::<u8x64, _>(simd_ge(a.as_u8x64(), b.as_u8x64())) }
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu8_mask&expand=886)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpge_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu8_mask&expand=883)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpge_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe { simd_bitmask::<u8x32, _>(simd_ge(a.as_u8x32(), b.as_u8x32())) }
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu8_mask&expand=884)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpge_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu8_mask&expand=881)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpge_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    unsafe { simd_bitmask::<u8x16, _>(simd_ge(a.as_u8x16(), b.as_u8x16())) }
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu8_mask&expand=882)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpge_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi16_mask&expand=843)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpge_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe { simd_bitmask::<i16x32, _>(simd_ge(a.as_i16x32(), b.as_i16x32())) }
}

/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi16_mask&expand=844)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpge_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi16_mask&expand=841)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpge_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    unsafe { simd_bitmask::<i16x16, _>(simd_ge(a.as_i16x16(), b.as_i16x16())) }
}

/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi16_mask&expand=842)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpge_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi16_mask&expand=839)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpge_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<i16x8, _>(simd_ge(a.as_i16x8(), b.as_i16x8())) }
}

/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi16_mask&expand=840)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpge_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi8_mask&expand=861)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpge_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    unsafe { simd_bitmask::<i8x64, _>(simd_ge(a.as_i8x64(), b.as_i8x64())) }
}

/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi8_mask&expand=862)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpge_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi8_mask&expand=859)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpge_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe { simd_bitmask::<i8x32, _>(simd_ge(a.as_i8x32(), b.as_i8x32())) }
}

/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi8_mask&expand=860)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpge_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi8_mask&expand=857)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpge_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    unsafe { simd_bitmask::<i8x16, _>(simd_ge(a.as_i8x16(), b.as_i8x16())) }
}

/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi8_mask&expand=858)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpge_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu16_mask&expand=801)
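///
/// # Examples
///
/// An illustrative sketch (not part of Intel's documentation); for equality
/// the signed and unsigned variants agree bit-for-bit:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// fn main() {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512bw") {
///         // SAFETY: the runtime check above guarantees AVX-512BW support.
///         unsafe {
///             let a = _mm512_set1_epi16(3);
///             assert_eq!(_mm512_cmpeq_epu16_mask(a, a), u32::MAX);
///             // Any differing lane clears the corresponding mask bit.
///             assert_eq!(_mm512_cmpeq_epu16_mask(a, _mm512_set1_epi16(4)), 0);
///         }
///     }
/// }
/// #[cfg(not(target_arch = "x86_64"))]
/// fn main() {}
/// ```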
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpeq_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe { simd_bitmask::<u16x32, _>(simd_eq(a.as_u16x32(), b.as_u16x32())) }
}

/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu16_mask&expand=802)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpeq_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu16_mask&expand=799)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpeq_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    unsafe { simd_bitmask::<u16x16, _>(simd_eq(a.as_u16x16(), b.as_u16x16())) }
}

/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu16_mask&expand=800)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpeq_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu16_mask&expand=797)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpeq_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<u16x8, _>(simd_eq(a.as_u16x8(), b.as_u16x8())) }
}

/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu16_mask&expand=798)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpeq_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu8_mask&expand=819)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpeq_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    unsafe { simd_bitmask::<u8x64, _>(simd_eq(a.as_u8x64(), b.as_u8x64())) }
}

/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu8_mask&expand=820)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpeq_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu8_mask&expand=817)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpeq_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe { simd_bitmask::<u8x32, _>(simd_eq(a.as_u8x32(), b.as_u8x32())) }
}

/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu8_mask&expand=818)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpeq_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu8_mask&expand=815)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpeq_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    unsafe { simd_bitmask::<u8x16, _>(simd_eq(a.as_u8x16(), b.as_u8x16())) }
}

/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu8_mask&expand=816)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpeq_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi16_mask&expand=771)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpeq_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe { simd_bitmask::<i16x32, _>(simd_eq(a.as_i16x32(), b.as_i16x32())) }
}

/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi16_mask&expand=772)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpeq_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
4020    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_EQ>(k1, a, b)
4021}
4022
4023/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k.
4024///
4025/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi16_mask&expand=769)
4026#[inline]
4027#[target_feature(enable = "avx512bw,avx512vl")]
4028#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4029#[cfg_attr(test, assert_instr(vpcmp))]
4030#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4031pub const fn _mm256_cmpeq_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
4032    unsafe { simd_bitmask::<i16x16, _>(simd_eq(a.as_i16x16(), b.as_i16x16())) }
4033}
4034
4035/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4036///
4037/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi16_mask&expand=770)
4038#[inline]
4039#[target_feature(enable = "avx512bw,avx512vl")]
4040#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4041#[cfg_attr(test, assert_instr(vpcmp))]
4042#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4043pub const fn _mm256_mask_cmpeq_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
4044    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_EQ>(k1, a, b)
4045}
4046
4047/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k.
4048///
4049/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi16_mask&expand=767)
4050#[inline]
4051#[target_feature(enable = "avx512bw,avx512vl")]
4052#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4053#[cfg_attr(test, assert_instr(vpcmp))]
4054#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4055pub const fn _mm_cmpeq_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
4056    unsafe { simd_bitmask::<i16x8, _>(simd_eq(a.as_i16x8(), b.as_i16x8())) }
4057}
4058
4059/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4060///
4061/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi16_mask&expand=768)
4062#[inline]
4063#[target_feature(enable = "avx512bw,avx512vl")]
4064#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4065#[cfg_attr(test, assert_instr(vpcmp))]
4066#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4067pub const fn _mm_mask_cmpeq_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
4068    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_EQ>(k1, a, b)
4069}
4070
4071/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k.
4072///
4073/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi8_mask&expand=795)
4074#[inline]
4075#[target_feature(enable = "avx512bw")]
4076#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4077#[cfg_attr(test, assert_instr(vpcmp))]
4078#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4079pub const fn _mm512_cmpeq_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
4080    unsafe { simd_bitmask::<i8x64, _>(simd_eq(a.as_i8x64(), b.as_i8x64())) }
4081}
4082
4083/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4084///
4085/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi8_mask&expand=796)
4086#[inline]
4087#[target_feature(enable = "avx512bw")]
4088#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4089#[cfg_attr(test, assert_instr(vpcmp))]
4090#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4091pub const fn _mm512_mask_cmpeq_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
4092    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_EQ>(k1, a, b)
4093}
4094
4095/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k.
4096///
4097/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi8_mask&expand=793)
4098#[inline]
4099#[target_feature(enable = "avx512bw,avx512vl")]
4100#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4101#[cfg_attr(test, assert_instr(vpcmp))]
4102#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4103pub const fn _mm256_cmpeq_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
4104    unsafe { simd_bitmask::<i8x32, _>(simd_eq(a.as_i8x32(), b.as_i8x32())) }
4105}
4106
4107/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4108///
4109/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi8_mask&expand=794)
4110#[inline]
4111#[target_feature(enable = "avx512bw,avx512vl")]
4112#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4113#[cfg_attr(test, assert_instr(vpcmp))]
4114#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4115pub const fn _mm256_mask_cmpeq_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
4116    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_EQ>(k1, a, b)
4117}
4118
4119/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k.
4120///
4121/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi8_mask&expand=791)
4122#[inline]
4123#[target_feature(enable = "avx512bw,avx512vl")]
4124#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4125#[cfg_attr(test, assert_instr(vpcmp))]
4126#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4127pub const fn _mm_cmpeq_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
4128    unsafe { simd_bitmask::<i8x16, _>(simd_eq(a.as_i8x16(), b.as_i8x16())) }
4129}
4130
4131/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4132///
4133/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi8_mask&expand=792)
4134#[inline]
4135#[target_feature(enable = "avx512bw,avx512vl")]
4136#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4137#[cfg_attr(test, assert_instr(vpcmp))]
4138#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4139pub const fn _mm_mask_cmpeq_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
4140    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_EQ>(k1, a, b)
4141}
4142
4143/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k.
4144///
4145/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu16_mask&expand=1106)
4146#[inline]
4147#[target_feature(enable = "avx512bw")]
4148#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4149#[cfg_attr(test, assert_instr(vpcmp))]
4150#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4151pub const fn _mm512_cmpneq_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
4152    unsafe { simd_bitmask::<u16x32, _>(simd_ne(a.as_u16x32(), b.as_u16x32())) }
4153}
4154
4155/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4156///
4157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu16_mask&expand=1107)
4158#[inline]
4159#[target_feature(enable = "avx512bw")]
4160#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4161#[cfg_attr(test, assert_instr(vpcmp))]
4162#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4163pub const fn _mm512_mask_cmpneq_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
4164    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_NE>(k1, a, b)
4165}
4166
4167/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k.
4168///
4169/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu16_mask&expand=1104)
4170#[inline]
4171#[target_feature(enable = "avx512bw,avx512vl")]
4172#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4173#[cfg_attr(test, assert_instr(vpcmp))]
4174#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4175pub const fn _mm256_cmpneq_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
4176    unsafe { simd_bitmask::<u16x16, _>(simd_ne(a.as_u16x16(), b.as_u16x16())) }
4177}
4178
4179/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4180///
4181/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu16_mask&expand=1105)
4182#[inline]
4183#[target_feature(enable = "avx512bw,avx512vl")]
4184#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4185#[cfg_attr(test, assert_instr(vpcmp))]
4186#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4187pub const fn _mm256_mask_cmpneq_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
4188    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_NE>(k1, a, b)
4189}
4190
4191/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k.
4192///
4193/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu16_mask&expand=1102)
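///
/// A minimal sketch, under the same runtime-detection assumptions as the other
/// 128-bit mask comparisons: the result is the complement, over the low 8 bits,
/// of the corresponding `_mm_cmpeq_epu16_mask` result.
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512bw") && is_x86_feature_detected!("avx512vl") {
///         // SAFETY: the required target features were detected above.
///         unsafe {
///             let a = _mm_set1_epi16(7);
///             let b = _mm_set_epi16(7, 0, 7, 0, 7, 0, 7, 0);
///             assert_eq!(_mm_cmpneq_epu16_mask(a, b), 0b0101_0101);
///         }
///     }
/// }
/// ```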
4194#[inline]
4195#[target_feature(enable = "avx512bw,avx512vl")]
4196#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4197#[cfg_attr(test, assert_instr(vpcmp))]
4198#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4199pub const fn _mm_cmpneq_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
4200    unsafe { simd_bitmask::<u16x8, _>(simd_ne(a.as_u16x8(), b.as_u16x8())) }
4201}
4202
4203/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4204///
4205/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu16_mask&expand=1103)
4206#[inline]
4207#[target_feature(enable = "avx512bw,avx512vl")]
4208#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4209#[cfg_attr(test, assert_instr(vpcmp))]
4210#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4211pub const fn _mm_mask_cmpneq_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
4212    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_NE>(k1, a, b)
4213}
4214
4215/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k.
4216///
4217/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu8_mask&expand=1124)
4218#[inline]
4219#[target_feature(enable = "avx512bw")]
4220#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4221#[cfg_attr(test, assert_instr(vpcmp))]
4222#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4223pub const fn _mm512_cmpneq_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
4224    unsafe { simd_bitmask::<u8x64, _>(simd_ne(a.as_u8x64(), b.as_u8x64())) }
4225}
4226
4227/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4228///
4229/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu8_mask&expand=1125)
4230#[inline]
4231#[target_feature(enable = "avx512bw")]
4232#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4233#[cfg_attr(test, assert_instr(vpcmp))]
4234#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4235pub const fn _mm512_mask_cmpneq_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
4236    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_NE>(k1, a, b)
4237}
4238
4239/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k.
4240///
4241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu8_mask&expand=1122)
4242#[inline]
4243#[target_feature(enable = "avx512bw,avx512vl")]
4244#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4245#[cfg_attr(test, assert_instr(vpcmp))]
4246#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4247pub const fn _mm256_cmpneq_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
4248    unsafe { simd_bitmask::<u8x32, _>(simd_ne(a.as_u8x32(), b.as_u8x32())) }
4249}
4250
4251/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4252///
4253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu8_mask&expand=1123)
4254#[inline]
4255#[target_feature(enable = "avx512bw,avx512vl")]
4256#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4257#[cfg_attr(test, assert_instr(vpcmp))]
4258#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4259pub const fn _mm256_mask_cmpneq_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
4260    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_NE>(k1, a, b)
4261}
4262
4263/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k.
4264///
4265/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu8_mask&expand=1120)
4266#[inline]
4267#[target_feature(enable = "avx512bw,avx512vl")]
4268#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4269#[cfg_attr(test, assert_instr(vpcmp))]
4270#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4271pub const fn _mm_cmpneq_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
4272    unsafe { simd_bitmask::<u8x16, _>(simd_ne(a.as_u8x16(), b.as_u8x16())) }
4273}
4274
4275/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4276///
4277/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu8_mask&expand=1121)
4278#[inline]
4279#[target_feature(enable = "avx512bw,avx512vl")]
4280#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4281#[cfg_attr(test, assert_instr(vpcmp))]
4282#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4283pub const fn _mm_mask_cmpneq_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
4284    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_NE>(k1, a, b)
4285}
4286
4287/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k.
4288///
4289/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi16_mask&expand=1082)
4290#[inline]
4291#[target_feature(enable = "avx512bw")]
4292#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4293#[cfg_attr(test, assert_instr(vpcmp))]
4294#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4295pub const fn _mm512_cmpneq_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
4296    unsafe { simd_bitmask::<i16x32, _>(simd_ne(a.as_i16x32(), b.as_i16x32())) }
4297}
4298
4299/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4300///
4301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi16_mask&expand=1083)
4302#[inline]
4303#[target_feature(enable = "avx512bw")]
4304#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4305#[cfg_attr(test, assert_instr(vpcmp))]
4306#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4307pub const fn _mm512_mask_cmpneq_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
4308    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_NE>(k1, a, b)
4309}
4310
4311/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k.
4312///
4313/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi16_mask&expand=1080)
4314#[inline]
4315#[target_feature(enable = "avx512bw,avx512vl")]
4316#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4317#[cfg_attr(test, assert_instr(vpcmp))]
4318#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4319pub const fn _mm256_cmpneq_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
4320    unsafe { simd_bitmask::<i16x16, _>(simd_ne(a.as_i16x16(), b.as_i16x16())) }
4321}
4322
4323/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4324///
4325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi16_mask&expand=1081)
4326#[inline]
4327#[target_feature(enable = "avx512bw,avx512vl")]
4328#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4329#[cfg_attr(test, assert_instr(vpcmp))]
4330#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4331pub const fn _mm256_mask_cmpneq_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
4332    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_NE>(k1, a, b)
4333}
4334
4335/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k.
4336///
4337/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi16_mask&expand=1078)
4338#[inline]
4339#[target_feature(enable = "avx512bw,avx512vl")]
4340#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4341#[cfg_attr(test, assert_instr(vpcmp))]
4342#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4343pub const fn _mm_cmpneq_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
4344    unsafe { simd_bitmask::<i16x8, _>(simd_ne(a.as_i16x8(), b.as_i16x8())) }
4345}
4346
4347/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4348///
4349/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi16_mask&expand=1079)
4350#[inline]
4351#[target_feature(enable = "avx512bw,avx512vl")]
4352#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4353#[cfg_attr(test, assert_instr(vpcmp))]
4354#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4355pub const fn _mm_mask_cmpneq_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
4356    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_NE>(k1, a, b)
4357}
4358
4359/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k.
4360///
4361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi8_mask&expand=1100)
4362#[inline]
4363#[target_feature(enable = "avx512bw")]
4364#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4365#[cfg_attr(test, assert_instr(vpcmp))]
4366#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4367pub const fn _mm512_cmpneq_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
4368    unsafe { simd_bitmask::<i8x64, _>(simd_ne(a.as_i8x64(), b.as_i8x64())) }
4369}
4370
4371/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4372///
4373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi8_mask&expand=1101)
4374#[inline]
4375#[target_feature(enable = "avx512bw")]
4376#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4377#[cfg_attr(test, assert_instr(vpcmp))]
4378#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4379pub const fn _mm512_mask_cmpneq_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
4380    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_NE>(k1, a, b)
4381}
4382
4383/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k.
4384///
4385/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi8_mask&expand=1098)
4386#[inline]
4387#[target_feature(enable = "avx512bw,avx512vl")]
4388#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4389#[cfg_attr(test, assert_instr(vpcmp))]
4390#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4391pub const fn _mm256_cmpneq_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
4392    unsafe { simd_bitmask::<i8x32, _>(simd_ne(a.as_i8x32(), b.as_i8x32())) }
4393}
4394
4395/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4396///
4397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi8_mask&expand=1099)
4398#[inline]
4399#[target_feature(enable = "avx512bw,avx512vl")]
4400#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4401#[cfg_attr(test, assert_instr(vpcmp))]
4402#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4403pub const fn _mm256_mask_cmpneq_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
4404    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_NE>(k1, a, b)
4405}
4406
4407/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k.
4408///
4409/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi8_mask&expand=1096)
4410#[inline]
4411#[target_feature(enable = "avx512bw,avx512vl")]
4412#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4413#[cfg_attr(test, assert_instr(vpcmp))]
4414#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4415pub const fn _mm_cmpneq_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
4416    unsafe { simd_bitmask::<i8x16, _>(simd_ne(a.as_i8x16(), b.as_i8x16())) }
4417}
4418
4419/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4420///
4421/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi8_mask&expand=1097)
4422#[inline]
4423#[target_feature(enable = "avx512bw,avx512vl")]
4424#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4425#[cfg_attr(test, assert_instr(vpcmp))]
4426#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4427pub const fn _mm_mask_cmpneq_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
4428    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_NE>(k1, a, b)
4429}
4430
/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4432///
4433/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu16_mask&expand=715)
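///
/// A minimal sketch showing the `_MM_CMPINT_*` operands and the unsigned
/// interpretation of the elements (assumes `avx512bw` is detected at runtime):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512bw") {
///         // SAFETY: the required target feature was detected above.
///         unsafe {
///             let a = _mm512_set1_epi16(-1); // every element is u16::MAX unsigned
///             let b = _mm512_set1_epi16(1);
///             // Unsigned 0xFFFF is not less than 1, so no mask bit is set...
///             assert_eq!(_mm512_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b), 0);
///             // ...but every element differs.
///             assert_eq!(_mm512_cmp_epu16_mask::<_MM_CMPINT_NE>(a, b), u32::MAX);
///         }
///     }
/// }
/// ```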
4434#[inline]
4435#[target_feature(enable = "avx512bw")]
4436#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4437#[rustc_legacy_const_generics(2)]
4438#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4439#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4440pub const fn _mm512_cmp_epu16_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask32 {
4441    unsafe {
4442        static_assert_uimm_bits!(IMM8, 3);
4443        let a = a.as_u16x32();
4444        let b = b.as_u16x32();
        // The three-bit predicate selects one of the eight _MM_CMPINT_* comparisons.
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i16x32::ZERO,      // _MM_CMPINT_FALSE: always false
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),     // _MM_CMPINT_NLT: not less than
            6 => simd_gt(a, b),     // _MM_CMPINT_NLE: not (less than or equal)
            _ => i16x32::splat(-1), // _MM_CMPINT_TRUE: always true
4454        };
4455        simd_bitmask(r)
4456    }
4457}
4458
4459/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4460///
4461/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu16_mask&expand=716)
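///
/// A minimal sketch of the zeromask behaviour (assumes `avx512bw` is detected
/// at runtime):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512bw") {
///         // SAFETY: the required target feature was detected above.
///         unsafe {
///             let a = _mm512_set1_epi16(3);
///             let b = _mm512_set1_epi16(4);
///             // Every element satisfies a < b, but only bits set in k1 survive.
///             let k1: __mmask32 = 0x0000_00FF;
///             assert_eq!(_mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(k1, a, b), 0x0000_00FF);
///         }
///     }
/// }
/// ```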
4462#[inline]
4463#[target_feature(enable = "avx512bw")]
4464#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4465#[rustc_legacy_const_generics(3)]
4466#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4467#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4468pub const fn _mm512_mask_cmp_epu16_mask<const IMM8: i32>(
4469    k1: __mmask32,
4470    a: __m512i,
4471    b: __m512i,
4472) -> __mmask32 {
4473    unsafe {
4474        static_assert_uimm_bits!(IMM8, 3);
4475        let a = a.as_u16x32();
4476        let b = b.as_u16x32();
        // Widen the bitmask k1 into a full vector mask so it can be ANDed in below.
        let k1 = simd_select_bitmask(k1, i16x32::splat(-1), i16x32::ZERO);
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i16x32::ZERO, // _MM_CMPINT_FALSE: nothing survives the mask
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1, // _MM_CMPINT_TRUE: the mask itself
4487        };
4488        simd_bitmask(r)
4489    }
4490}
4491
4492/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4493///
4494/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu16_mask&expand=713)
4495#[inline]
4496#[target_feature(enable = "avx512bw,avx512vl")]
4497#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4498#[rustc_legacy_const_generics(2)]
4499#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4500#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4501pub const fn _mm256_cmp_epu16_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask16 {
4502    unsafe {
4503        static_assert_uimm_bits!(IMM8, 3);
4504        let a = a.as_u16x16();
4505        let b = b.as_u16x16();
4506        let r = match IMM8 {
4507            0 => simd_eq(a, b),
4508            1 => simd_lt(a, b),
4509            2 => simd_le(a, b),
4510            3 => i16x16::ZERO,
4511            4 => simd_ne(a, b),
4512            5 => simd_ge(a, b),
4513            6 => simd_gt(a, b),
4514            _ => i16x16::splat(-1),
4515        };
4516        simd_bitmask(r)
4517    }
4518}
4519
4520/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4521///
4522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu16_mask&expand=714)
4523#[inline]
4524#[target_feature(enable = "avx512bw,avx512vl")]
4525#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4526#[rustc_legacy_const_generics(3)]
4527#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4528#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4529pub const fn _mm256_mask_cmp_epu16_mask<const IMM8: i32>(
4530    k1: __mmask16,
4531    a: __m256i,
4532    b: __m256i,
4533) -> __mmask16 {
4534    unsafe {
4535        static_assert_uimm_bits!(IMM8, 3);
4536        let a = a.as_u16x16();
4537        let b = b.as_u16x16();
4538        let k1 = simd_select_bitmask(k1, i16x16::splat(-1), i16x16::ZERO);
4539        let r = match IMM8 {
4540            0 => simd_and(k1, simd_eq(a, b)),
4541            1 => simd_and(k1, simd_lt(a, b)),
4542            2 => simd_and(k1, simd_le(a, b)),
4543            3 => i16x16::ZERO,
4544            4 => simd_and(k1, simd_ne(a, b)),
4545            5 => simd_and(k1, simd_ge(a, b)),
4546            6 => simd_and(k1, simd_gt(a, b)),
4547            _ => k1,
4548        };
4549        simd_bitmask(r)
4550    }
4551}
4552
4553/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4554///
4555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu16_mask&expand=711)
4556#[inline]
4557#[target_feature(enable = "avx512bw,avx512vl")]
4558#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4559#[rustc_legacy_const_generics(2)]
4560#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4561#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4562pub const fn _mm_cmp_epu16_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask8 {
4563    unsafe {
4564        static_assert_uimm_bits!(IMM8, 3);
4565        let a = a.as_u16x8();
4566        let b = b.as_u16x8();
4567        let r = match IMM8 {
4568            0 => simd_eq(a, b),
4569            1 => simd_lt(a, b),
4570            2 => simd_le(a, b),
4571            3 => i16x8::ZERO,
4572            4 => simd_ne(a, b),
4573            5 => simd_ge(a, b),
4574            6 => simd_gt(a, b),
4575            _ => i16x8::splat(-1),
4576        };
4577        simd_bitmask(r)
4578    }
4579}
4580
4581/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4582///
4583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu16_mask&expand=712)
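///
/// The fixed-predicate helpers above delegate to this function; a minimal
/// sketch of that equivalence, under the same runtime-detection assumptions:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512bw") && is_x86_feature_detected!("avx512vl") {
///         // SAFETY: the required target features were detected above.
///         unsafe {
///             let a = _mm_set1_epi16(7);
///             let b = _mm_set_epi16(7, 0, 7, 0, 7, 0, 7, 0);
///             let k1: __mmask8 = 0b1111_0000;
///             assert_eq!(
///                 _mm_mask_cmp_epu16_mask::<_MM_CMPINT_EQ>(k1, a, b),
///                 _mm_mask_cmpeq_epu16_mask(k1, a, b),
///             );
///         }
///     }
/// }
/// ```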
4584#[inline]
4585#[target_feature(enable = "avx512bw,avx512vl")]
4586#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4587#[rustc_legacy_const_generics(3)]
4588#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4589#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4590pub const fn _mm_mask_cmp_epu16_mask<const IMM8: i32>(
4591    k1: __mmask8,
4592    a: __m128i,
4593    b: __m128i,
4594) -> __mmask8 {
4595    unsafe {
4596        static_assert_uimm_bits!(IMM8, 3);
4597        let a = a.as_u16x8();
4598        let b = b.as_u16x8();
4599        let k1 = simd_select_bitmask(k1, i16x8::splat(-1), i16x8::ZERO);
4600        let r = match IMM8 {
4601            0 => simd_and(k1, simd_eq(a, b)),
4602            1 => simd_and(k1, simd_lt(a, b)),
4603            2 => simd_and(k1, simd_le(a, b)),
4604            3 => i16x8::ZERO,
4605            4 => simd_and(k1, simd_ne(a, b)),
4606            5 => simd_and(k1, simd_ge(a, b)),
4607            6 => simd_and(k1, simd_gt(a, b)),
4608            _ => k1,
4609        };
4610        simd_bitmask(r)
4611    }
4612}
4613
4614/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4615///
4616/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu8_mask&expand=733)
4617#[inline]
4618#[target_feature(enable = "avx512bw")]
4619#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4620#[rustc_legacy_const_generics(2)]
4621#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4622#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4623pub const fn _mm512_cmp_epu8_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask64 {
4624    unsafe {
4625        static_assert_uimm_bits!(IMM8, 3);
4626        let a = a.as_u8x64();
4627        let b = b.as_u8x64();
4628        let r = match IMM8 {
4629            0 => simd_eq(a, b),
4630            1 => simd_lt(a, b),
4631            2 => simd_le(a, b),
4632            3 => i8x64::ZERO,
4633            4 => simd_ne(a, b),
4634            5 => simd_ge(a, b),
4635            6 => simd_gt(a, b),
4636            _ => i8x64::splat(-1),
4637        };
4638        simd_bitmask(r)
4639    }
4640}
4641
4642/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4643///
4644/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu8_mask&expand=734)
4645#[inline]
4646#[target_feature(enable = "avx512bw")]
4647#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4648#[rustc_legacy_const_generics(3)]
4649#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4650#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4651pub const fn _mm512_mask_cmp_epu8_mask<const IMM8: i32>(
4652    k1: __mmask64,
4653    a: __m512i,
4654    b: __m512i,
4655) -> __mmask64 {
4656    unsafe {
4657        static_assert_uimm_bits!(IMM8, 3);
4658        let a = a.as_u8x64();
4659        let b = b.as_u8x64();
4660        let k1 = simd_select_bitmask(k1, i8x64::splat(-1), i8x64::ZERO);
4661        let r = match IMM8 {
4662            0 => simd_and(k1, simd_eq(a, b)),
4663            1 => simd_and(k1, simd_lt(a, b)),
4664            2 => simd_and(k1, simd_le(a, b)),
4665            3 => i8x64::ZERO,
4666            4 => simd_and(k1, simd_ne(a, b)),
4667            5 => simd_and(k1, simd_ge(a, b)),
4668            6 => simd_and(k1, simd_gt(a, b)),
4669            _ => k1,
4670        };
4671        simd_bitmask(r)
4672    }
4673}
4674
4675/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4676///
4677/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu8_mask&expand=731)
4678#[inline]
4679#[target_feature(enable = "avx512bw,avx512vl")]
4680#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4681#[rustc_legacy_const_generics(2)]
4682#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4683#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4684pub const fn _mm256_cmp_epu8_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask32 {
4685    unsafe {
4686        static_assert_uimm_bits!(IMM8, 3);
4687        let a = a.as_u8x32();
4688        let b = b.as_u8x32();
4689        let r = match IMM8 {
4690            0 => simd_eq(a, b),
4691            1 => simd_lt(a, b),
4692            2 => simd_le(a, b),
4693            3 => i8x32::ZERO,
4694            4 => simd_ne(a, b),
4695            5 => simd_ge(a, b),
4696            6 => simd_gt(a, b),
4697            _ => i8x32::splat(-1),
4698        };
4699        simd_bitmask(r)
4700    }
4701}
4702
4703/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4704///
4705/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu8_mask&expand=732)
4706#[inline]
4707#[target_feature(enable = "avx512bw,avx512vl")]
4708#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4709#[rustc_legacy_const_generics(3)]
4710#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4711#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4712pub const fn _mm256_mask_cmp_epu8_mask<const IMM8: i32>(
4713    k1: __mmask32,
4714    a: __m256i,
4715    b: __m256i,
4716) -> __mmask32 {
4717    unsafe {
4718        static_assert_uimm_bits!(IMM8, 3);
4719        let a = a.as_u8x32();
4720        let b = b.as_u8x32();
4721        let k1 = simd_select_bitmask(k1, i8x32::splat(-1), i8x32::ZERO);
4722        let r = match IMM8 {
4723            0 => simd_and(k1, simd_eq(a, b)),
4724            1 => simd_and(k1, simd_lt(a, b)),
4725            2 => simd_and(k1, simd_le(a, b)),
4726            3 => i8x32::ZERO,
4727            4 => simd_and(k1, simd_ne(a, b)),
4728            5 => simd_and(k1, simd_ge(a, b)),
4729            6 => simd_and(k1, simd_gt(a, b)),
4730            _ => k1,
4731        };
4732        simd_bitmask(r)
4733    }
4734}
4735
4736/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4737///
4738/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu8_mask&expand=729)
4739#[inline]
4740#[target_feature(enable = "avx512bw,avx512vl")]
4741#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4742#[rustc_legacy_const_generics(2)]
4743#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4744#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4745pub const fn _mm_cmp_epu8_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask16 {
4746    unsafe {
4747        static_assert_uimm_bits!(IMM8, 3);
4748        let a = a.as_u8x16();
4749        let b = b.as_u8x16();
4750        let r = match IMM8 {
4751            0 => simd_eq(a, b),
4752            1 => simd_lt(a, b),
4753            2 => simd_le(a, b),
4754            3 => i8x16::ZERO,
4755            4 => simd_ne(a, b),
4756            5 => simd_ge(a, b),
4757            6 => simd_gt(a, b),
4758            _ => i8x16::splat(-1),
4759        };
4760        simd_bitmask(r)
4761    }
4762}
4763
4764/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4765///
4766/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu8_mask&expand=730)
4767#[inline]
4768#[target_feature(enable = "avx512bw,avx512vl")]
4769#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4770#[rustc_legacy_const_generics(3)]
4771#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4772#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4773pub const fn _mm_mask_cmp_epu8_mask<const IMM8: i32>(
4774    k1: __mmask16,
4775    a: __m128i,
4776    b: __m128i,
4777) -> __mmask16 {
4778    unsafe {
4779        static_assert_uimm_bits!(IMM8, 3);
4780        let a = a.as_u8x16();
4781        let b = b.as_u8x16();
4782        let k1 = simd_select_bitmask(k1, i8x16::splat(-1), i8x16::ZERO);
4783        let r = match IMM8 {
4784            0 => simd_and(k1, simd_eq(a, b)),
4785            1 => simd_and(k1, simd_lt(a, b)),
4786            2 => simd_and(k1, simd_le(a, b)),
4787            3 => i8x16::ZERO,
4788            4 => simd_and(k1, simd_ne(a, b)),
4789            5 => simd_and(k1, simd_ge(a, b)),
4790            6 => simd_and(k1, simd_gt(a, b)),
4791            _ => k1,
4792        };
4793        simd_bitmask(r)
4794    }
4795}
4796
4797/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4798///
4799/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi16_mask&expand=691)
4800#[inline]
4801#[target_feature(enable = "avx512bw")]
4802#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4803#[rustc_legacy_const_generics(2)]
4804#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4805#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4806pub const fn _mm512_cmp_epi16_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask32 {
4807    unsafe {
4808        static_assert_uimm_bits!(IMM8, 3);
4809        let a = a.as_i16x32();
4810        let b = b.as_i16x32();
4811        let r = match IMM8 {
4812            0 => simd_eq(a, b),
4813            1 => simd_lt(a, b),
4814            2 => simd_le(a, b),
4815            3 => i16x32::ZERO,
4816            4 => simd_ne(a, b),
4817            5 => simd_ge(a, b),
4818            6 => simd_gt(a, b),
4819            _ => i16x32::splat(-1),
4820        };
4821        simd_bitmask(r)
4822    }
4823}
4824
4825/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4826///
4827/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi16_mask&expand=692)
4828#[inline]
4829#[target_feature(enable = "avx512bw")]
4830#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4831#[rustc_legacy_const_generics(3)]
4832#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4833#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4834pub const fn _mm512_mask_cmp_epi16_mask<const IMM8: i32>(
4835    k1: __mmask32,
4836    a: __m512i,
4837    b: __m512i,
4838) -> __mmask32 {
4839    unsafe {
4840        static_assert_uimm_bits!(IMM8, 3);
4841        let a = a.as_i16x32();
4842        let b = b.as_i16x32();
4843        let k1 = simd_select_bitmask(k1, i16x32::splat(-1), i16x32::ZERO);
4844        let r = match IMM8 {
4845            0 => simd_and(k1, simd_eq(a, b)),
4846            1 => simd_and(k1, simd_lt(a, b)),
4847            2 => simd_and(k1, simd_le(a, b)),
4848            3 => i16x32::ZERO,
4849            4 => simd_and(k1, simd_ne(a, b)),
4850            5 => simd_and(k1, simd_ge(a, b)),
4851            6 => simd_and(k1, simd_gt(a, b)),
4852            _ => k1,
4853        };
4854        simd_bitmask(r)
4855    }
4856}
4857
4858/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4859///
4860/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi16_mask&expand=689)
4861#[inline]
4862#[target_feature(enable = "avx512bw,avx512vl")]
4863#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4864#[rustc_legacy_const_generics(2)]
4865#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4866#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4867pub const fn _mm256_cmp_epi16_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask16 {
4868    unsafe {
4869        static_assert_uimm_bits!(IMM8, 3);
4870        let a = a.as_i16x16();
4871        let b = b.as_i16x16();
4872        let r = match IMM8 {
4873            0 => simd_eq(a, b),
4874            1 => simd_lt(a, b),
4875            2 => simd_le(a, b),
4876            3 => i16x16::ZERO,
4877            4 => simd_ne(a, b),
4878            5 => simd_ge(a, b),
4879            6 => simd_gt(a, b),
4880            _ => i16x16::splat(-1),
4881        };
4882        simd_bitmask(r)
4883    }
4884}
4885
4886/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4887///
4888/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi16_mask&expand=690)
4889#[inline]
4890#[target_feature(enable = "avx512bw,avx512vl")]
4891#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4892#[rustc_legacy_const_generics(3)]
4893#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4894#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4895pub const fn _mm256_mask_cmp_epi16_mask<const IMM8: i32>(
4896    k1: __mmask16,
4897    a: __m256i,
4898    b: __m256i,
4899) -> __mmask16 {
4900    unsafe {
4901        static_assert_uimm_bits!(IMM8, 3);
4902        let a = a.as_i16x16();
4903        let b = b.as_i16x16();
4904        let k1 = simd_select_bitmask(k1, i16x16::splat(-1), i16x16::ZERO);
4905        let r = match IMM8 {
4906            0 => simd_and(k1, simd_eq(a, b)),
4907            1 => simd_and(k1, simd_lt(a, b)),
4908            2 => simd_and(k1, simd_le(a, b)),
4909            3 => i16x16::ZERO,
4910            4 => simd_and(k1, simd_ne(a, b)),
4911            5 => simd_and(k1, simd_ge(a, b)),
4912            6 => simd_and(k1, simd_gt(a, b)),
4913            _ => k1,
4914        };
4915        simd_bitmask(r)
4916    }
4917}
4918
4919/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4920///
4921/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi16_mask&expand=687)
4922#[inline]
4923#[target_feature(enable = "avx512bw,avx512vl")]
4924#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4925#[rustc_legacy_const_generics(2)]
4926#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4927#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4928pub const fn _mm_cmp_epi16_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask8 {
4929    unsafe {
4930        static_assert_uimm_bits!(IMM8, 3);
4931        let a = a.as_i16x8();
4932        let b = b.as_i16x8();
4933        let r = match IMM8 {
4934            0 => simd_eq(a, b),
4935            1 => simd_lt(a, b),
4936            2 => simd_le(a, b),
4937            3 => i16x8::ZERO,
4938            4 => simd_ne(a, b),
4939            5 => simd_ge(a, b),
4940            6 => simd_gt(a, b),
4941            _ => i16x8::splat(-1),
4942        };
4943        simd_bitmask(r)
4944    }
4945}
4946
4947/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4948///
4949/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi16_mask&expand=688)
4950#[inline]
4951#[target_feature(enable = "avx512bw,avx512vl")]
4952#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4953#[rustc_legacy_const_generics(3)]
4954#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4955#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4956pub const fn _mm_mask_cmp_epi16_mask<const IMM8: i32>(
4957    k1: __mmask8,
4958    a: __m128i,
4959    b: __m128i,
4960) -> __mmask8 {
4961    unsafe {
4962        static_assert_uimm_bits!(IMM8, 3);
4963        let a = a.as_i16x8();
4964        let b = b.as_i16x8();
4965        let k1 = simd_select_bitmask(k1, i16x8::splat(-1), i16x8::ZERO);
4966        let r = match IMM8 {
4967            0 => simd_and(k1, simd_eq(a, b)),
4968            1 => simd_and(k1, simd_lt(a, b)),
4969            2 => simd_and(k1, simd_le(a, b)),
4970            3 => i16x8::ZERO,
4971            4 => simd_and(k1, simd_ne(a, b)),
4972            5 => simd_and(k1, simd_ge(a, b)),
4973            6 => simd_and(k1, simd_gt(a, b)),
4974            _ => k1,
4975        };
4976        simd_bitmask(r)
4977    }
4978}
4979
4980/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4981///
4982/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi8_mask&expand=709)
4983#[inline]
4984#[target_feature(enable = "avx512bw")]
4985#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4986#[rustc_legacy_const_generics(2)]
4987#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4988#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4989pub const fn _mm512_cmp_epi8_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask64 {
4990    unsafe {
4991        static_assert_uimm_bits!(IMM8, 3);
4992        let a = a.as_i8x64();
4993        let b = b.as_i8x64();
4994        let r = match IMM8 {
4995            0 => simd_eq(a, b),
4996            1 => simd_lt(a, b),
4997            2 => simd_le(a, b),
4998            3 => i8x64::ZERO,
4999            4 => simd_ne(a, b),
5000            5 => simd_ge(a, b),
5001            6 => simd_gt(a, b),
5002            _ => i8x64::splat(-1),
5003        };
5004        simd_bitmask(r)
5005    }
5006}
5007
5008/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
5009///
5010/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi8_mask&expand=710)
5011#[inline]
5012#[target_feature(enable = "avx512bw")]
5013#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5014#[rustc_legacy_const_generics(3)]
5015#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
5016#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5017pub const fn _mm512_mask_cmp_epi8_mask<const IMM8: i32>(
5018    k1: __mmask64,
5019    a: __m512i,
5020    b: __m512i,
5021) -> __mmask64 {
5022    unsafe {
5023        static_assert_uimm_bits!(IMM8, 3);
5024        let a = a.as_i8x64();
5025        let b = b.as_i8x64();
5026        let k1 = simd_select_bitmask(k1, i8x64::splat(-1), i8x64::ZERO);
5027        let r = match IMM8 {
5028            0 => simd_and(k1, simd_eq(a, b)),
5029            1 => simd_and(k1, simd_lt(a, b)),
5030            2 => simd_and(k1, simd_le(a, b)),
5031            3 => i8x64::ZERO,
5032            4 => simd_and(k1, simd_ne(a, b)),
5033            5 => simd_and(k1, simd_ge(a, b)),
5034            6 => simd_and(k1, simd_gt(a, b)),
5035            _ => k1,
5036        };
5037        simd_bitmask(r)
5038    }
5039}
5040
5041/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
5042///
5043/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi8_mask&expand=707)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmp_epi8_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_i8x32();
        let b = b.as_i8x32();
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i8x32::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i8x32::splat(-1),
        };
        simd_bitmask(r)
    }
}

/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi8_mask&expand=708)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmp_epi8_mask<const IMM8: i32>(
    k1: __mmask32,
    a: __m256i,
    b: __m256i,
) -> __mmask32 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_i8x32();
        let b = b.as_i8x32();
        let k1 = simd_select_bitmask(k1, i8x32::splat(-1), i8x32::ZERO);
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i8x32::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}

/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi8_mask&expand=705)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmp_epi8_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask16 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_i8x16();
        let b = b.as_i8x16();
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i8x16::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i8x16::splat(-1),
        };
        simd_bitmask(r)
    }
}

/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi8_mask&expand=706)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmp_epi8_mask<const IMM8: i32>(
    k1: __mmask16,
    a: __m128i,
    b: __m128i,
) -> __mmask16 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_i8x16();
        let b = b.as_i8x16();
        let k1 = simd_select_bitmask(k1, i8x16::splat(-1), i8x16::ZERO);
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i8x16::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}

/// Reduce the packed 16-bit integers in a by addition. Returns the sum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_add_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_reduce_add_epi16(a: __m256i) -> i16 {
    unsafe { simd_reduce_add_ordered(a.as_i16x16(), 0) }
}

/// Reduce the packed 16-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_add_epi16)
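///
/// # Example
///
/// A minimal illustrative sketch (not part of the original source), assuming
/// `avx512bw` and `avx512vl` support has been verified at runtime. Inactive
/// lanes contribute the additive identity, 0:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let v = _mm256_set1_epi16(3);
///     // Only the low four of the sixteen lanes are active: 4 * 3 = 12.
///     assert_eq!(_mm256_mask_reduce_add_epi16(0b1111, v), 12);
/// }
/// ```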
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_reduce_add_epi16(k: __mmask16, a: __m256i) -> i16 {
    unsafe { simd_reduce_add_ordered(simd_select_bitmask(k, a.as_i16x16(), i16x16::ZERO), 0) }
}

/// Reduce the packed 16-bit integers in a by addition. Returns the sum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_add_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_reduce_add_epi16(a: __m128i) -> i16 {
    unsafe { simd_reduce_add_ordered(a.as_i16x8(), 0) }
}

/// Reduce the packed 16-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_add_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_reduce_add_epi16(k: __mmask8, a: __m128i) -> i16 {
    unsafe { simd_reduce_add_ordered(simd_select_bitmask(k, a.as_i16x8(), i16x8::ZERO), 0) }
}

/// Reduce the packed 8-bit integers in a by addition. Returns the sum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_add_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_reduce_add_epi8(a: __m256i) -> i8 {
    unsafe { simd_reduce_add_ordered(a.as_i8x32(), 0) }
}

/// Reduce the packed 8-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_add_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_reduce_add_epi8(k: __mmask32, a: __m256i) -> i8 {
    unsafe { simd_reduce_add_ordered(simd_select_bitmask(k, a.as_i8x32(), i8x32::ZERO), 0) }
}

/// Reduce the packed 8-bit integers in a by addition. Returns the sum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_add_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_reduce_add_epi8(a: __m128i) -> i8 {
    unsafe { simd_reduce_add_ordered(a.as_i8x16(), 0) }
}

/// Reduce the packed 8-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_add_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_reduce_add_epi8(k: __mmask16, a: __m128i) -> i8 {
    unsafe { simd_reduce_add_ordered(simd_select_bitmask(k, a.as_i8x16(), i8x16::ZERO), 0) }
}

/// Reduce the packed 16-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_and_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_reduce_and_epi16(a: __m256i) -> i16 {
    unsafe { simd_reduce_and(a.as_i16x16()) }
}

/// Reduce the packed 16-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_and_epi16)
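///
/// # Example
///
/// A minimal illustrative sketch (not part of the original source), assuming
/// `avx512bw` and `avx512vl` support has been verified at runtime. Inactive
/// lanes contribute the AND identity, all-ones, which is why the
/// implementation substitutes `_mm256_set1_epi64x(-1)` rather than zero:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let v = _mm256_set1_epi16(0b0110);
///     // Masking lanes off does not change the AND of the active lanes.
///     assert_eq!(_mm256_mask_reduce_and_epi16(0b11, v), 0b0110);
/// }
/// ```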
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_reduce_and_epi16(k: __mmask16, a: __m256i) -> i16 {
    unsafe {
        simd_reduce_and(simd_select_bitmask(
            k,
            a.as_i16x16(),
            _mm256_set1_epi64x(-1).as_i16x16(),
        ))
    }
}

/// Reduce the packed 16-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_and_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_reduce_and_epi16(a: __m128i) -> i16 {
    unsafe { simd_reduce_and(a.as_i16x8()) }
}

/// Reduce the packed 16-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_and_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_reduce_and_epi16(k: __mmask8, a: __m128i) -> i16 {
    unsafe {
        simd_reduce_and(simd_select_bitmask(
            k,
            a.as_i16x8(),
            _mm_set1_epi64x(-1).as_i16x8(),
        ))
    }
}

/// Reduce the packed 8-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_and_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_reduce_and_epi8(a: __m256i) -> i8 {
    unsafe { simd_reduce_and(a.as_i8x32()) }
}

/// Reduce the packed 8-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_and_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_reduce_and_epi8(k: __mmask32, a: __m256i) -> i8 {
    unsafe {
        simd_reduce_and(simd_select_bitmask(
            k,
            a.as_i8x32(),
            _mm256_set1_epi64x(-1).as_i8x32(),
        ))
    }
}

/// Reduce the packed 8-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_and_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_reduce_and_epi8(a: __m128i) -> i8 {
    unsafe { simd_reduce_and(a.as_i8x16()) }
}

/// Reduce the packed 8-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_and_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_reduce_and_epi8(k: __mmask16, a: __m128i) -> i8 {
    unsafe {
        simd_reduce_and(simd_select_bitmask(
            k,
            a.as_i8x16(),
            _mm_set1_epi64x(-1).as_i8x16(),
        ))
    }
}

/// Reduce the packed 16-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_reduce_max_epi16(a: __m256i) -> i16 {
    unsafe { simd_reduce_max(a.as_i16x16()) }
}

/// Reduce the packed 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epi16)
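///
/// # Example
///
/// A minimal illustrative sketch (not part of the original source), assuming
/// `avx512bw` and `avx512vl` support has been verified at runtime. Inactive
/// lanes contribute `i16::MIN`, the identity for a signed maximum:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let v = _mm256_set_epi16(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
///     // Lane 0 (value 1) is the only active lane.
///     assert_eq!(_mm256_mask_reduce_max_epi16(0b1, v), 1);
/// }
/// ```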
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_reduce_max_epi16(k: __mmask16, a: __m256i) -> i16 {
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(-32768))) }
}

/// Reduce the packed 16-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_reduce_max_epi16(a: __m128i) -> i16 {
    unsafe { simd_reduce_max(a.as_i16x8()) }
}

/// Reduce the packed 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_reduce_max_epi16(k: __mmask8, a: __m128i) -> i16 {
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(-32768))) }
}

/// Reduce the packed 8-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_reduce_max_epi8(a: __m256i) -> i8 {
    unsafe { simd_reduce_max(a.as_i8x32()) }
}

/// Reduce the packed 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_reduce_max_epi8(k: __mmask32, a: __m256i) -> i8 {
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(-128))) }
}

/// Reduce the packed 8-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_reduce_max_epi8(a: __m128i) -> i8 {
    unsafe { simd_reduce_max(a.as_i8x16()) }
}

/// Reduce the packed 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_reduce_max_epi8(k: __mmask16, a: __m128i) -> i8 {
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(-128))) }
}

/// Reduce the packed unsigned 16-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epu16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_reduce_max_epu16(a: __m256i) -> u16 {
    unsafe { simd_reduce_max(a.as_u16x16()) }
}

/// Reduce the packed unsigned 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epu16)
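///
/// # Example
///
/// A minimal illustrative sketch (not part of the original source), assuming
/// `avx512bw` and `avx512vl` support has been verified at runtime. Inactive
/// lanes contribute 0, the identity for an unsigned maximum:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let v = _mm256_set1_epi16(-1); // every lane holds u16::MAX
///     // With no active lanes the reduction falls back to the identity, 0.
///     assert_eq!(_mm256_mask_reduce_max_epu16(0, v), 0);
/// }
/// ```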
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_reduce_max_epu16(k: __mmask16, a: __m256i) -> u16 {
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u16x16(), u16x16::ZERO)) }
}

/// Reduce the packed unsigned 16-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epu16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_reduce_max_epu16(a: __m128i) -> u16 {
    unsafe { simd_reduce_max(a.as_u16x8()) }
}

/// Reduce the packed unsigned 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epu16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_reduce_max_epu16(k: __mmask8, a: __m128i) -> u16 {
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u16x8(), u16x8::ZERO)) }
}

/// Reduce the packed unsigned 8-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epu8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_reduce_max_epu8(a: __m256i) -> u8 {
    unsafe { simd_reduce_max(a.as_u8x32()) }
}

/// Reduce the packed unsigned 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epu8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_reduce_max_epu8(k: __mmask32, a: __m256i) -> u8 {
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u8x32(), u8x32::ZERO)) }
}

/// Reduce the packed unsigned 8-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epu8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_reduce_max_epu8(a: __m128i) -> u8 {
    unsafe { simd_reduce_max(a.as_u8x16()) }
}

/// Reduce the packed unsigned 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epu8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_reduce_max_epu8(k: __mmask16, a: __m128i) -> u8 {
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u8x16(), u8x16::ZERO)) }
}

/// Reduce the packed 16-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_reduce_min_epi16(a: __m256i) -> i16 {
    unsafe { simd_reduce_min(a.as_i16x16()) }
}

/// Reduce the packed 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epi16)
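///
/// # Example
///
/// A minimal illustrative sketch (not part of the original source), assuming
/// `avx512bw` and `avx512vl` support has been verified at runtime. Inactive
/// lanes contribute `i16::MAX`, the identity for a signed minimum:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let v = _mm256_set1_epi16(-5);
///     // One active lane is enough to pull the minimum down to -5.
///     assert_eq!(_mm256_mask_reduce_min_epi16(0b1, v), -5);
/// }
/// ```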
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_reduce_min_epi16(k: __mmask16, a: __m256i) -> i16 {
    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(0x7fff))) }
}

/// Reduce the packed 16-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_reduce_min_epi16(a: __m128i) -> i16 {
    unsafe { simd_reduce_min(a.as_i16x8()) }
}

/// Reduce the packed 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_reduce_min_epi16(k: __mmask8, a: __m128i) -> i16 {
    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(0x7fff))) }
}

/// Reduce the packed 8-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_reduce_min_epi8(a: __m256i) -> i8 {
    unsafe { simd_reduce_min(a.as_i8x32()) }
}

/// Reduce the packed 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_reduce_min_epi8(k: __mmask32, a: __m256i) -> i8 {
    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(0x7f))) }
}

/// Reduce the packed 8-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_reduce_min_epi8(a: __m128i) -> i8 {
    unsafe { simd_reduce_min(a.as_i8x16()) }
}

/// Reduce the packed 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_reduce_min_epi8(k: __mmask16, a: __m128i) -> i8 {
    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(0x7f))) }
}

/// Reduce the packed unsigned 16-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epu16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_reduce_min_epu16(a: __m256i) -> u16 {
    unsafe { simd_reduce_min(a.as_u16x16()) }
}

/// Reduce the packed unsigned 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epu16)
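///
/// # Example
///
/// A minimal illustrative sketch (not part of the original source), assuming
/// `avx512bw` and `avx512vl` support has been verified at runtime. Inactive
/// lanes contribute `u16::MAX` (`0xffff`), the identity for an unsigned
/// minimum:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let v = _mm256_set1_epi16(7);
///     // With no lanes active the result is the identity, u16::MAX.
///     assert_eq!(_mm256_mask_reduce_min_epu16(0, v), u16::MAX);
/// }
/// ```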
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_reduce_min_epu16(k: __mmask16, a: __m256i) -> u16 {
    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u16x16(), u16x16::splat(0xffff))) }
}

/// Reduce the packed unsigned 16-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epu16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_reduce_min_epu16(a: __m128i) -> u16 {
    unsafe { simd_reduce_min(a.as_u16x8()) }
}

/// Reduce the packed unsigned 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epu16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_reduce_min_epu16(k: __mmask8, a: __m128i) -> u16 {
    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u16x8(), u16x8::splat(0xffff))) }
}

/// Reduce the packed unsigned 8-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epu8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_reduce_min_epu8(a: __m256i) -> u8 {
    unsafe { simd_reduce_min(a.as_u8x32()) }
}

/// Reduce the packed unsigned 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epu8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_reduce_min_epu8(k: __mmask32, a: __m256i) -> u8 {
    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u8x32(), u8x32::splat(0xff))) }
}

/// Reduce the packed unsigned 8-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epu8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_reduce_min_epu8(a: __m128i) -> u8 {
    unsafe { simd_reduce_min(a.as_u8x16()) }
}

/// Reduce the packed unsigned 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epu8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_reduce_min_epu8(k: __mmask16, a: __m128i) -> u8 {
    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u8x16(), u8x16::splat(0xff))) }
}

/// Reduce the packed 16-bit integers in a by multiplication. Returns the product of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_mul_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_reduce_mul_epi16(a: __m256i) -> i16 {
    unsafe { simd_reduce_mul_ordered(a.as_i16x16(), 1) }
}

/// Reduce the packed 16-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_mul_epi16)
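///
/// # Example
///
/// A minimal illustrative sketch (not part of the original source), assuming
/// `avx512bw` and `avx512vl` support has been verified at runtime. Inactive
/// lanes contribute the multiplicative identity, 1:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let v = _mm256_set1_epi16(2);
///     // Three active lanes: 2 * 2 * 2 = 8; the remaining lanes become 1.
///     assert_eq!(_mm256_mask_reduce_mul_epi16(0b111, v), 8);
/// }
/// ```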
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_reduce_mul_epi16(k: __mmask16, a: __m256i) -> i16 {
    unsafe { simd_reduce_mul_ordered(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(1)), 1) }
}

/// Reduce the packed 16-bit integers in a by multiplication. Returns the product of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_mul_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_reduce_mul_epi16(a: __m128i) -> i16 {
    unsafe { simd_reduce_mul_ordered(a.as_i16x8(), 1) }
}

/// Reduce the packed 16-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_mul_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_reduce_mul_epi16(k: __mmask8, a: __m128i) -> i16 {
    unsafe { simd_reduce_mul_ordered(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(1)), 1) }
}

/// Reduce the packed 8-bit integers in a by multiplication. Returns the product of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_mul_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_reduce_mul_epi8(a: __m256i) -> i8 {
    unsafe { simd_reduce_mul_ordered(a.as_i8x32(), 1) }
}

/// Reduce the packed 8-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_mul_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_reduce_mul_epi8(k: __mmask32, a: __m256i) -> i8 {
    unsafe { simd_reduce_mul_ordered(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(1)), 1) }
}

/// Reduce the packed 8-bit integers in a by multiplication. Returns the product of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_mul_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_reduce_mul_epi8(a: __m128i) -> i8 {
    unsafe { simd_reduce_mul_ordered(a.as_i8x16(), 1) }
}

/// Reduce the packed 8-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_mul_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_reduce_mul_epi8(k: __mmask16, a: __m128i) -> i8 {
    unsafe { simd_reduce_mul_ordered(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(1)), 1) }
}

/// Reduce the packed 16-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_or_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_reduce_or_epi16(a: __m256i) -> i16 {
    unsafe { simd_reduce_or(a.as_i16x16()) }
}

/// Reduce the packed 16-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_or_epi16)
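///
/// # Example
///
/// A minimal illustrative sketch (not part of the original source), assuming
/// `avx512bw` and `avx512vl` support has been verified at runtime. Inactive
/// lanes contribute the OR identity, 0:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let v = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 2, 1);
///     // Only the low three lanes are active: 1 | 2 | 4 = 7.
///     assert_eq!(_mm256_mask_reduce_or_epi16(0b111, v), 7);
/// }
/// ```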
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_reduce_or_epi16(k: __mmask16, a: __m256i) -> i16 {
    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i16x16(), i16x16::ZERO)) }
}

/// Reduce the packed 16-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_or_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_reduce_or_epi16(a: __m128i) -> i16 {
    unsafe { simd_reduce_or(a.as_i16x8()) }
}

/// Reduce the packed 16-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_or_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_reduce_or_epi16(k: __mmask8, a: __m128i) -> i16 {
    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i16x8(), i16x8::ZERO)) }
}

/// Reduce the packed 8-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_or_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_reduce_or_epi8(a: __m256i) -> i8 {
    unsafe { simd_reduce_or(a.as_i8x32()) }
}

/// Reduce the packed 8-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_or_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_reduce_or_epi8(k: __mmask32, a: __m256i) -> i8 {
    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i8x32(), i8x32::ZERO)) }
}

/// Reduce the packed 8-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_or_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_reduce_or_epi8(a: __m128i) -> i8 {
    unsafe { simd_reduce_or(a.as_i8x16()) }
}

/// Reduce the packed 8-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_or_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_reduce_or_epi8(k: __mmask16, a: __m128i) -> i8 {
    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i8x16(), i8x16::ZERO)) }
}

/// Load 512-bits (composed of 32 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi16&expand=3368)
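///
/// # Example
///
/// A minimal illustrative sketch (not part of the original source), assuming
/// `avx512bw` support has been verified at runtime. The pointer may be
/// unaligned, but it must be valid for a full 64-byte read:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let data = [1i16; 32];
/// unsafe {
///     let v = _mm512_loadu_epi16(data.as_ptr());
///     assert_eq!(_mm512_reduce_add_epi16(v), 32);
/// }
/// ```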
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_loadu_epi16(mem_addr: *const i16) -> __m512i {
    ptr::read_unaligned(mem_addr as *const __m512i)
}

/// Load 256-bits (composed of 16 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi16&expand=3365)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_loadu_epi16(mem_addr: *const i16) -> __m256i {
    ptr::read_unaligned(mem_addr as *const __m256i)
}

/// Load 128-bits (composed of 8 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi16&expand=3362)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_loadu_epi16(mem_addr: *const i16) -> __m128i {
    ptr::read_unaligned(mem_addr as *const __m128i)
}

/// Load 512-bits (composed of 64 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi8&expand=3395)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_loadu_epi8(mem_addr: *const i8) -> __m512i {
    ptr::read_unaligned(mem_addr as *const __m512i)
}

/// Load 256-bits (composed of 32 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi8&expand=3392)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_loadu_epi8(mem_addr: *const i8) -> __m256i {
    ptr::read_unaligned(mem_addr as *const __m256i)
}

/// Load 128-bits (composed of 16 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi8&expand=3389)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_loadu_epi8(mem_addr: *const i8) -> __m128i {
    ptr::read_unaligned(mem_addr as *const __m128i)
}

/// Store 512-bits (composed of 32 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi16&expand=5622)
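///
/// # Example
///
/// A minimal illustrative sketch (not part of the original source), assuming
/// `avx512bw` support has been verified at runtime. The destination may be
/// unaligned, but it must be valid for a full 64-byte write:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let mut out = [0i16; 32];
/// unsafe {
///     let v = _mm512_set1_epi16(7);
///     _mm512_storeu_epi16(out.as_mut_ptr(), v);
/// }
/// assert_eq!(out, [7i16; 32]);
/// ```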
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_storeu_epi16(mem_addr: *mut i16, a: __m512i) {
    ptr::write_unaligned(mem_addr as *mut __m512i, a);
}

/// Store 256-bits (composed of 16 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi16&expand=5620)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_storeu_epi16(mem_addr: *mut i16, a: __m256i) {
    ptr::write_unaligned(mem_addr as *mut __m256i, a);
}

/// Store 128-bits (composed of 8 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi16&expand=5618)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_storeu_epi16(mem_addr: *mut i16, a: __m128i) {
    ptr::write_unaligned(mem_addr as *mut __m128i, a);
}

/// Store 512-bits (composed of 64 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi8&expand=5640)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_storeu_epi8(mem_addr: *mut i8, a: __m512i) {
    ptr::write_unaligned(mem_addr as *mut __m512i, a);
}

/// Store 256-bits (composed of 32 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi8&expand=5638)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_storeu_epi8(mem_addr: *mut i8, a: __m256i) {
    ptr::write_unaligned(mem_addr as *mut __m256i, a);
}

/// Store 128-bits (composed of 16 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi8&expand=5636)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_storeu_epi8(mem_addr: *mut i8, a: __m128i) {
    ptr::write_unaligned(mem_addr as *mut __m128i, a);
}

/// Load packed 16-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi16)
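///
/// # Example
///
/// A minimal illustrative sketch (not part of the original source), assuming
/// `avx512bw` support has been verified at runtime. Lanes whose mask bit is
/// clear keep the value from src:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let data = [5i16; 32];
/// unsafe {
///     let src = _mm512_set1_epi16(-1);
///     // The low 16 lanes come from memory, the high 16 from src.
///     let v = _mm512_mask_loadu_epi16(src, 0x0000ffff, data.as_ptr());
///     assert_eq!(_mm512_reduce_add_epi16(v), 16 * 5 + 16 * -1);
/// }
/// ```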
6040#[inline]
6041#[target_feature(enable = "avx512bw")]
6042#[cfg_attr(test, assert_instr(vmovdqu16))]
6043#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6044#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6045pub const unsafe fn _mm512_mask_loadu_epi16(
6046    src: __m512i,
6047    k: __mmask32,
6048    mem_addr: *const i16,
6049) -> __m512i {
6050    let mask = simd_select_bitmask(k, i16x32::splat(!0), i16x32::ZERO);
6051    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i16x32()).as_m512i()
6052}
6053
6054/// Load packed 16-bit integers from memory into dst using zeromask k
6055/// (elements are zeroed out when the corresponding mask bit is not set).
6056/// mem_addr does not need to be aligned on any particular boundary.
6057///
6058/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi16)
6059#[inline]
6060#[target_feature(enable = "avx512bw")]
6061#[cfg_attr(test, assert_instr(vmovdqu16))]
6062#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6063#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6064pub const unsafe fn _mm512_maskz_loadu_epi16(k: __mmask32, mem_addr: *const i16) -> __m512i {
6065    _mm512_mask_loadu_epi16(_mm512_setzero_si512(), k, mem_addr)
6066}

/// Load packed 8-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_mask_loadu_epi8(
    src: __m512i,
    k: __mmask64,
    mem_addr: *const i8,
) -> __m512i {
    let mask = simd_select_bitmask(k, i8x64::splat(!0), i8x64::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i8x64()).as_m512i()
}

/// Load packed 8-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_maskz_loadu_epi8(k: __mmask64, mem_addr: *const i8) -> __m512i {
    _mm512_mask_loadu_epi8(_mm512_setzero_si512(), k, mem_addr)
}

/// Load packed 16-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_mask_loadu_epi16(
    src: __m256i,
    k: __mmask16,
    mem_addr: *const i16,
) -> __m256i {
    let mask = simd_select_bitmask(k, i16x16::splat(!0), i16x16::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i16x16()).as_m256i()
}

/// Load packed 16-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_maskz_loadu_epi16(k: __mmask16, mem_addr: *const i16) -> __m256i {
    _mm256_mask_loadu_epi16(_mm256_setzero_si256(), k, mem_addr)
}

/// Load packed 8-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_mask_loadu_epi8(
    src: __m256i,
    k: __mmask32,
    mem_addr: *const i8,
) -> __m256i {
    let mask = simd_select_bitmask(k, i8x32::splat(!0), i8x32::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i8x32()).as_m256i()
}

/// Load packed 8-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_maskz_loadu_epi8(k: __mmask32, mem_addr: *const i8) -> __m256i {
    _mm256_mask_loadu_epi8(_mm256_setzero_si256(), k, mem_addr)
}

/// Load packed 16-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_mask_loadu_epi16(
    src: __m128i,
    k: __mmask8,
    mem_addr: *const i16,
) -> __m128i {
    let mask = simd_select_bitmask(k, i16x8::splat(!0), i16x8::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i16x8()).as_m128i()
}

/// Load packed 16-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_maskz_loadu_epi16(k: __mmask8, mem_addr: *const i16) -> __m128i {
    _mm_mask_loadu_epi16(_mm_setzero_si128(), k, mem_addr)
}

/// Load packed 8-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_mask_loadu_epi8(
    src: __m128i,
    k: __mmask16,
    mem_addr: *const i8,
) -> __m128i {
    let mask = simd_select_bitmask(k, i8x16::splat(!0), i8x16::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i8x16()).as_m128i()
}

/// Load packed 8-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_maskz_loadu_epi8(k: __mmask16, mem_addr: *const i8) -> __m128i {
    _mm_mask_loadu_epi8(_mm_setzero_si128(), k, mem_addr)
}

/// Store packed 16-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi16)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask32, a: __m512i) {
    let mask = simd_select_bitmask(mask, i16x32::splat(!0), i16x32::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i16x32());
}
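
// A hedged usage sketch (not part of the original source): masked stores
// write only the lanes whose mask bit is set, which makes them handy for
// the tail iteration of a loop. Assumes AVX-512BW was detected at runtime.
//
// ```rust
// #[target_feature(enable = "avx512bw")]
// unsafe fn store_prefix_epi16(out: &mut [i16], v: __m512i) {
//     // Only in-bounds lanes are written; memory past the end of `out`
//     // is never touched.
//     let k: __mmask32 = if out.len() >= 32 {
//         !0
//     } else {
//         (1u32 << out.len()) - 1
//     };
//     _mm512_mask_storeu_epi16(out.as_mut_ptr(), k, v);
// }
// ```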

/// Store packed 8-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask64, a: __m512i) {
    let mask = simd_select_bitmask(mask, i8x64::splat(!0), i8x64::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i8x64());
}

/// Store packed 16-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask16, a: __m256i) {
    let mask = simd_select_bitmask(mask, i16x16::splat(!0), i16x16::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i16x16());
}

/// Store packed 8-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask32, a: __m256i) {
    let mask = simd_select_bitmask(mask, i8x32::splat(!0), i8x32::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i8x32());
}

/// Store packed 16-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask8, a: __m128i) {
    let mask = simd_select_bitmask(mask, i16x8::splat(!0), i16x8::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i16x8());
}

/// Store packed 8-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask16, a: __m128i) {
    let mask = simd_select_bitmask(mask, i8x16::splat(!0), i8x16::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i8x16());
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_madd_epi16&expand=3511)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub fn _mm512_madd_epi16(a: __m512i, b: __m512i) -> __m512i {
    // Multiplying by a vector of ones is a trick used in the Adler-32
    // algorithm to perform a widening addition of adjacent 16-bit pairs:
    //
    // ```rust
    // #[target_feature(enable = "avx512bw")]
    // unsafe fn widening_add(mad: __m512i) -> __m512i {
    //     _mm512_madd_epi16(mad, _mm512_set1_epi16(1))
    // }
    // ```
    //
    // If this function were implemented using generic vector intrinsics,
    // the optimizer would eliminate that pattern and `vpmaddwd` would no
    // longer be emitted. For this reason, we call the x86 intrinsic directly.
    unsafe { transmute(vpmaddwd(a.as_i16x32(), b.as_i16x32())) }
}
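
// A hedged usage sketch (not part of the original source): `vpmaddwd` is
// the classic building block for 16-bit dot products, multiplying and
// pairwise-adding in one instruction without intermediate overflow.
//
// ```rust
// #[target_feature(enable = "avx512bw")]
// unsafe fn dot_step(acc: __m512i, a: __m512i, b: __m512i) -> __m512i {
//     // Each i32 lane of the product is a[2i]*b[2i] + a[2i+1]*b[2i+1];
//     // accumulate into 32-bit partial sums.
//     _mm512_add_epi32(acc, _mm512_madd_epi16(a, b))
// }
// ```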

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_madd_epi16&expand=3512)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub fn _mm512_mask_madd_epi16(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let madd = _mm512_madd_epi16(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, madd, src.as_i32x16()))
    }
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_madd_epi16&expand=3513)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub fn _mm512_maskz_madd_epi16(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let madd = _mm512_madd_epi16(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, madd, i32x16::ZERO))
    }
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_madd_epi16&expand=3509)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub fn _mm256_mask_madd_epi16(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let madd = _mm256_madd_epi16(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, madd, src.as_i32x8()))
    }
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_madd_epi16&expand=3510)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub fn _mm256_maskz_madd_epi16(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let madd = _mm256_madd_epi16(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, madd, i32x8::ZERO))
    }
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_madd_epi16&expand=3506)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub fn _mm_mask_madd_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let madd = _mm_madd_epi16(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, madd, src.as_i32x4()))
    }
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_madd_epi16&expand=3507)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub fn _mm_maskz_madd_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let madd = _mm_madd_epi16(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, madd, i32x4::ZERO))
    }
}

/// Vertically multiply each unsigned 8-bit integer from a with the corresponding signed 8-bit integer from b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maddubs_epi16&expand=3539)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
pub fn _mm512_maddubs_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpmaddubsw(a.as_u8x64(), b.as_i8x64())) }
}
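
// A hedged usage sketch (not part of the original source): a well-known
// pattern chains `vpmaddubsw` with `vpmaddwd` against a vector of ones to
// widen u8 x i8 products all the way to 32-bit accumulators.
//
// ```rust
// #[target_feature(enable = "avx512bw")]
// unsafe fn dot_u8i8(acc: __m512i, a: __m512i, b: __m512i) -> __m512i {
//     // 64 u8*i8 products -> 32 saturated i16 pair sums -> 16 i32 quad sums.
//     let pairs = _mm512_maddubs_epi16(a, b);
//     let quads = _mm512_madd_epi16(pairs, _mm512_set1_epi16(1));
//     _mm512_add_epi32(acc, quads)
// }
// ```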

/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_maddubs_epi16&expand=3540)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
pub fn _mm512_mask_maddubs_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let madd = _mm512_maddubs_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, madd, src.as_i16x32()))
    }
}

/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_maddubs_epi16&expand=3541)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
pub fn _mm512_maskz_maddubs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let madd = _mm512_maddubs_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, madd, i16x32::ZERO))
    }
}

/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_maddubs_epi16&expand=3537)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
pub fn _mm256_mask_maddubs_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let madd = _mm256_maddubs_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, madd, src.as_i16x16()))
    }
}

/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_maddubs_epi16&expand=3538)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
pub fn _mm256_maskz_maddubs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let madd = _mm256_maddubs_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, madd, i16x16::ZERO))
    }
}

/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_maddubs_epi16&expand=3534)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
pub fn _mm_mask_maddubs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let madd = _mm_maddubs_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, madd, src.as_i16x8()))
    }
}

/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_maddubs_epi16&expand=3535)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
pub fn _mm_maskz_maddubs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let madd = _mm_maddubs_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, madd, i16x8::ZERO))
    }
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packs_epi32&expand=4091)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackssdw))]
pub fn _mm512_packs_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpackssdw(a.as_i32x16(), b.as_i32x16())) }
}
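
// A hedged note (not part of the original source): the pack instructions
// operate per 128-bit lane, so the result interleaves a and b in 4-element
// chunks rather than concatenating them.
//
// ```rust
// #[target_feature(enable = "avx512bw")]
// unsafe fn pack_order_demo(a: __m512i, b: __m512i) -> __m512i {
//     // Result word order within each of the four 128-bit lanes:
//     // four saturated words from a, then four from b, i.e. overall
//     // a[0..4], b[0..4], a[4..8], b[4..8], ...
//     _mm512_packs_epi32(a, b)
// }
// ```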

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packs_epi32&expand=4089)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackssdw))]
pub fn _mm512_mask_packs_epi32(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let pack = _mm512_packs_epi32(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, pack, src.as_i16x32()))
    }
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packs_epi32&expand=4090)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackssdw))]
pub fn _mm512_maskz_packs_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let pack = _mm512_packs_epi32(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, pack, i16x32::ZERO))
    }
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packs_epi32&expand=4086)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackssdw))]
pub fn _mm256_mask_packs_epi32(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let pack = _mm256_packs_epi32(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, pack, src.as_i16x16()))
    }
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_packs_epi32&expand=4087)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackssdw))]
pub fn _mm256_maskz_packs_epi32(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let pack = _mm256_packs_epi32(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, pack, i16x16::ZERO))
    }
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packs_epi32&expand=4083)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackssdw))]
pub fn _mm_mask_packs_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let pack = _mm_packs_epi32(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, pack, src.as_i16x8()))
    }
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packs_epi32&expand=4084)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackssdw))]
pub fn _mm_maskz_packs_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let pack = _mm_packs_epi32(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, pack, i16x8::ZERO))
    }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packs_epi16&expand=4082)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpacksswb))]
pub fn _mm512_packs_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpacksswb(a.as_i16x32(), b.as_i16x32())) }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packs_epi16&expand=4080)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpacksswb))]
pub fn _mm512_mask_packs_epi16(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let pack = _mm512_packs_epi16(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, pack, src.as_i8x64()))
    }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packs_epi16&expand=4081)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpacksswb))]
pub fn _mm512_maskz_packs_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let pack = _mm512_packs_epi16(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, pack, i8x64::ZERO))
    }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packs_epi16&expand=4077)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpacksswb))]
pub fn _mm256_mask_packs_epi16(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let pack = _mm256_packs_epi16(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, pack, src.as_i8x32()))
    }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_packs_epi16&expand=4078)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpacksswb))]
pub fn _mm256_maskz_packs_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let pack = _mm256_packs_epi16(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, pack, i8x32::ZERO))
    }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packs_epi16&expand=4074)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpacksswb))]
pub fn _mm_mask_packs_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let pack = _mm_packs_epi16(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, pack, src.as_i8x16()))
    }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packs_epi16&expand=4075)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpacksswb))]
pub fn _mm_maskz_packs_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let pack = _mm_packs_epi16(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, pack, i8x16::ZERO))
    }
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packus_epi32&expand=4130)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackusdw))]
pub fn _mm512_packus_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpackusdw(a.as_i32x16(), b.as_i32x16())) }
}
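
// A hedged usage sketch (not part of the original source): unsigned
// saturation clamps each signed 32-bit input into [0, 65535], so negative
// inputs become 0 and values above u16::MAX become 65535.
//
// ```rust
// #[target_feature(enable = "avx512bw")]
// unsafe fn clamp_to_u16(a: __m512i) -> __m512i {
//     // Packing a with itself yields each saturated value twice, in the
//     // per-128-bit-lane interleaved order the pack family uses.
//     _mm512_packus_epi32(a, a)
// }
// ```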

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packus_epi32&expand=4128)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackusdw))]
pub fn _mm512_mask_packus_epi32(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let pack = _mm512_packus_epi32(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, pack, src.as_i16x32()))
    }
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packus_epi32&expand=4129)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackusdw))]
pub fn _mm512_maskz_packus_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let pack = _mm512_packus_epi32(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, pack, i16x32::ZERO))
    }
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packus_epi32&expand=4125)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackusdw))]
pub fn _mm256_mask_packus_epi32(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let pack = _mm256_packus_epi32(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, pack, src.as_i16x16()))
    }
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_packus_epi32&expand=4126)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackusdw))]
pub fn _mm256_maskz_packus_epi32(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let pack = _mm256_packus_epi32(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, pack, i16x16::ZERO))
    }
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packus_epi32&expand=4122)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackusdw))]
pub fn _mm_mask_packus_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let pack = _mm_packus_epi32(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, pack, src.as_i16x8()))
    }
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packus_epi32&expand=4123)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackusdw))]
pub fn _mm_maskz_packus_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let pack = _mm_packus_epi32(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, pack, i16x8::ZERO))
    }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packus_epi16&expand=4121)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackuswb))]
pub fn _mm512_packus_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpackuswb(a.as_i16x32(), b.as_i16x32())) }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packus_epi16&expand=4119)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackuswb))]
pub fn _mm512_mask_packus_epi16(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let pack = _mm512_packus_epi16(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, pack, src.as_i8x64()))
    }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packus_epi16&expand=4120)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackuswb))]
pub fn _mm512_maskz_packus_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let pack = _mm512_packus_epi16(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, pack, i8x64::ZERO))
    }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packus_epi16&expand=4116)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackuswb))]
pub fn _mm256_mask_packus_epi16(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let pack = _mm256_packus_epi16(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, pack, src.as_i8x32()))
    }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_packus_epi16&expand=4117)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackuswb))]
pub fn _mm256_maskz_packus_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let pack = _mm256_packus_epi16(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, pack, i8x32::ZERO))
    }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packus_epi16&expand=4113)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackuswb))]
pub fn _mm_mask_packus_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let pack = _mm_packus_epi16(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, pack, src.as_i8x16()))
    }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packus_epi16&expand=4114)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackuswb))]
pub fn _mm_maskz_packus_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let pack = _mm_packus_epi16(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, pack, i8x16::ZERO))
    }
}

/// Average packed unsigned 16-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_avg_epu16&expand=388)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_avg_epu16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = simd_cast::<_, u32x32>(a.as_u16x32());
        let b = simd_cast::<_, u32x32>(b.as_u16x32());
        let r = simd_shr(simd_add(simd_add(a, b), u32x32::splat(1)), u32x32::splat(1));
        transmute(simd_cast::<_, u16x32>(r))
    }
}
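
// A hedged note (not part of the original source): the widening dance above
// computes the rounding average avg(a, b) = (a + b + 1) >> 1 in 32-bit
// arithmetic so the 16-bit addition cannot wrap. A scalar model of one lane:
//
// ```rust
// fn avg_u16(a: u16, b: u16) -> u16 {
//     // Same semantics as one lane of `vpavgw`.
//     ((a as u32 + b as u32 + 1) >> 1) as u16
// }
// ```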

/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_avg_epu16&expand=389)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_avg_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let avg = _mm512_avg_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, avg, src.as_u16x32()))
    }
}

/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_avg_epu16&expand=390)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_avg_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let avg = _mm512_avg_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, avg, u16x32::ZERO))
    }
}

/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_avg_epu16&expand=386)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_avg_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let avg = _mm256_avg_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, avg, src.as_u16x16()))
    }
}

/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_avg_epu16&expand=387)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_avg_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let avg = _mm256_avg_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, avg, u16x16::ZERO))
    }
}

/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_avg_epu16&expand=383)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_avg_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let avg = _mm_avg_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, avg, src.as_u16x8()))
    }
}

/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_avg_epu16&expand=384)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_avg_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let avg = _mm_avg_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, avg, u16x8::ZERO))
    }
}

/// Average packed unsigned 8-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_avg_epu8&expand=397)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_avg_epu8(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = simd_cast::<_, u16x64>(a.as_u8x64());
        let b = simd_cast::<_, u16x64>(b.as_u8x64());
        let r = simd_shr(simd_add(simd_add(a, b), u16x64::splat(1)), u16x64::splat(1));
        transmute(simd_cast::<_, u8x64>(r))
    }
}

/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_avg_epu8&expand=398)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_avg_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let avg = _mm512_avg_epu8(a, b).as_u8x64();
        transmute(simd_select_bitmask(k, avg, src.as_u8x64()))
    }
}

/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_avg_epu8&expand=399)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_avg_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let avg = _mm512_avg_epu8(a, b).as_u8x64();
        transmute(simd_select_bitmask(k, avg, u8x64::ZERO))
    }
}

/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_avg_epu8&expand=395)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_avg_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let avg = _mm256_avg_epu8(a, b).as_u8x32();
        transmute(simd_select_bitmask(k, avg, src.as_u8x32()))
    }
}

/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_avg_epu8&expand=396)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_avg_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let avg = _mm256_avg_epu8(a, b).as_u8x32();
        transmute(simd_select_bitmask(k, avg, u8x32::ZERO))
    }
}

/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_avg_epu8&expand=392)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_avg_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let avg = _mm_avg_epu8(a, b).as_u8x16();
        transmute(simd_select_bitmask(k, avg, src.as_u8x16()))
    }
}

/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_avg_epu8&expand=393)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_avg_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let avg = _mm_avg_epu8(a, b).as_u8x16();
        transmute(simd_select_bitmask(k, avg, u8x16::ZERO))
    }
}

/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sll_epi16&expand=5271)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw))]
pub fn _mm512_sll_epi16(a: __m512i, count: __m128i) -> __m512i {
    unsafe { transmute(vpsllw(a.as_i16x32(), count.as_i16x8())) }
}
7123
7124/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7125///
7126/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sll_epi16&expand=5269)
7127#[inline]
7128#[target_feature(enable = "avx512bw")]
7129#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7130#[cfg_attr(test, assert_instr(vpsllw))]
7131pub fn _mm512_mask_sll_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
7132    unsafe {
7133        let shf = _mm512_sll_epi16(a, count).as_i16x32();
7134        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
7135    }
7136}
7137
7138/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7139///
7140/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sll_epi16&expand=5270)
7141#[inline]
7142#[target_feature(enable = "avx512bw")]
7143#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7144#[cfg_attr(test, assert_instr(vpsllw))]
7145pub fn _mm512_maskz_sll_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
7146    unsafe {
7147        let shf = _mm512_sll_epi16(a, count).as_i16x32();
7148        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
7149    }
7150}
7151
7152/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7153///
7154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sll_epi16&expand=5266)
7155#[inline]
7156#[target_feature(enable = "avx512bw,avx512vl")]
7157#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7158#[cfg_attr(test, assert_instr(vpsllw))]
7159pub fn _mm256_mask_sll_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
7160    unsafe {
7161        let shf = _mm256_sll_epi16(a, count).as_i16x16();
7162        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
7163    }
7164}
7165
7166/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7167///
7168/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sll_epi16&expand=5267)
7169#[inline]
7170#[target_feature(enable = "avx512bw,avx512vl")]
7171#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7172#[cfg_attr(test, assert_instr(vpsllw))]
7173pub fn _mm256_maskz_sll_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
7174    unsafe {
7175        let shf = _mm256_sll_epi16(a, count).as_i16x16();
7176        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
7177    }
7178}
7179
7180/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7181///
7182/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sll_epi16&expand=5263)
7183#[inline]
7184#[target_feature(enable = "avx512bw,avx512vl")]
7185#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7186#[cfg_attr(test, assert_instr(vpsllw))]
7187pub fn _mm_mask_sll_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
7188    unsafe {
7189        let shf = _mm_sll_epi16(a, count).as_i16x8();
7190        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
7191    }
7192}
7193
7194/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7195///
7196/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sll_epi16&expand=5264)
7197#[inline]
7198#[target_feature(enable = "avx512bw,avx512vl")]
7199#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7200#[cfg_attr(test, assert_instr(vpsllw))]
7201pub fn _mm_maskz_sll_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
7202    unsafe {
7203        let shf = _mm_sll_epi16(a, count).as_i16x8();
7204        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
7205    }
7206}
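
// Illustrative sketch (test-only, hypothetical helper): the `sll` form shifts
// every lane by the same scalar count, taken from the low 64 bits of `count`;
// a count of 16 or more zeroes all lanes.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn sll_epi16_sketch(a: __m512i) -> __m512i {
    let count = _mm_set_epi64x(0, 3); // shift all 32 lanes left by 3
    _mm512_sll_epi16(a, count)
}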

/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_slli_epi16&expand=5301)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_slli_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 16 {
            _mm512_setzero_si512()
        } else {
            transmute(simd_shl(a.as_u16x32(), u16x32::splat(IMM8 as u16)))
        }
    }
}

/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_slli_epi16&expand=5299)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_slli_epi16<const IMM8: u32>(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = if IMM8 >= 16 {
            u16x32::ZERO
        } else {
            simd_shl(a.as_u16x32(), u16x32::splat(IMM8 as u16))
        };
        transmute(simd_select_bitmask(k, shf, src.as_u16x32()))
    }
}

/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_slli_epi16&expand=5300)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_slli_epi16<const IMM8: u32>(k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 16 {
            _mm512_setzero_si512()
        } else {
            let shf = simd_shl(a.as_u16x32(), u16x32::splat(IMM8 as u16));
            transmute(simd_select_bitmask(k, shf, u16x32::ZERO))
        }
    }
}

/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_slli_epi16&expand=5296)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_slli_epi16<const IMM8: u32>(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = if IMM8 >= 16 {
            u16x16::ZERO
        } else {
            simd_shl(a.as_u16x16(), u16x16::splat(IMM8 as u16))
        };
        transmute(simd_select_bitmask(k, shf, src.as_u16x16()))
    }
}

/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_slli_epi16&expand=5297)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_slli_epi16<const IMM8: u32>(k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 16 {
            _mm256_setzero_si256()
        } else {
            let shf = simd_shl(a.as_u16x16(), u16x16::splat(IMM8 as u16));
            transmute(simd_select_bitmask(k, shf, u16x16::ZERO))
        }
    }
}

/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_slli_epi16&expand=5293)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_slli_epi16<const IMM8: u32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = if IMM8 >= 16 {
            u16x8::ZERO
        } else {
            simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16))
        };
        transmute(simd_select_bitmask(k, shf, src.as_u16x8()))
    }
}

/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_slli_epi16&expand=5294)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_slli_epi16<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 16 {
            _mm_setzero_si128()
        } else {
            let shf = simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16));
            transmute(simd_select_bitmask(k, shf, u16x8::ZERO))
        }
    }
}
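
// Illustrative sketch (test-only, hypothetical helper): the immediate form
// takes the count as a const generic via the legacy syntax, and a count of 16
// or more yields all-zero lanes rather than being rejected or UB.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn slli_epi16_sketch(a: __m512i) -> (__m512i, __m512i) {
    let by5 = _mm512_slli_epi16::<5>(a); // each lane shifted left by 5
    let zeroed = _mm512_slli_epi16::<16>(a); // out-of-range count: all zeros
    (by5, zeroed)
}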

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sllv_epi16&expand=5333)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_sllv_epi16(a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let count = count.as_u16x32();
        let no_overflow: u16x32 = simd_lt(count, u16x32::splat(u16::BITS as u16));
        let count = simd_select(no_overflow, count, u16x32::ZERO);
        simd_select(no_overflow, simd_shl(a.as_u16x32(), count), u16x32::ZERO).as_m512i()
    }
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sllv_epi16&expand=5331)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_sllv_epi16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    count: __m512i,
) -> __m512i {
    unsafe {
        let shf = _mm512_sllv_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
    }
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sllv_epi16&expand=5332)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_sllv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_sllv_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
    }
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sllv_epi16&expand=5330)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_sllv_epi16(a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let count = count.as_u16x16();
        let no_overflow: u16x16 = simd_lt(count, u16x16::splat(u16::BITS as u16));
        let count = simd_select(no_overflow, count, u16x16::ZERO);
        simd_select(no_overflow, simd_shl(a.as_u16x16(), count), u16x16::ZERO).as_m256i()
    }
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sllv_epi16&expand=5328)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_sllv_epi16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    count: __m256i,
) -> __m256i {
    unsafe {
        let shf = _mm256_sllv_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
    }
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sllv_epi16&expand=5329)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_sllv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_sllv_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
    }
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sllv_epi16&expand=5327)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_sllv_epi16(a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let count = count.as_u16x8();
        let no_overflow: u16x8 = simd_lt(count, u16x8::splat(u16::BITS as u16));
        let count = simd_select(no_overflow, count, u16x8::ZERO);
        simd_select(no_overflow, simd_shl(a.as_u16x8(), count), u16x8::ZERO).as_m128i()
    }
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sllv_epi16&expand=5325)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_sllv_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sllv_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
    }
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sllv_epi16&expand=5326)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_sllv_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sllv_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
    }
}
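
// Illustrative sketch (test-only, hypothetical helper): the variable form
// shifts each lane by its own count, and lanes whose count is 16 or more are
// zeroed, which is exactly what the `no_overflow` select above implements.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
fn sllv_epi16_sketch() -> __m128i {
    let a = _mm_set1_epi16(1);
    let counts = _mm_set_epi16(99, 6, 5, 4, 3, 2, 1, 0);
    // Lanes 0..=6 become 1 << count; lane 7 (count 99) becomes 0.
    _mm_sllv_epi16(a, counts)
}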

/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srl_epi16&expand=5483)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub fn _mm512_srl_epi16(a: __m512i, count: __m128i) -> __m512i {
    unsafe { transmute(vpsrlw(a.as_i16x32(), count.as_i16x8())) }
}

/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srl_epi16&expand=5481)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub fn _mm512_mask_srl_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
    unsafe {
        let shf = _mm512_srl_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
    }
}

/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srl_epi16&expand=5482)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub fn _mm512_maskz_srl_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
    unsafe {
        let shf = _mm512_srl_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srl_epi16&expand=5478)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub fn _mm256_mask_srl_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        let shf = _mm256_srl_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
    }
}

/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srl_epi16&expand=5479)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub fn _mm256_maskz_srl_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        let shf = _mm256_srl_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srl_epi16&expand=5475)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub fn _mm_mask_srl_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srl_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
    }
}

/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srl_epi16&expand=5476)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub fn _mm_maskz_srl_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srl_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
    }
}
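
// Illustrative sketch (test-only, hypothetical helper): logical right shift by
// a scalar count, filling with zeros; the masked variants follow the same
// writemask/zeromask pattern as the left-shift family above.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn srl_epi16_sketch(a: __m512i) -> __m512i {
    _mm512_srl_epi16(a, _mm_set_epi64x(0, 4)) // every lane shifted right by 4
}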

/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srli_epi16&expand=5513)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_srli_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 16 {
            _mm512_setzero_si512()
        } else {
            transmute(simd_shr(a.as_u16x32(), u16x32::splat(IMM8 as u16)))
        }
    }
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srli_epi16&expand=5511)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_srli_epi16<const IMM8: u32>(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = if IMM8 >= 16 {
            u16x32::ZERO
        } else {
            simd_shr(a.as_u16x32(), u16x32::splat(IMM8 as u16))
        };
        transmute(simd_select_bitmask(k, shf, src.as_u16x32()))
    }
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srli_epi16&expand=5512)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_srli_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Note: IMM8 is declared as i32 here, unlike the u32 used by the sibling
        // intrinsics; Intel's documentation is inconsistent about the parameter type.
        if IMM8 >= 16 {
            _mm512_setzero_si512()
        } else {
            let shf = simd_shr(a.as_u16x32(), u16x32::splat(IMM8 as u16));
            transmute(simd_select_bitmask(k, shf, u16x32::ZERO))
        }
    }
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srli_epi16&expand=5508)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_srli_epi16<const IMM8: i32>(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = _mm256_srli_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, shf.as_i16x16(), src.as_i16x16()))
    }
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srli_epi16&expand=5509)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_srli_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = _mm256_srli_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, shf.as_i16x16(), i16x16::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srli_epi16&expand=5505)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_srli_epi16<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = _mm_srli_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, shf.as_i16x8(), src.as_i16x8()))
    }
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srli_epi16&expand=5506)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_srli_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = _mm_srli_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, shf.as_i16x8(), i16x8::ZERO))
    }
}
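
// Illustrative sketch (test-only, hypothetical helper): masked immediate
// logical right shift. Note that the masked variants in this family take
// `IMM8: i32`, unlike the `u32` used by the left-shift family.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
fn srli_epi16_sketch(src: __m128i, a: __m128i) -> __m128i {
    // Lanes 0..=3 get a >> 2; lanes 4..=7 keep their values from src.
    _mm_mask_srli_epi16::<2>(src, 0b0000_1111, a)
}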

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srlv_epi16&expand=5545)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_srlv_epi16(a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let count = count.as_u16x32();
        let no_overflow: u16x32 = simd_lt(count, u16x32::splat(u16::BITS as u16));
        let count = simd_select(no_overflow, count, u16x32::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u16x32(), count), u16x32::ZERO).as_m512i()
    }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srlv_epi16&expand=5543)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_srlv_epi16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    count: __m512i,
) -> __m512i {
    unsafe {
        let shf = _mm512_srlv_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
    }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srlv_epi16&expand=5544)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_srlv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_srlv_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srlv_epi16&expand=5542)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srlv_epi16(a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let count = count.as_u16x16();
        let no_overflow: u16x16 = simd_lt(count, u16x16::splat(u16::BITS as u16));
        let count = simd_select(no_overflow, count, u16x16::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u16x16(), count), u16x16::ZERO).as_m256i()
    }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srlv_epi16&expand=5540)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_srlv_epi16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    count: __m256i,
) -> __m256i {
    unsafe {
        let shf = _mm256_srlv_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
    }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srlv_epi16&expand=5541)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_srlv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_srlv_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srlv_epi16&expand=5539)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_srlv_epi16(a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let count = count.as_u16x8();
        let no_overflow: u16x8 = simd_lt(count, u16x8::splat(u16::BITS as u16));
        let count = simd_select(no_overflow, count, u16x8::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u16x8(), count), u16x8::ZERO).as_m128i()
    }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srlv_epi16&expand=5537)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_srlv_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srlv_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
    }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srlv_epi16&expand=5538)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_srlv_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srlv_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sra_epi16&expand=5398)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub fn _mm512_sra_epi16(a: __m512i, count: __m128i) -> __m512i {
    unsafe { transmute(vpsraw(a.as_i16x32(), count.as_i16x8())) }
}

/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sra_epi16&expand=5396)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub fn _mm512_mask_sra_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
    unsafe {
        let shf = _mm512_sra_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
    }
}

/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sra_epi16&expand=5397)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub fn _mm512_maskz_sra_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
    unsafe {
        let shf = _mm512_sra_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sra_epi16&expand=5393)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub fn _mm256_mask_sra_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        let shf = _mm256_sra_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
    }
}

/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sra_epi16&expand=5394)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub fn _mm256_maskz_sra_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        let shf = _mm256_sra_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sra_epi16&expand=5390)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub fn _mm_mask_sra_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sra_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
    }
}

/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sra_epi16&expand=5391)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub fn _mm_maskz_sra_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sra_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
    }
}
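
// Illustrative sketch (test-only, hypothetical helper): arithmetic right shift
// replicates the sign bit, so negative lanes stay negative.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn sra_epi16_sketch() -> __m512i {
    let a = _mm512_set1_epi16(-32);
    _mm512_sra_epi16(a, _mm_set_epi64x(0, 4)) // every lane becomes -2
}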

/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srai_epi16&expand=5427)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_srai_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        transmute(simd_shr(a.as_i16x32(), i16x32::splat(IMM8.min(15) as i16)))
    }
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srai_epi16&expand=5425)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_srai_epi16<const IMM8: u32>(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = simd_shr(a.as_i16x32(), i16x32::splat(IMM8.min(15) as i16));
        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
    }
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srai_epi16&expand=5426)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_srai_epi16<const IMM8: u32>(k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = simd_shr(a.as_i16x32(), i16x32::splat(IMM8.min(15) as i16));
        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srai_epi16&expand=5422)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_srai_epi16<const IMM8: u32>(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16));
        transmute(simd_select_bitmask(k, r, src.as_i16x16()))
    }
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srai_epi16&expand=5423)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_srai_epi16<const IMM8: u32>(k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16));
        transmute(simd_select_bitmask(k, r, i16x16::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srai_epi16&expand=5419)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_srai_epi16<const IMM8: u32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16));
        transmute(simd_select_bitmask(k, r, src.as_i16x8()))
    }
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srai_epi16&expand=5420)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_srai_epi16<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16));
        transmute(simd_select_bitmask(k, r, i16x8::ZERO))
    }
}
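
// Illustrative sketch (test-only, hypothetical helper): unlike `srli`, an
// out-of-range `srai` count clamps to 15, so every lane collapses to its
// sign: 0 for non-negative values, -1 for negative ones.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn srai_epi16_sketch(a: __m512i) -> __m512i {
    _mm512_srai_epi16::<255>(a) // behaves exactly like a shift by 15
}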

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srav_epi16&expand=5456)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_srav_epi16(a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let count = count.as_u16x32();
        let no_overflow: u16x32 = simd_lt(count, u16x32::splat(u16::BITS as u16));
        let count = simd_select(no_overflow, transmute(count), i16x32::splat(15));
        simd_shr(a.as_i16x32(), count).as_m512i()
    }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srav_epi16&expand=5454)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_srav_epi16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    count: __m512i,
) -> __m512i {
    unsafe {
        let shf = _mm512_srav_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
    }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srav_epi16&expand=5455)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_srav_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_srav_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srav_epi16&expand=5453)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srav_epi16(a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let count = count.as_u16x16();
        let no_overflow: u16x16 = simd_lt(count, u16x16::splat(u16::BITS as u16));
        let count = simd_select(no_overflow, transmute(count), i16x16::splat(15));
        simd_shr(a.as_i16x16(), count).as_m256i()
    }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srav_epi16&expand=5451)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_srav_epi16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    count: __m256i,
) -> __m256i {
    unsafe {
        let shf = _mm256_srav_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
    }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srav_epi16&expand=5452)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_srav_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_srav_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srav_epi16&expand=5450)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_srav_epi16(a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let count = count.as_u16x8();
        let no_overflow: u16x8 = simd_lt(count, u16x8::splat(u16::BITS as u16));
        let count = simd_select(no_overflow, transmute(count), i16x8::splat(15));
        simd_shr(a.as_i16x8(), count).as_m128i()
    }
}
8251
8252/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8253///
8254/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srav_epi16&expand=5448)
8255#[inline]
8256#[target_feature(enable = "avx512bw,avx512vl")]
8257#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8258#[cfg_attr(test, assert_instr(vpsravw))]
8259#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8260pub const fn _mm_mask_srav_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
8261    unsafe {
8262        let shf = _mm_srav_epi16(a, count).as_i16x8();
8263        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
8264    }
8265}
8266
8267/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8268///
8269/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srav_epi16&expand=5449)
8270#[inline]
8271#[target_feature(enable = "avx512bw,avx512vl")]
8272#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8273#[cfg_attr(test, assert_instr(vpsravw))]
8274#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8275pub const fn _mm_maskz_srav_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
8276    unsafe {
8277        let shf = _mm_srav_epi16(a, count).as_i16x8();
8278        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
8279    }
8280}
8281
8282/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
8283///
8284/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi16&expand=4226)
8285#[inline]
8286#[target_feature(enable = "avx512bw")]
8287#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8288#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
8289pub fn _mm512_permutex2var_epi16(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
8290    unsafe { transmute(vpermi2w(a.as_i16x32(), idx.as_i16x32(), b.as_i16x32())) }
8291}
8292
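// A minimal sketch of the selector layout, assuming avx512f+avx512bw;
// `permutex2var_demo` is a hypothetical helper. With 32 lanes per operand,
// the low 5 bits of each index pick a lane and bit 5 picks the source, so
// indices 0..=31 read from a and 32..=63 read from b. Whether the compiler
// emits vpermi2w or vpermt2w depends only on which input register it decides
// to overwrite.
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512bw")]
unsafe fn permutex2var_demo(a: __m512i, b: __m512i) -> __m512i {
    // Each 32-bit chunk packs the two 16-bit indices [0, 32]: even result
    // lanes take a[0], odd result lanes take b[0].
    let idx = _mm512_set1_epi32(32 << 16);
    _mm512_permutex2var_epi16(a, idx, b)
}
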
/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi16&expand=4223)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermt2w))]
pub fn _mm512_mask_permutex2var_epi16(
    a: __m512i,
    k: __mmask32,
    idx: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32();
        transmute(simd_select_bitmask(k, permute, a.as_i16x32()))
    }
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi16&expand=4225)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] // vpermi2w or vpermt2w
pub fn _mm512_maskz_permutex2var_epi16(
    k: __mmask32,
    a: __m512i,
    idx: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32();
        transmute(simd_select_bitmask(k, permute, i16x32::ZERO))
    }
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi16&expand=4224)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermi2w))]
pub fn _mm512_mask2_permutex2var_epi16(
    a: __m512i,
    idx: __m512i,
    k: __mmask32,
    b: __m512i,
) -> __m512i {
    unsafe {
        let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32();
        transmute(simd_select_bitmask(k, permute, idx.as_i16x32()))
    }
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi16&expand=4222)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] // vpermi2w or vpermt2w
pub fn _mm256_permutex2var_epi16(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpermi2w256(a.as_i16x16(), idx.as_i16x16(), b.as_i16x16())) }
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi16&expand=4219)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermt2w))]
pub fn _mm256_mask_permutex2var_epi16(
    a: __m256i,
    k: __mmask16,
    idx: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16();
        transmute(simd_select_bitmask(k, permute, a.as_i16x16()))
    }
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi16&expand=4221)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] // vpermi2w or vpermt2w
pub fn _mm256_maskz_permutex2var_epi16(
    k: __mmask16,
    a: __m256i,
    idx: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16();
        transmute(simd_select_bitmask(k, permute, i16x16::ZERO))
    }
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi16&expand=4220)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermi2w))]
pub fn _mm256_mask2_permutex2var_epi16(
    a: __m256i,
    idx: __m256i,
    k: __mmask16,
    b: __m256i,
) -> __m256i {
    unsafe {
        let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16();
        transmute(simd_select_bitmask(k, permute, idx.as_i16x16()))
    }
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi16&expand=4218)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] // vpermi2w or vpermt2w
pub fn _mm_permutex2var_epi16(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vpermi2w128(a.as_i16x8(), idx.as_i16x8(), b.as_i16x8())) }
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi16&expand=4215)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermt2w))]
pub fn _mm_mask_permutex2var_epi16(a: __m128i, k: __mmask8, idx: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8();
        transmute(simd_select_bitmask(k, permute, a.as_i16x8()))
    }
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi16&expand=4217)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] // vpermi2w or vpermt2w
pub fn _mm_maskz_permutex2var_epi16(k: __mmask8, a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8();
        transmute(simd_select_bitmask(k, permute, i16x8::ZERO))
    }
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi16&expand=4216)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermi2w))]
pub fn _mm_mask2_permutex2var_epi16(a: __m128i, idx: __m128i, k: __mmask8, b: __m128i) -> __m128i {
    unsafe {
        let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8();
        transmute(simd_select_bitmask(k, permute, idx.as_i16x8()))
    }
}

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi16&expand=4295)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm512_permutexvar_epi16(idx: __m512i, a: __m512i) -> __m512i {
    unsafe { transmute(vpermw(a.as_i16x32(), idx.as_i16x32())) }
}

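// A minimal sketch, assuming avx512f+avx512bw; `reverse_words` is a
// hypothetical helper. Unlike the in-lane unpack operations below, vpermw
// indexes across the full 512-bit register, so a descending index vector
// reverses all 32 words.
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512bw")]
unsafe fn reverse_words(a: __m512i) -> __m512i {
    #[rustfmt::skip]
    let idx = _mm512_setr_epi16(
        31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16,
        15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
    );
    _mm512_permutexvar_epi16(idx, a)
}
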
/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi16&expand=4293)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm512_mask_permutexvar_epi16(
    src: __m512i,
    k: __mmask32,
    idx: __m512i,
    a: __m512i,
) -> __m512i {
    unsafe {
        let permute = _mm512_permutexvar_epi16(idx, a).as_i16x32();
        transmute(simd_select_bitmask(k, permute, src.as_i16x32()))
    }
}

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi16&expand=4294)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm512_maskz_permutexvar_epi16(k: __mmask32, idx: __m512i, a: __m512i) -> __m512i {
    unsafe {
        let permute = _mm512_permutexvar_epi16(idx, a).as_i16x32();
        transmute(simd_select_bitmask(k, permute, i16x32::ZERO))
    }
}

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi16&expand=4292)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm256_permutexvar_epi16(idx: __m256i, a: __m256i) -> __m256i {
    unsafe { transmute(vpermw256(a.as_i16x16(), idx.as_i16x16())) }
}

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi16&expand=4290)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm256_mask_permutexvar_epi16(
    src: __m256i,
    k: __mmask16,
    idx: __m256i,
    a: __m256i,
) -> __m256i {
    unsafe {
        let permute = _mm256_permutexvar_epi16(idx, a).as_i16x16();
        transmute(simd_select_bitmask(k, permute, src.as_i16x16()))
    }
}

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi16&expand=4291)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm256_maskz_permutexvar_epi16(k: __mmask16, idx: __m256i, a: __m256i) -> __m256i {
    unsafe {
        let permute = _mm256_permutexvar_epi16(idx, a).as_i16x16();
        transmute(simd_select_bitmask(k, permute, i16x16::ZERO))
    }
}

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutexvar_epi16&expand=4289)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm_permutexvar_epi16(idx: __m128i, a: __m128i) -> __m128i {
    unsafe { transmute(vpermw128(a.as_i16x8(), idx.as_i16x8())) }
}

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutexvar_epi16&expand=4287)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm_mask_permutexvar_epi16(src: __m128i, k: __mmask8, idx: __m128i, a: __m128i) -> __m128i {
    unsafe {
        let permute = _mm_permutexvar_epi16(idx, a).as_i16x8();
        transmute(simd_select_bitmask(k, permute, src.as_i16x8()))
    }
}

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutexvar_epi16&expand=4288)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm_maskz_permutexvar_epi16(k: __mmask8, idx: __m128i, a: __m128i) -> __m128i {
    unsafe {
        let permute = _mm_permutexvar_epi16(idx, a).as_i16x8();
        transmute(simd_select_bitmask(k, permute, i16x8::ZERO))
    }
}

/// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi16&expand=430)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu16))] // should be vpblendmw
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_blend_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i16x32(), a.as_i16x32())) }
}

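// A minimal sketch of the blend semantics, assuming avx512f+avx512bw;
// `blend_demo` is a hypothetical helper. A set mask bit takes the lane from
// b, a clear bit takes it from a.
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512bw")]
unsafe fn blend_demo() -> __m512i {
    let a = _mm512_set1_epi16(1);
    let b = _mm512_set1_epi16(2);
    // The low 16 mask bits are set, so lanes 0..16 come from b and lanes
    // 16..32 come from a.
    _mm512_mask_blend_epi16(0x0000_ffff, a, b)
}
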
/// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi16&expand=429)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu16))] // should be vpblendmw
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_blend_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i16x16(), a.as_i16x16())) }
}

/// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi16&expand=427)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu16))] // should be vpblendmw
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_blend_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i16x8(), a.as_i16x8())) }
}

/// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi8&expand=441)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu8))] // should be vpblendmb
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_blend_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i8x64(), a.as_i8x64())) }
}

/// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi8&expand=440)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu8))] // should be vpblendmb
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_blend_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i8x32(), a.as_i8x32())) }
}

/// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi8&expand=439)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu8))] // should be vpblendmb
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_blend_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i8x16(), a.as_i8x16())) }
}

/// Broadcast the low packed 16-bit integer from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastw_epi16&expand=587)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_broadcastw_epi16(a: __m128i) -> __m512i {
    unsafe {
        let a = _mm512_castsi128_si512(a).as_i16x32();
        let ret: i16x32 = simd_shuffle!(
            a,
            a,
            [
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0,
            ],
        );
        transmute(ret)
    }
}

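// A minimal sketch, assuming avx512f+avx512bw plus baseline sse2;
// `broadcastw_demo` is a hypothetical helper. Only element 0 of the 128-bit
// source matters; it is copied to all 32 word lanes.
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512bw")]
unsafe fn broadcastw_demo() -> __m512i {
    let src = _mm_setr_epi16(7, 1, 2, 3, 4, 5, 6, 8);
    // Every lane of the result is 7.
    _mm512_broadcastw_epi16(src)
}
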
/// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastw_epi16&expand=588)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_broadcastw_epi16(src: __m512i, k: __mmask32, a: __m128i) -> __m512i {
    unsafe {
        let broadcast = _mm512_broadcastw_epi16(a).as_i16x32();
        transmute(simd_select_bitmask(k, broadcast, src.as_i16x32()))
    }
}

/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastw_epi16&expand=589)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_broadcastw_epi16(k: __mmask32, a: __m128i) -> __m512i {
    unsafe {
        let broadcast = _mm512_broadcastw_epi16(a).as_i16x32();
        transmute(simd_select_bitmask(k, broadcast, i16x32::ZERO))
    }
}

/// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastw_epi16&expand=585)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_broadcastw_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i {
    unsafe {
        let broadcast = _mm256_broadcastw_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, broadcast, src.as_i16x16()))
    }
}

/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastw_epi16&expand=586)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_broadcastw_epi16(k: __mmask16, a: __m128i) -> __m256i {
    unsafe {
        let broadcast = _mm256_broadcastw_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, broadcast, i16x16::ZERO))
    }
}

/// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastw_epi16&expand=582)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_broadcastw_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let broadcast = _mm_broadcastw_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_i16x8()))
    }
}

/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastw_epi16&expand=583)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_broadcastw_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let broadcast = _mm_broadcastw_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, broadcast, i16x8::ZERO))
    }
}

/// Broadcast the low packed 8-bit integer from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastb_epi8&expand=536)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_broadcastb_epi8(a: __m128i) -> __m512i {
    unsafe {
        let a = _mm512_castsi128_si512(a).as_i8x64();
        let ret: i8x64 = simd_shuffle!(
            a,
            a,
            [
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0,
            ],
        );
        transmute(ret)
    }
}

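// The byte variant follows the same pattern; a minimal sketch assuming
// avx512f+avx512bw, with a hypothetical helper name.
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512bw")]
unsafe fn broadcastb_demo() -> __m512i {
    let src = _mm_set1_epi8(0x5a);
    // All 64 byte lanes of the result hold 0x5a.
    _mm512_broadcastb_epi8(src)
}
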
/// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastb_epi8&expand=537)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_broadcastb_epi8(src: __m512i, k: __mmask64, a: __m128i) -> __m512i {
    unsafe {
        let broadcast = _mm512_broadcastb_epi8(a).as_i8x64();
        transmute(simd_select_bitmask(k, broadcast, src.as_i8x64()))
    }
}

/// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastb_epi8&expand=538)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_broadcastb_epi8(k: __mmask64, a: __m128i) -> __m512i {
    unsafe {
        let broadcast = _mm512_broadcastb_epi8(a).as_i8x64();
        transmute(simd_select_bitmask(k, broadcast, i8x64::ZERO))
    }
}

/// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastb_epi8&expand=534)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_broadcastb_epi8(src: __m256i, k: __mmask32, a: __m128i) -> __m256i {
    unsafe {
        let broadcast = _mm256_broadcastb_epi8(a).as_i8x32();
        transmute(simd_select_bitmask(k, broadcast, src.as_i8x32()))
    }
}

/// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastb_epi8&expand=535)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_broadcastb_epi8(k: __mmask32, a: __m128i) -> __m256i {
    unsafe {
        let broadcast = _mm256_broadcastb_epi8(a).as_i8x32();
        transmute(simd_select_bitmask(k, broadcast, i8x32::ZERO))
    }
}

/// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastb_epi8&expand=531)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_broadcastb_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
    unsafe {
        let broadcast = _mm_broadcastb_epi8(a).as_i8x16();
        transmute(simd_select_bitmask(k, broadcast, src.as_i8x16()))
    }
}

/// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastb_epi8&expand=532)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_broadcastb_epi8(k: __mmask16, a: __m128i) -> __m128i {
    unsafe {
        let broadcast = _mm_broadcastb_epi8(a).as_i8x16();
        transmute(simd_select_bitmask(k, broadcast, i8x16::ZERO))
    }
}

/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi16&expand=6012)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhwd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_unpackhi_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i16x32();
        let b = b.as_i16x32();
        #[rustfmt::skip]
        let r: i16x32 = simd_shuffle!(
            a,
            b,
            [
                4, 32 + 4, 5, 32 + 5,
                6, 32 + 6, 7, 32 + 7,
                12, 32 + 12, 13, 32 + 13,
                14, 32 + 14, 15, 32 + 15,
                20, 32 + 20, 21, 32 + 21,
                22, 32 + 22, 23, 32 + 23,
                28, 32 + 28, 29, 32 + 29,
                30, 32 + 30, 31, 32 + 31,
            ],
        );
        transmute(r)
    }
}

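// A minimal sketch of the lane structure, assuming avx512f+avx512bw;
// `unpackhi_words_demo` is a hypothetical helper. The interleave runs
// independently inside each of the four 128-bit lanes; it never crosses
// them.
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512bw")]
unsafe fn unpackhi_words_demo(a: __m512i, b: __m512i) -> __m512i {
    // Per 128-bit lane the result is [a4, b4, a5, b5, a6, b6, a7, b7],
    // with indices relative to that lane.
    _mm512_unpackhi_epi16(a, b)
}
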
/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi16&expand=6010)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhwd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_unpackhi_epi16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        let unpackhi = _mm512_unpackhi_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i16x32()))
    }
}

/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi16&expand=6011)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhwd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_unpackhi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let unpackhi = _mm512_unpackhi_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, unpackhi, i16x32::ZERO))
    }
}

/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi16&expand=6007)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhwd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_unpackhi_epi16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        let unpackhi = _mm256_unpackhi_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i16x16()))
    }
}

/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi16&expand=6008)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhwd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_unpackhi_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let unpackhi = _mm256_unpackhi_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, unpackhi, i16x16::ZERO))
    }
}

/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi16&expand=6004)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhwd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_unpackhi_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let unpackhi = _mm_unpackhi_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i16x8()))
    }
}

/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi16&expand=6005)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhwd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_unpackhi_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let unpackhi = _mm_unpackhi_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, unpackhi, i16x8::ZERO))
    }
}

/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi8&expand=6039)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_unpackhi_epi8(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i8x64();
        let b = b.as_i8x64();
        #[rustfmt::skip]
        let r: i8x64 = simd_shuffle!(
            a,
            b,
            [
                8, 64 + 8, 9, 64 + 9,
                10, 64 + 10, 11, 64 + 11,
                12, 64 + 12, 13, 64 + 13,
                14, 64 + 14, 15, 64 + 15,
                24, 64 + 24, 25, 64 + 25,
                26, 64 + 26, 27, 64 + 27,
                28, 64 + 28, 29, 64 + 29,
                30, 64 + 30, 31, 64 + 31,
                40, 64 + 40, 41, 64 + 41,
                42, 64 + 42, 43, 64 + 43,
                44, 64 + 44, 45, 64 + 45,
                46, 64 + 46, 47, 64 + 47,
                56, 64 + 56, 57, 64 + 57,
                58, 64 + 58, 59, 64 + 59,
                60, 64 + 60, 61, 64 + 61,
                62, 64 + 62, 63, 64 + 63,
            ],
        );
        transmute(r)
    }
}

/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi8&expand=6037)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_unpackhi_epi8(
    src: __m512i,
    k: __mmask64,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        let unpackhi = _mm512_unpackhi_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i8x64()))
    }
}

/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi8&expand=6038)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_unpackhi_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let unpackhi = _mm512_unpackhi_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, unpackhi, i8x64::ZERO))
    }
}

/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi8&expand=6034)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_unpackhi_epi8(
    src: __m256i,
    k: __mmask32,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        let unpackhi = _mm256_unpackhi_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i8x32()))
    }
}

/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi8&expand=6035)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_unpackhi_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let unpackhi = _mm256_unpackhi_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, unpackhi, i8x32::ZERO))
    }
}

/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi8&expand=6031)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_unpackhi_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let unpackhi = _mm_unpackhi_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i8x16()))
    }
}

/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi8&expand=6032)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_unpackhi_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let unpackhi = _mm_unpackhi_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, unpackhi, i8x16::ZERO))
    }
}

/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi16&expand=6069)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_unpacklo_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i16x32();
        let b = b.as_i16x32();
        #[rustfmt::skip]
        let r: i16x32 = simd_shuffle!(
            a,
            b,
            [
                0, 32 + 0, 1, 32 + 1,
                2, 32 + 2, 3, 32 + 3,
                8, 32 + 8, 9, 32 + 9,
                10, 32 + 10, 11, 32 + 11,
                16, 32 + 16, 17, 32 + 17,
                18, 32 + 18, 19, 32 + 19,
                24, 32 + 24, 25, 32 + 25,
                26, 32 + 26, 27, 32 + 27,
            ],
        );
        transmute(r)
    }
}

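// The low-half counterpart mirrors the high-half form; a minimal sketch
// assuming avx512f+avx512bw, with a hypothetical helper name.
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512bw")]
unsafe fn unpacklo_words_demo(a: __m512i, b: __m512i) -> __m512i {
    // Per 128-bit lane the result is [a0, b0, a1, b1, a2, b2, a3, b3].
    _mm512_unpacklo_epi16(a, b)
}
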
9198/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9199///
9200/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi16&expand=6067)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_unpacklo_epi16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        let unpacklo = _mm512_unpacklo_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i16x32()))
    }
}

/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi16&expand=6068)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_unpacklo_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let unpacklo = _mm512_unpacklo_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, unpacklo, i16x32::ZERO))
    }
}

/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi16&expand=6064)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_unpacklo_epi16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        let unpacklo = _mm256_unpacklo_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i16x16()))
    }
}

/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi16&expand=6065)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_unpacklo_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let unpacklo = _mm256_unpacklo_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, unpacklo, i16x16::ZERO))
    }
}

/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi16&expand=6061)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_unpacklo_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let unpacklo = _mm_unpacklo_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i16x8()))
    }
}

/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi16&expand=6062)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_unpacklo_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let unpacklo = _mm_unpacklo_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, unpacklo, i16x8::ZERO))
    }
}

/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi8&expand=6096)
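///
/// An illustrative sketch (assumed values, not part of Intel's documentation):
///
/// ```ignore
/// // The low 8 bytes of each 128-bit lane interleave as a0, b0, a1, b1, ...
/// let a = _mm512_set1_epi8(1);
/// let b = _mm512_set1_epi8(2);
/// let r = _mm512_unpacklo_epi8(a, b); // every lane holds 1, 2, 1, 2, ...
/// ```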
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_unpacklo_epi8(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i8x64();
        let b = b.as_i8x64();
        #[rustfmt::skip]
        let r: i8x64 = simd_shuffle!(
            a,
            b,
            [
                0,  64+0,   1, 64+1,
                2,  64+2,   3, 64+3,
                4,  64+4,   5, 64+5,
                6,  64+6,   7, 64+7,
                16, 64+16, 17, 64+17,
                18, 64+18, 19, 64+19,
                20, 64+20, 21, 64+21,
                22, 64+22, 23, 64+23,
                32, 64+32, 33, 64+33,
                34, 64+34, 35, 64+35,
                36, 64+36, 37, 64+37,
                38, 64+38, 39, 64+39,
                48, 64+48, 49, 64+49,
                50, 64+50, 51, 64+51,
                52, 64+52, 53, 64+53,
                54, 64+54, 55, 64+55,
            ],
        );
        transmute(r)
    }
}

/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi8&expand=6094)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_unpacklo_epi8(
    src: __m512i,
    k: __mmask64,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        let unpacklo = _mm512_unpacklo_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i8x64()))
    }
}

/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi8&expand=6095)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_unpacklo_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let unpacklo = _mm512_unpacklo_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, unpacklo, i8x64::ZERO))
    }
}

/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi8&expand=6091)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_unpacklo_epi8(
    src: __m256i,
    k: __mmask32,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        let unpacklo = _mm256_unpacklo_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i8x32()))
    }
}

/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi8&expand=6092)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_unpacklo_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let unpacklo = _mm256_unpacklo_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, unpacklo, i8x32::ZERO))
    }
}

/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi8&expand=6088)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_unpacklo_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let unpacklo = _mm_unpacklo_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i8x16()))
    }
}

/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi8&expand=6089)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_unpacklo_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let unpacklo = _mm_unpacklo_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, unpacklo, i8x16::ZERO))
    }
}

/// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi16&expand=3795)
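///
/// An illustrative sketch (assumed values, not part of Intel's documentation):
///
/// ```ignore
/// // A masked move is effectively a blend: a set mask bit takes the element
/// // from `a`, a clear bit keeps the element from `src`.
/// let src = _mm512_set1_epi16(0);
/// let a = _mm512_set1_epi16(7);
/// let r = _mm512_mask_mov_epi16(src, 0x0000_FFFF, a);
/// // The low 16 word elements hold 7, the high 16 hold 0.
/// ```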
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_mov_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        let mov = a.as_i16x32();
        transmute(simd_select_bitmask(k, mov, src.as_i16x32()))
    }
}

/// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi16&expand=3796)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_mov_epi16(k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        let mov = a.as_i16x32();
        transmute(simd_select_bitmask(k, mov, i16x32::ZERO))
    }
}

/// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi16&expand=3793)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_mov_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        let mov = a.as_i16x16();
        transmute(simd_select_bitmask(k, mov, src.as_i16x16()))
    }
}

/// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi16&expand=3794)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_mov_epi16(k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        let mov = a.as_i16x16();
        transmute(simd_select_bitmask(k, mov, i16x16::ZERO))
    }
}

/// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi16&expand=3791)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_mov_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let mov = a.as_i16x8();
        transmute(simd_select_bitmask(k, mov, src.as_i16x8()))
    }
}

/// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi16&expand=3792)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_mov_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let mov = a.as_i16x8();
        transmute(simd_select_bitmask(k, mov, i16x8::ZERO))
    }
}

/// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi8&expand=3813)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_mov_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
    unsafe {
        let mov = a.as_i8x64();
        transmute(simd_select_bitmask(k, mov, src.as_i8x64()))
    }
}

/// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi8&expand=3814)
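///
/// An illustrative sketch (assumed values, not part of Intel's documentation):
///
/// ```ignore
/// let a = _mm512_set1_epi8(9);
/// // Only the lowest byte element survives; the other 63 are zeroed.
/// let r = _mm512_maskz_mov_epi8(1, a);
/// ```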
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_mov_epi8(k: __mmask64, a: __m512i) -> __m512i {
    unsafe {
        let mov = a.as_i8x64();
        transmute(simd_select_bitmask(k, mov, i8x64::ZERO))
    }
}

/// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi8&expand=3811)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_mov_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
    unsafe {
        let mov = a.as_i8x32();
        transmute(simd_select_bitmask(k, mov, src.as_i8x32()))
    }
}

/// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi8&expand=3812)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_mov_epi8(k: __mmask32, a: __m256i) -> __m256i {
    unsafe {
        let mov = a.as_i8x32();
        transmute(simd_select_bitmask(k, mov, i8x32::ZERO))
    }
}

/// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi8&expand=3809)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_mov_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
    unsafe {
        let mov = a.as_i8x16();
        transmute(simd_select_bitmask(k, mov, src.as_i8x16()))
    }
}

/// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi8&expand=3810)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_mov_epi8(k: __mmask16, a: __m128i) -> __m128i {
    unsafe {
        let mov = a.as_i8x16();
        transmute(simd_select_bitmask(k, mov, i8x16::ZERO))
    }
}

/// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi16&expand=4942)
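///
/// An illustrative sketch (assumed values, not part of Intel's documentation):
///
/// ```ignore
/// let src = _mm512_setzero_si512();
/// // Broadcast 5 into the even word elements; odd elements keep 0 from `src`.
/// let r = _mm512_mask_set1_epi16(src, 0x5555_5555, 5);
/// ```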
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_set1_epi16(src: __m512i, k: __mmask32, a: i16) -> __m512i {
    unsafe {
        let r = _mm512_set1_epi16(a).as_i16x32();
        transmute(simd_select_bitmask(k, r, src.as_i16x32()))
    }
}

/// Broadcast 16-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi16&expand=4943)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_set1_epi16(k: __mmask32, a: i16) -> __m512i {
    unsafe {
        let r = _mm512_set1_epi16(a).as_i16x32();
        transmute(simd_select_bitmask(k, r, i16x32::ZERO))
    }
}

/// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi16&expand=4939)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_set1_epi16(src: __m256i, k: __mmask16, a: i16) -> __m256i {
    unsafe {
        let r = _mm256_set1_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, r, src.as_i16x16()))
    }
}

/// Broadcast 16-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi16&expand=4940)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_set1_epi16(k: __mmask16, a: i16) -> __m256i {
    unsafe {
        let r = _mm256_set1_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, r, i16x16::ZERO))
    }
}

/// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi16&expand=4936)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_set1_epi16(src: __m128i, k: __mmask8, a: i16) -> __m128i {
    unsafe {
        let r = _mm_set1_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, r, src.as_i16x8()))
    }
}

/// Broadcast 16-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi16&expand=4937)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_set1_epi16(k: __mmask8, a: i16) -> __m128i {
    unsafe {
        let r = _mm_set1_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, r, i16x8::ZERO))
    }
}

/// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi8&expand=4970)
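///
/// An illustrative sketch (assumed values, not part of Intel's documentation):
///
/// ```ignore
/// let src = _mm512_set1_epi8(-1);
/// // Broadcast 3 into the low 32 byte elements; the high 32 keep -1.
/// let r = _mm512_mask_set1_epi8(src, 0x0000_0000_FFFF_FFFF, 3);
/// ```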
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_set1_epi8(src: __m512i, k: __mmask64, a: i8) -> __m512i {
    unsafe {
        let r = _mm512_set1_epi8(a).as_i8x64();
        transmute(simd_select_bitmask(k, r, src.as_i8x64()))
    }
}

/// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi8&expand=4971)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_set1_epi8(k: __mmask64, a: i8) -> __m512i {
    unsafe {
        let r = _mm512_set1_epi8(a).as_i8x64();
        transmute(simd_select_bitmask(k, r, i8x64::ZERO))
    }
}

/// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi8&expand=4967)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_set1_epi8(src: __m256i, k: __mmask32, a: i8) -> __m256i {
    unsafe {
        let r = _mm256_set1_epi8(a).as_i8x32();
        transmute(simd_select_bitmask(k, r, src.as_i8x32()))
    }
}

/// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi8&expand=4968)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_set1_epi8(k: __mmask32, a: i8) -> __m256i {
    unsafe {
        let r = _mm256_set1_epi8(a).as_i8x32();
        transmute(simd_select_bitmask(k, r, i8x32::ZERO))
    }
}

/// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi8&expand=4964)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_set1_epi8(src: __m128i, k: __mmask16, a: i8) -> __m128i {
    unsafe {
        let r = _mm_set1_epi8(a).as_i8x16();
        transmute(simd_select_bitmask(k, r, src.as_i8x16()))
    }
}

/// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi8&expand=4965)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_set1_epi8(k: __mmask16, a: i8) -> __m128i {
    unsafe {
        let r = _mm_set1_epi8(a).as_i8x16();
        transmute(simd_select_bitmask(k, r, i8x16::ZERO))
    }
}

/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shufflelo_epi16&expand=5221)
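///
/// An illustrative sketch of the IMM8 encoding (assumed input, not part of
/// Intel's documentation):
///
/// ```ignore
/// // IMM8 is four 2-bit word selectors, lowest field first: 0b11_10_01_00
/// // is the identity, while 0b00_01_10_11 reverses words 0..=3 in the low
/// // 64 bits of every 128-bit lane.
/// let a = _mm512_set1_epi16(0); // any input vector
/// let r = _mm512_shufflelo_epi16::<0b00_01_10_11>(a);
/// ```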
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_shufflelo_epi16<const IMM8: i32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i16x32();
        let r: i16x32 = simd_shuffle!(
            a,
            a,
            [
                IMM8 as u32 & 0b11,
                (IMM8 as u32 >> 2) & 0b11,
                (IMM8 as u32 >> 4) & 0b11,
                (IMM8 as u32 >> 6) & 0b11,
                4,
                5,
                6,
                7,
                (IMM8 as u32 & 0b11) + 8,
                ((IMM8 as u32 >> 2) & 0b11) + 8,
                ((IMM8 as u32 >> 4) & 0b11) + 8,
                ((IMM8 as u32 >> 6) & 0b11) + 8,
                12,
                13,
                14,
                15,
                (IMM8 as u32 & 0b11) + 16,
                ((IMM8 as u32 >> 2) & 0b11) + 16,
                ((IMM8 as u32 >> 4) & 0b11) + 16,
                ((IMM8 as u32 >> 6) & 0b11) + 16,
                20,
                21,
                22,
                23,
                (IMM8 as u32 & 0b11) + 24,
                ((IMM8 as u32 >> 2) & 0b11) + 24,
                ((IMM8 as u32 >> 4) & 0b11) + 24,
                ((IMM8 as u32 >> 6) & 0b11) + 24,
                28,
                29,
                30,
                31,
            ],
        );
        transmute(r)
    }
}

/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shufflelo_epi16&expand=5219)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_shufflelo_epi16<const IMM8: i32>(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm512_shufflelo_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_i16x32(), src.as_i16x32()))
    }
}

/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shufflelo_epi16&expand=5220)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm512_shufflelo_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_i16x32(), i16x32::ZERO))
    }
}

/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shufflelo_epi16&expand=5216)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_shufflelo_epi16<const IMM8: i32>(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shuffle = _mm256_shufflelo_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, shuffle.as_i16x16(), src.as_i16x16()))
    }
}

/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shufflelo_epi16&expand=5217)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shuffle = _mm256_shufflelo_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, shuffle.as_i16x16(), i16x16::ZERO))
    }
}

/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shufflelo_epi16&expand=5213)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_shufflelo_epi16<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shuffle = _mm_shufflelo_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, shuffle.as_i16x8(), src.as_i16x8()))
    }
}

/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shufflelo_epi16&expand=5214)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shuffle = _mm_shufflelo_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, shuffle.as_i16x8(), i16x8::ZERO))
    }
}

/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shufflehi_epi16&expand=5212)
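///
/// An illustrative sketch (assumed input, not part of Intel's documentation):
///
/// ```ignore
/// // Same 2-bit-field encoding as shufflelo, but the selectors index words
/// // 4..=7 of each 128-bit lane; the low 64 bits pass through unchanged.
/// let a = _mm512_set1_epi16(0); // any input vector
/// let r = _mm512_shufflehi_epi16::<0b00_01_10_11>(a);
/// ```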
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_shufflehi_epi16<const IMM8: i32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i16x32();
        let r: i16x32 = simd_shuffle!(
            a,
            a,
            [
                0,
                1,
                2,
                3,
                (IMM8 as u32 & 0b11) + 4,
                ((IMM8 as u32 >> 2) & 0b11) + 4,
                ((IMM8 as u32 >> 4) & 0b11) + 4,
                ((IMM8 as u32 >> 6) & 0b11) + 4,
                8,
                9,
                10,
                11,
                (IMM8 as u32 & 0b11) + 12,
                ((IMM8 as u32 >> 2) & 0b11) + 12,
                ((IMM8 as u32 >> 4) & 0b11) + 12,
                ((IMM8 as u32 >> 6) & 0b11) + 12,
                16,
                17,
                18,
                19,
                (IMM8 as u32 & 0b11) + 20,
                ((IMM8 as u32 >> 2) & 0b11) + 20,
                ((IMM8 as u32 >> 4) & 0b11) + 20,
                ((IMM8 as u32 >> 6) & 0b11) + 20,
                24,
                25,
                26,
                27,
                (IMM8 as u32 & 0b11) + 28,
                ((IMM8 as u32 >> 2) & 0b11) + 28,
                ((IMM8 as u32 >> 4) & 0b11) + 28,
                ((IMM8 as u32 >> 6) & 0b11) + 28,
            ],
        );
        transmute(r)
    }
}

/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shufflehi_epi16&expand=5210)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_shufflehi_epi16<const IMM8: i32>(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm512_shufflehi_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_i16x32(), src.as_i16x32()))
    }
}

/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shufflehi_epi16&expand=5211)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm512_shufflehi_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_i16x32(), i16x32::ZERO))
    }
}

/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shufflehi_epi16&expand=5207)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_shufflehi_epi16<const IMM8: i32>(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shuffle = _mm256_shufflehi_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, shuffle.as_i16x16(), src.as_i16x16()))
    }
}

/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shufflehi_epi16&expand=5208)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shuffle = _mm256_shufflehi_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, shuffle.as_i16x16(), i16x16::ZERO))
    }
}

/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shufflehi_epi16&expand=5204)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_shufflehi_epi16<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shuffle = _mm_shufflehi_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, shuffle.as_i16x8(), src.as_i16x8()))
    }
}

/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shufflehi_epi16&expand=5205)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shuffle = _mm_shufflehi_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, shuffle.as_i16x8(), i16x8::ZERO))
    }
}

/// Shuffle packed 8-bit integers in a according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_epi8&expand=5159)
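///
/// An illustrative sketch (assumed values, not part of Intel's documentation):
///
/// ```ignore
/// // Each control byte selects a byte from the same 128-bit lane of `a`;
/// // a control byte with its top bit set zeroes that destination byte.
/// let a = _mm512_set1_epi8(7);
/// let r = _mm512_shuffle_epi8(a, _mm512_set1_epi8(0));    // every byte is 7
/// let z = _mm512_shuffle_epi8(a, _mm512_set1_epi8(-128)); // every byte is 0
/// ```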
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufb))]
pub fn _mm512_shuffle_epi8(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpshufb(a.as_i8x64(), b.as_i8x64())) }
}

/// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_epi8&expand=5157)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufb))]
pub fn _mm512_mask_shuffle_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let shuffle = _mm512_shuffle_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, shuffle, src.as_i8x64()))
    }
}

/// Shuffle packed 8-bit integers in a according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_epi8&expand=5158)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufb))]
pub fn _mm512_maskz_shuffle_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let shuffle = _mm512_shuffle_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, shuffle, i8x64::ZERO))
    }
}

/// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_epi8&expand=5154)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufb))]
pub fn _mm256_mask_shuffle_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let shuffle = _mm256_shuffle_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, shuffle, src.as_i8x32()))
    }
}

/// Shuffle packed 8-bit integers in a according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_epi8&expand=5155)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufb))]
pub fn _mm256_maskz_shuffle_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let shuffle = _mm256_shuffle_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, shuffle, i8x32::ZERO))
    }
}

/// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_epi8&expand=5151)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufb))]
pub fn _mm_mask_shuffle_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let shuffle = _mm_shuffle_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, shuffle, src.as_i8x16()))
    }
}

/// Shuffle packed 8-bit integers in a according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_epi8&expand=5152)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufb))]
pub fn _mm_maskz_shuffle_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let shuffle = _mm_shuffle_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, shuffle, i8x16::ZERO))
    }
}

/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi16_mask&expand=5884)
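///
/// An illustrative sketch (assumed values, not part of Intel's documentation):
///
/// ```ignore
/// // Equivalent to a per-element `(a & b) != 0` test collapsed into a mask.
/// let a = _mm512_set1_epi16(0b0101);
/// let b = _mm512_set1_epi16(0b0100);
/// let k = _mm512_test_epi16_mask(a, b);
/// assert_eq!(k, u32::MAX); // every element shares a bit, all 32 bits set
/// ```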
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_test_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    let and = _mm512_and_si512(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_cmpneq_epi16_mask(and, zero)
}

/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi16_mask&expand=5883)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_test_epi16_mask(k: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    let and = _mm512_and_si512(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_mask_cmpneq_epi16_mask(k, and, zero)
}

/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi16_mask&expand=5882)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_test_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_cmpneq_epi16_mask(and, zero)
}

/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi16_mask&expand=5881)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_test_epi16_mask(k: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_mask_cmpneq_epi16_mask(k, and, zero)
}

/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi16_mask&expand=5880)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_test_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_cmpneq_epi16_mask(and, zero)
}

/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi16_mask&expand=5879)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_test_epi16_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_mask_cmpneq_epi16_mask(k, and, zero)
}

/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi8_mask&expand=5902)
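///
/// An illustrative sketch (assumed values, not part of Intel's documentation):
///
/// ```ignore
/// let a = _mm512_set1_epi8(0b10);
/// let b = _mm512_set1_epi8(0b01);
/// // No common bits, so every element tests zero and the mask is empty.
/// let k = _mm512_test_epi8_mask(a, b);
/// assert_eq!(k, 0);
/// ```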
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_test_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    let and = _mm512_and_si512(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_cmpneq_epi8_mask(and, zero)
}

/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi8_mask&expand=5901)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_test_epi8_mask(k: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    let and = _mm512_and_si512(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_mask_cmpneq_epi8_mask(k, and, zero)
}

/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi8_mask&expand=5900)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_test_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_cmpneq_epi8_mask(and, zero)
}

/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi8_mask&expand=5899)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_test_epi8_mask(k: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_mask_cmpneq_epi8_mask(k, and, zero)
}

/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi8_mask&expand=5898)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_test_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_cmpneq_epi8_mask(and, zero)
}

/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi8_mask&expand=5897)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_test_epi8_mask(k: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_mask_cmpneq_epi8_mask(k, and, zero)
}

/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi16_mask&expand=5915)
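///
/// A minimal usage sketch (hypothetical helper; assumes verified CPU
/// support): selecting the 16-bit lanes whose upper byte is completely
/// clear.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512bw")]
/// unsafe fn lanes_with_clear_high_byte(v: __m512i) -> __mmask32 {
///     let high_byte = _mm512_set1_epi16(0xff00u16 as i16);
///     // NAND test: mask bit i is set iff (v[i] & 0xff00) == 0.
///     _mm512_testn_epi16_mask(v, high_byte)
/// }
/// ```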
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_testn_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    let and = _mm512_and_si512(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_cmpeq_epi16_mask(and, zero)
}

/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi16_mask&expand=5914)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_testn_epi16_mask(k: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    let and = _mm512_and_si512(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_mask_cmpeq_epi16_mask(k, and, zero)
}

/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi16_mask&expand=5913)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_testn_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_cmpeq_epi16_mask(and, zero)
}

/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi16_mask&expand=5912)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_testn_epi16_mask(k: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_mask_cmpeq_epi16_mask(k, and, zero)
}

/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi16_mask&expand=5911)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_testn_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_cmpeq_epi16_mask(and, zero)
}

/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi16_mask&expand=5910)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_testn_epi16_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_mask_cmpeq_epi16_mask(k, and, zero)
}

/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi8_mask&expand=5933)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_testn_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    let and = _mm512_and_si512(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_cmpeq_epi8_mask(and, zero)
}

/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi8_mask&expand=5932)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_testn_epi8_mask(k: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    let and = _mm512_and_si512(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_mask_cmpeq_epi8_mask(k, and, zero)
}

/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi8_mask&expand=5931)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_testn_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_cmpeq_epi8_mask(and, zero)
}

/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi8_mask&expand=5930)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_testn_epi8_mask(k: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_mask_cmpeq_epi8_mask(k, and, zero)
}

/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi8_mask&expand=5929)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_testn_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_cmpeq_epi8_mask(and, zero)
}

/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi8_mask&expand=5928)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_testn_epi8_mask(k: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_mask_cmpeq_epi8_mask(k, and, zero)
}

/// Store 64-bit mask from a into memory.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask64&expand=5578)
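///
/// A round-trip sketch together with [`_load_mask64`] (hypothetical usage;
/// assumes verified CPU support):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512bw")]
/// unsafe fn spill_and_reload(k: __mmask64) -> __mmask64 {
///     let mut slot: __mmask64 = 0;
///     _store_mask64(&mut slot, k);
///     _load_mask64(&slot)
/// }
/// ```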
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(mov))] // should be kmovq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _store_mask64(mem_addr: *mut __mmask64, a: __mmask64) {
    ptr::write(mem_addr, a);
}

/// Store 32-bit mask from a into memory.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask32&expand=5577)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(mov))] // should be kmovd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _store_mask32(mem_addr: *mut __mmask32, a: __mmask32) {
    ptr::write(mem_addr, a);
}

/// Load 64-bit mask from memory into k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask64&expand=3318)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(mov))] // should be kmovq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _load_mask64(mem_addr: *const __mmask64) -> __mmask64 {
    ptr::read(mem_addr)
}

/// Load 32-bit mask from memory into k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask32&expand=3317)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(mov))] // should be kmovd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _load_mask32(mem_addr: *const __mmask32) -> __mmask32 {
    ptr::read(mem_addr)
}

/// Compute the absolute differences of packed unsigned 8-bit integers in a and b, then horizontally sum each consecutive 8 differences to produce eight unsigned 16-bit integers, and pack these unsigned 16-bit integers in the low 16 bits of 64-bit elements in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sad_epu8&expand=4855)
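///
/// A minimal sketch (hypothetical helper; assumes verified CPU support):
/// the total L1 distance between two 64-byte blocks is the sum of the eight
/// per-group SADs, each of which sits zero-extended in a 64-bit lane.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512bw")]
/// unsafe fn l1_distance(a: __m512i, b: __m512i) -> u64 {
///     // Eight u16 partial sums, one per group of eight bytes.
///     let sads = _mm512_sad_epu8(a, b);
///     _mm512_reduce_add_epi64(sads) as u64
/// }
/// ```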
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsadbw))]
pub fn _mm512_sad_epu8(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpsadbw(a.as_u8x64(), b.as_u8x64())) }
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dbsad_epu8&expand=2114)
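///
/// A minimal sketch (hypothetical usage; assumes verified CPU support):
/// with `IMM8 = 0`, every selected quadruplet of b is the lowest 32 bits of
/// its 128-bit lane.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512bw")]
/// unsafe fn dbsad_against_low_dword(a: __m512i, b: __m512i) -> __m512i {
///     _mm512_dbsad_epu8::<0>(a, b)
/// }
/// ```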
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub fn _mm512_dbsad_epu8<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_u8x64();
        let b = b.as_u8x64();
        let r = vdbpsadbw(a, b, IMM8);
        transmute(r)
    }
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dbsad_epu8&expand=2115)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(4)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub fn _mm512_mask_dbsad_epu8<const IMM8: i32>(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_u8x64();
        let b = b.as_u8x64();
        let r = vdbpsadbw(a, b, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_u16x32()))
    }
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dbsad_epu8&expand=2116)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub fn _mm512_maskz_dbsad_epu8<const IMM8: i32>(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_u8x64();
        let b = b.as_u8x64();
        let r = vdbpsadbw(a, b, IMM8);
        transmute(simd_select_bitmask(k, r, u16x32::ZERO))
    }
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dbsad_epu8&expand=2111)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub fn _mm256_dbsad_epu8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_u8x32();
        let b = b.as_u8x32();
        let r = vdbpsadbw256(a, b, IMM8);
        transmute(r)
    }
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dbsad_epu8&expand=2112)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(4)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub fn _mm256_mask_dbsad_epu8<const IMM8: i32>(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_u8x32();
        let b = b.as_u8x32();
        let r = vdbpsadbw256(a, b, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_u16x16()))
    }
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dbsad_epu8&expand=2113)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub fn _mm256_maskz_dbsad_epu8<const IMM8: i32>(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_u8x32();
        let b = b.as_u8x32();
        let r = vdbpsadbw256(a, b, IMM8);
        transmute(simd_select_bitmask(k, r, u16x16::ZERO))
    }
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dbsad_epu8&expand=2108)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub fn _mm_dbsad_epu8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_u8x16();
        let b = b.as_u8x16();
        let r = vdbpsadbw128(a, b, IMM8);
        transmute(r)
    }
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dbsad_epu8&expand=2109)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(4)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub fn _mm_mask_dbsad_epu8<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_u8x16();
        let b = b.as_u8x16();
        let r = vdbpsadbw128(a, b, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_u16x8()))
    }
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dbsad_epu8&expand=2110)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub fn _mm_maskz_dbsad_epu8<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_u8x16();
        let b = b.as_u8x16();
        let r = vdbpsadbw128(a, b, IMM8);
        transmute(simd_select_bitmask(k, r, u16x8::ZERO))
    }
}

/// Set each bit of mask register k based on the most significant bit of the corresponding packed 16-bit integer in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movepi16_mask&expand=3873)
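///
/// A minimal sketch (hypothetical helper; assumes verified CPU support):
/// because the most significant bit is the sign bit, this reports which
/// lanes hold negative values.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512bw")]
/// unsafe fn any_negative_lane(v: __m512i) -> bool {
///     _mm512_movepi16_mask(v) != 0
/// }
/// ```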
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovw2m))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_movepi16_mask(a: __m512i) -> __mmask32 {
    let filter = _mm512_set1_epi16(1 << 15);
    let a = _mm512_and_si512(a, filter);
    _mm512_cmpeq_epi16_mask(a, filter)
}

/// Set each bit of mask register k based on the most significant bit of the corresponding packed 16-bit integer in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movepi16_mask&expand=3872)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovw2m))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_movepi16_mask(a: __m256i) -> __mmask16 {
    let filter = _mm256_set1_epi16(1 << 15);
    let a = _mm256_and_si256(a, filter);
    _mm256_cmpeq_epi16_mask(a, filter)
}

/// Set each bit of mask register k based on the most significant bit of the corresponding packed 16-bit integer in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movepi16_mask&expand=3871)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovw2m))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_movepi16_mask(a: __m128i) -> __mmask8 {
    let filter = _mm_set1_epi16(1 << 15);
    let a = _mm_and_si128(a, filter);
    _mm_cmpeq_epi16_mask(a, filter)
}

/// Set each bit of mask register k based on the most significant bit of the corresponding packed 8-bit integer in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movepi8_mask&expand=3883)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovb2m))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_movepi8_mask(a: __m512i) -> __mmask64 {
    let filter = _mm512_set1_epi8(1 << 7);
    let a = _mm512_and_si512(a, filter);
    _mm512_cmpeq_epi8_mask(a, filter)
}

/// Set each bit of mask register k based on the most significant bit of the corresponding packed 8-bit integer in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movepi8_mask&expand=3882)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovmskb))]
// should be vpmovb2m but compiled to vpmovmskb in the test shim because that takes fewer cycles
// than using vpmovb2m plus converting the mask register to a standard register.
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_movepi8_mask(a: __m256i) -> __mmask32 {
    let filter = _mm256_set1_epi8(1 << 7);
    let a = _mm256_and_si256(a, filter);
    _mm256_cmpeq_epi8_mask(a, filter)
}

/// Set each bit of mask register k based on the most significant bit of the corresponding packed 8-bit integer in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movepi8_mask&expand=3881)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovmskb))]
// should be vpmovb2m but compiled to vpmovmskb in the test shim because that takes fewer cycles
// than using vpmovb2m plus converting the mask register to a standard register.
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_movepi8_mask(a: __m128i) -> __mmask16 {
    let filter = _mm_set1_epi8(1 << 7);
    let a = _mm_and_si128(a, filter);
    _mm_cmpeq_epi8_mask(a, filter)
}

/// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movm_epi16&expand=3886)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovm2w))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_movm_epi16(k: __mmask32) -> __m512i {
    unsafe {
        // -1 has every bit set, so each selected lane becomes all ones.
        let ones = _mm512_set1_epi16(-1).as_i16x32();
        transmute(simd_select_bitmask(k, ones, i16x32::ZERO))
    }
}

/// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movm_epi16&expand=3885)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovm2w))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_movm_epi16(k: __mmask16) -> __m256i {
    unsafe {
        // -1 has every bit set, so each selected lane becomes all ones.
        let ones = _mm256_set1_epi16(-1).as_i16x16();
        transmute(simd_select_bitmask(k, ones, i16x16::ZERO))
    }
}

/// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movm_epi16&expand=3884)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovm2w))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_movm_epi16(k: __mmask8) -> __m128i {
    unsafe {
        // -1 has every bit set, so each selected lane becomes all ones.
        let ones = _mm_set1_epi16(-1).as_i16x8();
        transmute(simd_select_bitmask(k, ones, i16x8::ZERO))
    }
}

/// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movm_epi8&expand=3895)
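///
/// A minimal sketch (hypothetical helper; assumes verified CPU support):
/// expanding a mask to 0x00/0xFF bytes gives a select vector usable with
/// plain bitwise ops.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512bw")]
/// unsafe fn blend_bytes(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
///     // 0xFF where the mask bit is 1, 0x00 where it is 0.
///     let sel = _mm512_movm_epi8(k);
///     _mm512_or_si512(_mm512_and_si512(sel, a), _mm512_andnot_si512(sel, b))
/// }
/// ```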
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovm2b))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_movm_epi8(k: __mmask64) -> __m512i {
    unsafe {
        // -1 has every bit set, so each selected lane becomes all ones.
        let ones = _mm512_set1_epi8(-1).as_i8x64();
        transmute(simd_select_bitmask(k, ones, i8x64::ZERO))
    }
}

/// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movm_epi8&expand=3894)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovm2b))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_movm_epi8(k: __mmask32) -> __m256i {
    unsafe {
        // -1 has every bit set, so each selected lane becomes all ones.
        let ones = _mm256_set1_epi8(-1).as_i8x32();
        transmute(simd_select_bitmask(k, ones, i8x32::ZERO))
    }
}

/// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movm_epi8&expand=3893)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovm2b))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_movm_epi8(k: __mmask16) -> __m128i {
    unsafe {
        // -1 has every bit set, so each selected lane becomes all ones.
        let ones = _mm_set1_epi8(-1).as_i8x16();
        transmute(simd_select_bitmask(k, ones, i8x16::ZERO))
    }
}

/// Convert 32-bit mask a into an integer value, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#_cvtmask32_u32)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _cvtmask32_u32(a: __mmask32) -> u32 {
    a
}

/// Convert integer value a into a 32-bit mask, and store the result in k.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtu32_mask32)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _cvtu32_mask32(a: u32) -> __mmask32 {
    a
}

/// Add 32-bit masks in a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kadd_mask32&expand=3207)
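///
/// The addition wraps modulo 2^32, just like `u32` arithmetic; a minimal
/// sketch (hypothetical values; assumes verified CPU support):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512bw")]
/// unsafe fn demo() {
///     assert_eq!(_kadd_mask32(0xffff_ffff, 1), 0); // wraps around
///     assert_eq!(_kadd_mask32(0b0101, 0b0001), 0b0110);
/// }
/// ```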
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kadd_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
    a.wrapping_add(b)
}

/// Add 64-bit masks in a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kadd_mask64&expand=3208)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kadd_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
    a.wrapping_add(b)
}

/// Compute the bitwise AND of 32-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kand_mask32&expand=3213)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kand_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
    a & b
}

/// Compute the bitwise AND of 64-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kand_mask64&expand=3214)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kand_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
    a & b
}

/// Compute the bitwise NOT of 32-bit mask a, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_knot_mask32&expand=3234)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _knot_mask32(a: __mmask32) -> __mmask32 {
    !a
}

/// Compute the bitwise NOT of 64-bit mask a, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_knot_mask64&expand=3235)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _knot_mask64(a: __mmask64) -> __mmask64 {
    !a
}

/// Compute the bitwise NOT of 32-bit masks a and then AND with b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kandn_mask32&expand=3219)
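///
/// A minimal sketch (hypothetical helper; assumes verified CPU support):
/// ANDN is convenient for removing one mask's lanes from another, since
/// `_kandn_mask32(drop, keep)` computes `!drop & keep`.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512bw")]
/// unsafe fn without(keep: __mmask32, drop: __mmask32) -> __mmask32 {
///     _kandn_mask32(drop, keep)
/// }
/// ```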
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kandn_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
    _knot_mask32(a) & b
}

/// Compute the bitwise NOT of 64-bit masks a and then AND with b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kandn_mask64&expand=3220)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kandn_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
    _knot_mask64(a) & b
}

/// Compute the bitwise OR of 32-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kor_mask32&expand=3240)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
    a | b
}

/// Compute the bitwise OR of 64-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kor_mask64&expand=3241)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
    a | b
}

/// Compute the bitwise XOR of 32-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxor_mask32&expand=3292)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kxor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
    a ^ b
}

/// Compute the bitwise XOR of 64-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxor_mask64&expand=3293)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kxor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
    a ^ b
}

/// Compute the bitwise XNOR of 32-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxnor_mask32&expand=3286)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kxnor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
    _knot_mask32(a ^ b)
}

/// Compute the bitwise XNOR of 64-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxnor_mask64&expand=3287)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kxnor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
    _knot_mask64(a ^ b)
}

/// Compute the bitwise OR of 32-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask32_u8)
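///
/// A minimal sketch (hypothetical helper; assumes verified CPU support)
/// that reads both results as booleans:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512bw")]
/// unsafe fn classify(a: __mmask32, b: __mmask32) -> (bool, bool) {
///     let mut all_ones = 0u8;
///     let all_zeros = _kortest_mask32_u8(a, b, &mut all_ones);
///     (all_zeros != 0, all_ones != 0)
/// }
/// ```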
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _kortest_mask32_u8(a: __mmask32, b: __mmask32, all_ones: *mut u8) -> u8 {
    let tmp = _kor_mask32(a, b);
    *all_ones = (tmp == 0xffffffff) as u8;
    (tmp == 0) as u8
}

/// Compute the bitwise OR of 64-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask64_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _kortest_mask64_u8(a: __mmask64, b: __mmask64, all_ones: *mut u8) -> u8 {
    let tmp = _kor_mask64(a, b);
    *all_ones = (tmp == 0xffffffff_ffffffff) as u8;
    (tmp == 0) as u8
}

/// Compute the bitwise OR of 32-bit masks a and b. If the result is all ones, store 1 in dst, otherwise
/// store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask32_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kortestc_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
    (_kor_mask32(a, b) == 0xffffffff) as u8
}

/// Compute the bitwise OR of 64-bit masks a and b. If the result is all ones, store 1 in dst, otherwise
/// store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask64_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kortestc_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
    (_kor_mask64(a, b) == 0xffffffff_ffffffff) as u8
}

/// Compute the bitwise OR of 32-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask32_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kortestz_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
    (_kor_mask32(a, b) == 0) as u8
}

/// Compute the bitwise OR of 64-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask64_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kortestz_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
    (_kor_mask64(a, b) == 0) as u8
}

/// Shift the bits of 32-bit mask a left by count while shifting in zeros, and store the least significant 32 bits of the result in k.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask32)
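///
/// A minimal sketch (hypothetical usage; assumes verified CPU support):
/// shifting a lane mask up by one lane; counts of 32 or more produce 0
/// instead of an out-of-range shift.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512bw")]
/// unsafe fn shift_up_one(k: __mmask32) -> __mmask32 {
///     _kshiftli_mask32::<1>(k)
/// }
/// ```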
#[inline]
#[target_feature(enable = "avx512bw")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kshiftli_mask32<const COUNT: u32>(a: __mmask32) -> __mmask32 {
    a.unbounded_shl(COUNT)
}

/// Shift the bits of 64-bit mask a left by count while shifting in zeros, and store the least significant 64 bits of the result in k.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask64)
#[inline]
#[target_feature(enable = "avx512bw")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kshiftli_mask64<const COUNT: u32>(a: __mmask64) -> __mmask64 {
    a.unbounded_shl(COUNT)
}

/// Shift the bits of 32-bit mask a right by count while shifting in zeros, and store the least significant 32 bits of the result in k.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask32)
#[inline]
#[target_feature(enable = "avx512bw")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kshiftri_mask32<const COUNT: u32>(a: __mmask32) -> __mmask32 {
    a.unbounded_shr(COUNT)
}

/// Shift the bits of 64-bit mask a right by count while shifting in zeros, and store the least significant 64 bits of the result in k.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask64)
#[inline]
#[target_feature(enable = "avx512bw")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kshiftri_mask64<const COUNT: u32>(a: __mmask64) -> __mmask64 {
    a.unbounded_shr(COUNT)
}

/// Compute the bitwise AND of 32-bit masks a and b, and if the result is all zeros, store 1 in dst,
/// otherwise store 0 in dst. Compute the bitwise NOT of a and then AND with b, if the result is all
/// zeros, store 1 in and_not, otherwise store 0 in and_not.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktest_mask32_u8)
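///
/// A minimal sketch (hypothetical helper; assumes verified CPU support):
/// one call answers both "are a and b disjoint?" and "is b a subset of a?"
/// (the latter because `!a & b == 0` exactly when every bit of b is in a).
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512bw")]
/// unsafe fn relate(a: __mmask32, b: __mmask32) -> (bool, bool) {
///     let mut b_subset_of_a = 0u8;
///     let disjoint = _ktest_mask32_u8(a, b, &mut b_subset_of_a);
///     (disjoint != 0, b_subset_of_a != 0)
/// }
/// ```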
11345#[inline]
11346#[target_feature(enable = "avx512bw")]
11347#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11348#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11349pub const unsafe fn _ktest_mask32_u8(a: __mmask32, b: __mmask32, and_not: *mut u8) -> u8 {
11350    *and_not = (_kandn_mask32(a, b) == 0) as u8;
11351    (_kand_mask32(a, b) == 0) as u8
11352}
11353
11354/// Compute the bitwise AND of 64-bit masks a and b, and if the result is all zeros, store 1 in dst,
11355/// otherwise store 0 in dst. Compute the bitwise NOT of a and then AND with b, if the result is all
11356/// zeros, store 1 in and_not, otherwise store 0 in and_not.
11357///
11358/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktest_mask64_u8)
11359#[inline]
11360#[target_feature(enable = "avx512bw")]
11361#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11362#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11363pub const unsafe fn _ktest_mask64_u8(a: __mmask64, b: __mmask64, and_not: *mut u8) -> u8 {
11364    *and_not = (_kandn_mask64(a, b) == 0) as u8;
11365    (_kand_mask64(a, b) == 0) as u8
11366}
11367
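// A worked sketch (hypothetical values): the return value reports whether
// `a & b` is zero, while `and_not` receives whether `!a & b` is zero, i.e.
// whether every set bit of b is also set in a:
//
//     let mut and_not = 0u8;
//     let zf = unsafe { _ktest_mask32_u8(0b1100, 0b0011, &mut and_not) };
//     assert_eq!(zf, 1);      // 0b1100 & 0b0011 == 0
//     assert_eq!(and_not, 0); // !0b1100 & 0b0011 == 0b0011 != 0
//
// `_ktestc_*` and `_ktestz_*` below expose the two flags individually.
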
/// Compute the bitwise NOT of 32-bit mask a and then AND with 32-bit mask b; if the result is all
/// zeros, store 1 in dst, otherwise store 0 in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestc_mask32_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _ktestc_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
    (_kandn_mask32(a, b) == 0) as u8
}

/// Compute the bitwise NOT of 64-bit mask a and then AND with 64-bit mask b; if the result is all
/// zeros, store 1 in dst, otherwise store 0 in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestc_mask64_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _ktestc_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
    (_kandn_mask64(a, b) == 0) as u8
}

/// Compute the bitwise AND of 32-bit masks a and b; if the result is all zeros, store 1 in dst,
/// otherwise store 0 in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestz_mask32_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _ktestz_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
    (_kand_mask32(a, b) == 0) as u8
}

/// Compute the bitwise AND of 64-bit masks a and b; if the result is all zeros, store 1 in dst,
/// otherwise store 0 in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestz_mask64_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _ktestz_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
    (_kand_mask64(a, b) == 0) as u8
}

/// Unpack and interleave 16 bits from masks a and b, and store the 32-bit result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kunpackw)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(mov))] // generates normal and/shift/or code instead of kunpckwd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_kunpackw(a: __mmask32, b: __mmask32) -> __mmask32 {
    ((a & 0xffff) << 16) | (b & 0xffff)
}

/// Unpack and interleave 32 bits from masks a and b, and store the 64-bit result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kunpackd)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(mov))] // generates normal and/shift/or code instead of kunpckdq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_kunpackd(a: __mmask64, b: __mmask64) -> __mmask64 {
    ((a & 0xffffffff) << 32) | (b & 0xffffffff)
}

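// Despite the "unpack and interleave" wording, the result is a plain
// concatenation: the low half comes from b and the high half from a. A sketch
// with hypothetical values:
//
//     let a: __mmask32 = 0xAAAA_1234; // only the low 16 bits survive
//     let b: __mmask32 = 0x5555_ABCD;
//     assert_eq!(_mm512_kunpackw(a, b), 0x1234_ABCD);
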
/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi16_epi8&expand=1407)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cvtepi16_epi8(a: __m512i) -> __m256i {
    unsafe {
        let a = a.as_i16x32();
        transmute::<i8x32, _>(simd_cast(a))
    }
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_epi8&expand=1408)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cvtepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
    unsafe {
        let convert = _mm512_cvtepi16_epi8(a).as_i8x32();
        transmute(simd_select_bitmask(k, convert, src.as_i8x32()))
    }
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi16_epi8&expand=1409)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_cvtepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
    unsafe {
        let convert = _mm512_cvtepi16_epi8(a).as_i8x32();
        transmute(simd_select_bitmask(k, convert, i8x32::ZERO))
    }
}

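// A truncation sketch (hypothetical values): each 16-bit lane simply keeps its
// low byte, so 0x1234 becomes 0x34, 0x0100 becomes 0x00, and -1 stays -1. With
// a writemask, unselected output bytes come from src (or zero for maskz):
//
//     let a = _mm512_set1_epi16(0x1234);
//     let lo = _mm512_cvtepi16_epi8(a);           // every byte == 0x34
//     let z = _mm512_maskz_cvtepi16_epi8(0b1, a); // only byte 0 kept
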
/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi16_epi8&expand=1404)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepi16_epi8(a: __m256i) -> __m128i {
    unsafe {
        let a = a.as_i16x16();
        transmute::<i8x16, _>(simd_cast(a))
    }
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_epi8&expand=1405)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cvtepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
    unsafe {
        let convert = _mm256_cvtepi16_epi8(a).as_i8x16();
        transmute(simd_select_bitmask(k, convert, src.as_i8x16()))
    }
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi16_epi8&expand=1406)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_cvtepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
    unsafe {
        let convert = _mm256_cvtepi16_epi8(a).as_i8x16();
        transmute(simd_select_bitmask(k, convert, i8x16::ZERO))
    }
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi16_epi8&expand=1401)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cvtepi16_epi8(a: __m128i) -> __m128i {
    unsafe {
        let a = a.as_i16x8();
        // Widen to sixteen 16-bit lanes, taking the upper eight from the zero
        // vector, so the cast below yields a full i8x16 whose upper eight
        // bytes are zero.
        let v256: i16x16 = simd_shuffle!(
            a,
            i16x8::ZERO,
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8]
        );
        transmute::<i8x16, _>(simd_cast(v256))
    }
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_epi8&expand=1402)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cvtepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let a = _mm_cvtepi16_epi8(a).as_i8x16();
        // Replace the upper eight bytes of src with zeros so that, after
        // widening k to 16 bits, the unset upper mask bits select zeros,
        // matching the zeroed upper half of the converted value.
        let src = simd_shuffle!(
            src.as_i8x16(),
            i8x16::ZERO,
            [0, 1, 2, 3, 4, 5, 6, 7, 16, 16, 16, 16, 16, 16, 16, 16]
        );
        simd_select_bitmask(k as u16, a, src).as_m128i()
    }
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi16_epi8&expand=1403)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_cvtepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
    _mm_mask_cvtepi16_epi8(_mm_setzero_si128(), k, a)
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi16_epi8&expand=1807)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
pub const fn _mm512_cvtsepi16_epi8(a: __m512i) -> __m256i {
    unsafe {
        simd_cast::<_, i8x32>(simd_imax(
            simd_imin(a.as_i16x32(), i16x32::splat(i8::MAX as _)),
            i16x32::splat(i8::MIN as _),
        ))
        .as_m256i()
    }
}

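// A signed-saturation sketch (hypothetical values): out-of-range words clamp
// to the i8 range before the narrowing cast, unlike the truncating
// `_mm512_cvtepi16_epi8` above:
//
//     let a = _mm512_set1_epi16(300);
//     let r = _mm512_cvtsepi16_epi8(a); // every byte == 127 (i8::MAX)
//     // 42 stays 42, and -300 would clamp to -128 (i8::MIN).
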
/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi16_epi8&expand=1808)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
pub const fn _mm512_mask_cvtsepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
    unsafe {
        simd_select_bitmask(k, _mm512_cvtsepi16_epi8(a).as_i8x32(), src.as_i8x32()).as_m256i()
    }
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi16_epi8&expand=1809)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
pub const fn _mm512_maskz_cvtsepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
    unsafe { simd_select_bitmask(k, _mm512_cvtsepi16_epi8(a).as_i8x32(), i8x32::ZERO).as_m256i() }
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi16_epi8&expand=1804)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
pub const fn _mm256_cvtsepi16_epi8(a: __m256i) -> __m128i {
    unsafe {
        simd_cast::<_, i8x16>(simd_imax(
            simd_imin(a.as_i16x16(), i16x16::splat(i8::MAX as _)),
            i16x16::splat(i8::MIN as _),
        ))
        .as_m128i()
    }
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi16_epi8&expand=1805)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
pub const fn _mm256_mask_cvtsepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
    unsafe {
        simd_select_bitmask(k, _mm256_cvtsepi16_epi8(a).as_i8x16(), src.as_i8x16()).as_m128i()
    }
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi16_epi8&expand=1806)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
pub const fn _mm256_maskz_cvtsepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
    unsafe { simd_select_bitmask(k, _mm256_cvtsepi16_epi8(a).as_i8x16(), i8x16::ZERO).as_m128i() }
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi16_epi8&expand=1801)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub fn _mm_cvtsepi16_epi8(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovswb128(a.as_i16x8(), i8x16::ZERO, 0b11111111)) }
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi16_epi8&expand=1802)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub fn _mm_mask_cvtsepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovswb128(a.as_i16x8(), src.as_i8x16(), k)) }
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi16_epi8&expand=1803)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub fn _mm_maskz_cvtsepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovswb128(a.as_i16x8(), i8x16::ZERO, k)) }
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi16_epi8&expand=2042)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
pub const fn _mm512_cvtusepi16_epi8(a: __m512i) -> __m256i {
    unsafe {
        simd_cast::<_, u8x32>(simd_imin(a.as_u16x32(), u16x32::splat(u8::MAX as _))).as_m256i()
    }
}

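// An unsigned-saturation sketch (hypothetical values): lanes are clamped to
// u8::MAX before narrowing, so only an upper bound needs saturating:
//
//     let a = _mm512_set1_epi16(300);
//     let r = _mm512_cvtusepi16_epi8(a); // every byte == 255 (u8::MAX)
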
/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi16_epi8&expand=2043)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
pub const fn _mm512_mask_cvtusepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
    unsafe {
        simd_select_bitmask(k, _mm512_cvtusepi16_epi8(a).as_u8x32(), src.as_u8x32()).as_m256i()
    }
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi16_epi8&expand=2044)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
pub const fn _mm512_maskz_cvtusepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
    unsafe { simd_select_bitmask(k, _mm512_cvtusepi16_epi8(a).as_u8x32(), u8x32::ZERO).as_m256i() }
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi16_epi8&expand=2039)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
pub const fn _mm256_cvtusepi16_epi8(a: __m256i) -> __m128i {
    unsafe {
        simd_cast::<_, u8x16>(simd_imin(a.as_u16x16(), u16x16::splat(u8::MAX as _))).as_m128i()
    }
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi16_epi8&expand=2040)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
pub const fn _mm256_mask_cvtusepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
    unsafe {
        simd_select_bitmask(k, _mm256_cvtusepi16_epi8(a).as_u8x16(), src.as_u8x16()).as_m128i()
    }
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi16_epi8&expand=2041)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
pub const fn _mm256_maskz_cvtusepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
    unsafe { simd_select_bitmask(k, _mm256_cvtusepi16_epi8(a).as_u8x16(), u8x16::ZERO).as_m128i() }
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi16_epi8&expand=2036)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub fn _mm_cvtusepi16_epi8(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovuswb128(a.as_u16x8(), u8x16::ZERO, 0b11111111)) }
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi16_epi8&expand=2037)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub fn _mm_mask_cvtusepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovuswb128(a.as_u16x8(), src.as_u8x16(), k)) }
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi16_epi8&expand=2038)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub fn _mm_maskz_cvtusepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovuswb128(a.as_u16x8(), u8x16::ZERO, k)) }
}

/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi8_epi16&expand=1526)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cvtepi8_epi16(a: __m256i) -> __m512i {
    unsafe {
        let a = a.as_i8x32();
        transmute::<i16x32, _>(simd_cast(a))
    }
}

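// A sign-extension sketch (hypothetical values): each byte is widened to a
// word with its sign bit replicated, so -5i8 (0xFB) becomes -5i16 (0xFFFB):
//
//     let a = _mm256_set1_epi8(-5);
//     let r = _mm512_cvtepi8_epi16(a); // every 16-bit lane == -5
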
/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi8_epi16&expand=1527)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cvtepi8_epi16(src: __m512i, k: __mmask32, a: __m256i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepi8_epi16(a).as_i16x32();
        transmute(simd_select_bitmask(k, convert, src.as_i16x32()))
    }
}

/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi8_epi16&expand=1528)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_cvtepi8_epi16(k: __mmask32, a: __m256i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepi8_epi16(a).as_i16x32();
        transmute(simd_select_bitmask(k, convert, i16x32::ZERO))
    }
}

/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi8_epi16&expand=1524)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cvtepi8_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepi8_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
    }
}

/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi8_epi16&expand=1525)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_cvtepi8_epi16(k: __mmask16, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepi8_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, convert, i16x16::ZERO))
    }
}

/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi8_epi16&expand=1521)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cvtepi8_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepi8_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
    }
}

/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi8_epi16&expand=1522)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_cvtepi8_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepi8_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
    }
}

/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu8_epi16&expand=1612)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cvtepu8_epi16(a: __m256i) -> __m512i {
    unsafe {
        let a = a.as_u8x32();
        transmute::<i16x32, _>(simd_cast(a))
    }
}

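// Zero extension, by contrast, always fills the high byte with zeros: the bit
// pattern 0xFB widens to 0x00FB (251) here, where `_mm512_cvtepi8_epi16` above
// would produce 0xFFFB (-5). A sketch with hypothetical values:
//
//     let a = _mm256_set1_epi8(-5); // bit pattern 0xFB
//     let r = _mm512_cvtepu8_epi16(a); // every 16-bit lane == 251
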
/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu8_epi16&expand=1613)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cvtepu8_epi16(src: __m512i, k: __mmask32, a: __m256i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepu8_epi16(a).as_i16x32();
        transmute(simd_select_bitmask(k, convert, src.as_i16x32()))
    }
}

/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu8_epi16&expand=1614)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_cvtepu8_epi16(k: __mmask32, a: __m256i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepu8_epi16(a).as_i16x32();
        transmute(simd_select_bitmask(k, convert, i16x32::ZERO))
    }
}

/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu8_epi16&expand=1610)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cvtepu8_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepu8_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
    }
}

/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu8_epi16&expand=1611)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_cvtepu8_epi16(k: __mmask16, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepu8_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, convert, i16x16::ZERO))
    }
}

/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu8_epi16&expand=1607)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cvtepu8_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepu8_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
    }
}

/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu8_epi16&expand=1608)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_cvtepu8_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepu8_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
    }
}

/// Shift 128-bit lanes in a left by imm8 bytes while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_bslli_epi128&expand=591)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_bslli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        const fn mask(shift: i32, i: u32) -> u32 {
            let shift = shift as u32 & 0xff;
            if shift > 15 || i % 16 < shift {
                0
            } else {
                64 + (i - shift)
            }
        }
        let a = a.as_i8x64();
        let zero = i8x64::ZERO;
        let r: i8x64 = simd_shuffle!(
            zero,
            a,
            [
                mask(IMM8, 0),
                mask(IMM8, 1),
                mask(IMM8, 2),
                mask(IMM8, 3),
                mask(IMM8, 4),
                mask(IMM8, 5),
                mask(IMM8, 6),
                mask(IMM8, 7),
                mask(IMM8, 8),
                mask(IMM8, 9),
                mask(IMM8, 10),
                mask(IMM8, 11),
                mask(IMM8, 12),
                mask(IMM8, 13),
                mask(IMM8, 14),
                mask(IMM8, 15),
                mask(IMM8, 16),
                mask(IMM8, 17),
                mask(IMM8, 18),
                mask(IMM8, 19),
                mask(IMM8, 20),
                mask(IMM8, 21),
                mask(IMM8, 22),
                mask(IMM8, 23),
                mask(IMM8, 24),
                mask(IMM8, 25),
                mask(IMM8, 26),
                mask(IMM8, 27),
                mask(IMM8, 28),
                mask(IMM8, 29),
                mask(IMM8, 30),
                mask(IMM8, 31),
                mask(IMM8, 32),
                mask(IMM8, 33),
                mask(IMM8, 34),
                mask(IMM8, 35),
                mask(IMM8, 36),
                mask(IMM8, 37),
                mask(IMM8, 38),
                mask(IMM8, 39),
                mask(IMM8, 40),
                mask(IMM8, 41),
                mask(IMM8, 42),
                mask(IMM8, 43),
                mask(IMM8, 44),
                mask(IMM8, 45),
                mask(IMM8, 46),
                mask(IMM8, 47),
                mask(IMM8, 48),
                mask(IMM8, 49),
                mask(IMM8, 50),
                mask(IMM8, 51),
                mask(IMM8, 52),
                mask(IMM8, 53),
                mask(IMM8, 54),
                mask(IMM8, 55),
                mask(IMM8, 56),
                mask(IMM8, 57),
                mask(IMM8, 58),
                mask(IMM8, 59),
                mask(IMM8, 60),
                mask(IMM8, 61),
                mask(IMM8, 62),
                mask(IMM8, 63),
            ],
        );
        transmute(r)
    }
}

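// A lane-wise byte-shift sketch (hypothetical values): each 128-bit lane is
// shifted independently, so bytes never cross a 16-byte lane boundary, and any
// imm8 greater than 15 zeroes the whole lane. `_mm512_bsrli_epi128`, defined
// next, mirrors this to the right:
//
//     let a = _mm512_set1_epi8(1);
//     let l = _mm512_bslli_epi128::<3>(a); // low 3 bytes of each lane become 0
//     let r = _mm512_bsrli_epi128::<16>(a); // every lane zeroed
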
/// Shift 128-bit lanes in a right by imm8 bytes while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_bsrli_epi128&expand=594)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 3))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_bsrli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        const fn mask(shift: i32, i: u32) -> u32 {
            let shift = shift as u32 & 0xff;
            if shift > 15 || (15 - (i % 16)) < shift {
                0
            } else {
                64 + (i + shift)
            }
        }
        let a = a.as_i8x64();
        let zero = i8x64::ZERO;
        let r: i8x64 = simd_shuffle!(
            zero,
            a,
            [
                mask(IMM8, 0),
                mask(IMM8, 1),
                mask(IMM8, 2),
                mask(IMM8, 3),
                mask(IMM8, 4),
                mask(IMM8, 5),
                mask(IMM8, 6),
                mask(IMM8, 7),
                mask(IMM8, 8),
                mask(IMM8, 9),
                mask(IMM8, 10),
                mask(IMM8, 11),
                mask(IMM8, 12),
                mask(IMM8, 13),
                mask(IMM8, 14),
                mask(IMM8, 15),
                mask(IMM8, 16),
                mask(IMM8, 17),
                mask(IMM8, 18),
                mask(IMM8, 19),
                mask(IMM8, 20),
                mask(IMM8, 21),
                mask(IMM8, 22),
                mask(IMM8, 23),
                mask(IMM8, 24),
                mask(IMM8, 25),
                mask(IMM8, 26),
                mask(IMM8, 27),
                mask(IMM8, 28),
                mask(IMM8, 29),
                mask(IMM8, 30),
                mask(IMM8, 31),
                mask(IMM8, 32),
                mask(IMM8, 33),
                mask(IMM8, 34),
                mask(IMM8, 35),
                mask(IMM8, 36),
                mask(IMM8, 37),
                mask(IMM8, 38),
                mask(IMM8, 39),
                mask(IMM8, 40),
                mask(IMM8, 41),
                mask(IMM8, 42),
                mask(IMM8, 43),
                mask(IMM8, 44),
                mask(IMM8, 45),
                mask(IMM8, 46),
                mask(IMM8, 47),
                mask(IMM8, 48),
                mask(IMM8, 49),
                mask(IMM8, 50),
                mask(IMM8, 51),
                mask(IMM8, 52),
                mask(IMM8, 53),
                mask(IMM8, 54),
                mask(IMM8, 55),
                mask(IMM8, 56),
                mask(IMM8, 57),
                mask(IMM8, 58),
                mask(IMM8, 59),
                mask(IMM8, 60),
                mask(IMM8, 61),
                mask(IMM8, 62),
                mask(IMM8, 63),
            ],
        );
        transmute(r)
    }
}

/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst.
/// Unlike [`_mm_alignr_epi8`], where the two whole 16-byte input vectors are concatenated into one 32-byte temporary,
/// this concatenation happens in 4 independent steps, each of which builds a 32-byte temporary result from one pair of 16-byte blocks
/// ([`_mm256_alignr_epi8`] likewise performs 2 such steps).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_alignr_epi8&expand=263)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_alignr_epi8<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
    const fn mask(shift: u32, i: u32) -> u32 {
        let shift = shift % 16;
        let mod_i = i % 16;
        if mod_i < (16 - shift) {
            i + shift
        } else {
            i + 48 + shift
        }
    }

    // Enforce the 8-bit immediate contract, matching the masked variants
    // below, which assert this before delegating here.
    static_assert_uimm_bits!(IMM8, 8);

    // If palignr is shifting the pair of vectors more than the size of two
    // lanes, emit zero.
    if IMM8 >= 32 {
        return _mm512_setzero_si512();
    }
    // If palignr is shifting the pair of input vectors more than one lane,
    // but less than two lanes, convert to shifting in zeroes.
    let (a, b) = if IMM8 > 16 {
        (_mm512_setzero_si512(), a)
    } else {
        (a, b)
    };
    unsafe {
        if IMM8 == 16 {
            return transmute(a);
        }

        let r: i8x64 = simd_shuffle!(
            b.as_i8x64(),
            a.as_i8x64(),
            [
                mask(IMM8 as u32, 0),
                mask(IMM8 as u32, 1),
                mask(IMM8 as u32, 2),
                mask(IMM8 as u32, 3),
                mask(IMM8 as u32, 4),
                mask(IMM8 as u32, 5),
                mask(IMM8 as u32, 6),
                mask(IMM8 as u32, 7),
                mask(IMM8 as u32, 8),
                mask(IMM8 as u32, 9),
                mask(IMM8 as u32, 10),
                mask(IMM8 as u32, 11),
                mask(IMM8 as u32, 12),
                mask(IMM8 as u32, 13),
                mask(IMM8 as u32, 14),
                mask(IMM8 as u32, 15),
                mask(IMM8 as u32, 16),
                mask(IMM8 as u32, 17),
                mask(IMM8 as u32, 18),
                mask(IMM8 as u32, 19),
                mask(IMM8 as u32, 20),
                mask(IMM8 as u32, 21),
                mask(IMM8 as u32, 22),
                mask(IMM8 as u32, 23),
                mask(IMM8 as u32, 24),
                mask(IMM8 as u32, 25),
                mask(IMM8 as u32, 26),
                mask(IMM8 as u32, 27),
                mask(IMM8 as u32, 28),
                mask(IMM8 as u32, 29),
                mask(IMM8 as u32, 30),
                mask(IMM8 as u32, 31),
                mask(IMM8 as u32, 32),
                mask(IMM8 as u32, 33),
                mask(IMM8 as u32, 34),
                mask(IMM8 as u32, 35),
                mask(IMM8 as u32, 36),
                mask(IMM8 as u32, 37),
                mask(IMM8 as u32, 38),
                mask(IMM8 as u32, 39),
                mask(IMM8 as u32, 40),
                mask(IMM8 as u32, 41),
                mask(IMM8 as u32, 42),
                mask(IMM8 as u32, 43),
                mask(IMM8 as u32, 44),
                mask(IMM8 as u32, 45),
                mask(IMM8 as u32, 46),
                mask(IMM8 as u32, 47),
                mask(IMM8 as u32, 48),
                mask(IMM8 as u32, 49),
                mask(IMM8 as u32, 50),
                mask(IMM8 as u32, 51),
                mask(IMM8 as u32, 52),
                mask(IMM8 as u32, 53),
                mask(IMM8 as u32, 54),
                mask(IMM8 as u32, 55),
                mask(IMM8 as u32, 56),
                mask(IMM8 as u32, 57),
                mask(IMM8 as u32, 58),
                mask(IMM8 as u32, 59),
                mask(IMM8 as u32, 60),
                mask(IMM8 as u32, 61),
                mask(IMM8 as u32, 62),
                mask(IMM8 as u32, 63),
            ],
        );
        transmute(r)
    }
}

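// An alignr sketch (hypothetical values): within each 128-bit lane, the lane
// of b occupies the low 16 bytes of the 32-byte temporary and the lane of a
// the high 16, so IMM8 = 1 yields bytes b[1..16] followed by a[0] per lane:
//
//     let a = _mm512_set1_epi8(0x11);
//     let b = _mm512_set1_epi8(0x22);
//     let r = _mm512_alignr_epi8::<16>(a, b); // IMM8 == 16 returns a whole
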
12331/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12332///
12333/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_alignr_epi8&expand=264)
12334#[inline]
12335#[target_feature(enable = "avx512bw")]
12336#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12337#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))]
12338#[rustc_legacy_const_generics(4)]
12339#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12340pub const fn _mm512_mask_alignr_epi8<const IMM8: i32>(
12341    src: __m512i,
12342    k: __mmask64,
12343    a: __m512i,
12344    b: __m512i,
12345) -> __m512i {
12346    unsafe {
12347        static_assert_uimm_bits!(IMM8, 8);
12348        let r = _mm512_alignr_epi8::<IMM8>(a, b);
12349        transmute(simd_select_bitmask(k, r.as_i8x64(), src.as_i8x64()))
12350    }
12351}
12352
12353/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12354///
12355/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_alignr_epi8&expand=265)
12356#[inline]
12357#[target_feature(enable = "avx512bw")]
12358#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12359#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))]
12360#[rustc_legacy_const_generics(3)]
12361#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12362pub const fn _mm512_maskz_alignr_epi8<const IMM8: i32>(
12363    k: __mmask64,
12364    a: __m512i,
12365    b: __m512i,
12366) -> __m512i {
12367    unsafe {
12368        static_assert_uimm_bits!(IMM8, 8);
12369        let r = _mm512_alignr_epi8::<IMM8>(a, b);
12370        transmute(simd_select_bitmask(k, r.as_i8x64(), i8x64::ZERO))
12371    }
12372}
12373
12374/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12375///
12376/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_alignr_epi8&expand=261)
12377#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(4)]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_alignr_epi8<const IMM8: i32>(
    src: __m256i,
    k: __mmask32,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm256_alignr_epi8::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i8x32(), src.as_i8x32()))
    }
}

/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_alignr_epi8&expand=262)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_alignr_epi8<const IMM8: i32>(
    k: __mmask32,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm256_alignr_epi8::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i8x32(), i8x32::ZERO))
    }
}

/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_alignr_epi8&expand=258)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(4)]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_alignr_epi8<const IMM8: i32>(
    src: __m128i,
    k: __mmask16,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm_alignr_epi8::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i8x16(), src.as_i8x16()))
    }
}

/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_alignr_epi8&expand=259)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_alignr_epi8<const IMM8: i32>(
    k: __mmask16,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm_alignr_epi8::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i8x16(), i8x16::ZERO))
    }
}
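
// A minimal usage sketch for the masked byte-alignment intrinsics above.
// `alignr_mask_example` is a hypothetical helper added for illustration, not
// part of this module's API: with IMM8 = 4, each 16-byte lane of the
// concatenated pair is shifted right by 4 bytes, and the zeromask then keeps
// only the bytes whose bit in `k` is set.
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
fn alignr_mask_example(a: __m128i, b: __m128i) -> __m128i {
    // Low 8 bytes come from the shifted result; high 8 bytes are zeroed.
    _mm_maskz_alignr_epi8::<4>(0b00000000_11111111, a, b)
}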

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi16_storeu_epi8&expand=1812)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub unsafe fn _mm512_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
    vpmovswbmem(mem_addr, a.as_i16x32(), k);
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi16_storeu_epi8&expand=1811)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub unsafe fn _mm256_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
    vpmovswbmem256(mem_addr, a.as_i16x16(), k);
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi16_storeu_epi8&expand=1810)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub unsafe fn _mm_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovswbmem128(mem_addr, a.as_i16x8(), k);
}
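
// A minimal sketch of the saturating masked narrowing store (illustrative
// only; `cvtsepi16_storeu_example` is a hypothetical helper, not part of this
// module): values outside i8's range saturate, and only bytes whose mask bit
// is set are written, so the rest of the buffer stays untouched.
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn cvtsepi16_storeu_example() -> [i8; 8] {
    let mut buf = [0i8; 8];
    let a = _mm_set1_epi16(300); // 300 saturates to i8::MAX (127)
    // Only the low four bytes of `buf` are written; the rest stay zero.
    _mm_mask_cvtsepi16_storeu_epi8(buf.as_mut_ptr(), 0b0000_1111, a);
    buf
}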

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_storeu_epi8&expand=1412)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
pub const unsafe fn _mm512_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
    let result = _mm512_cvtepi16_epi8(a).as_i8x32();
    // Expand the bitmask into a per-byte mask: !0 where the store is enabled.
    let mask = simd_select_bitmask(k, i8x32::splat(!0), i8x32::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, result);
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_storeu_epi8&expand=1411)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
pub const unsafe fn _mm256_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
    let result = _mm256_cvtepi16_epi8(a).as_i8x16();
    let mask = simd_select_bitmask(k, i8x16::splat(!0), i8x16::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, result);
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_storeu_epi8&expand=1410)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
pub const unsafe fn _mm_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    // `_mm_cvtepi16_epi8` yields a 16-byte vector whose high half is zero;
    // keep only the low 8 lanes so the masked store writes exactly 8 bytes.
    let result: i8x8 = simd_shuffle!(
        _mm_cvtepi16_epi8(a).as_i8x16(),
        i8x16::ZERO,
        [0, 1, 2, 3, 4, 5, 6, 7]
    );
    let mask = simd_select_bitmask(k, i8x8::splat(!0), i8x8::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, result);
}
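
// Sketch contrasting truncation with saturation (`cvtepi16_truncation_example`
// is a hypothetical helper for illustration only): the plain `cvtepi16` store
// keeps just the low byte of each 16-bit lane, so 0x0180 becomes 0x80 (-128),
// where the signed-saturating form above would produce 0x7f (127).
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn cvtepi16_truncation_example() -> [i8; 8] {
    let mut buf = [0i8; 8];
    let a = _mm_set1_epi16(0x0180);
    _mm_mask_cvtepi16_storeu_epi8(buf.as_mut_ptr(), 0xff, a);
    buf // every element is -128, not the saturated 127
}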

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi16_storeu_epi8&expand=2047)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub unsafe fn _mm512_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
    vpmovuswbmem(mem_addr, a.as_i16x32(), k);
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi16_storeu_epi8&expand=2046)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub unsafe fn _mm256_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
    vpmovuswbmem256(mem_addr, a.as_i16x16(), k);
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi16_storeu_epi8&expand=2045)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub unsafe fn _mm_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovuswbmem128(mem_addr, a.as_i16x8(), k);
}
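
// Sketch for the unsigned-saturating variant (`cvtusepi16_storeu_example` is
// a hypothetical helper for illustration only): inputs are treated as u16,
// so 256 clamps to u8::MAX rather than wrapping or saturating as a signed
// value.
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn cvtusepi16_storeu_example() -> [i8; 8] {
    let mut buf = [0i8; 8];
    let a = _mm_set1_epi16(256); // 256 clamps to 255 (0xff)
    _mm_mask_cvtusepi16_storeu_epi8(buf.as_mut_ptr(), 0xff, a);
    buf // every element is u8::MAX reinterpreted as i8 (-1)
}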

#[allow(improper_ctypes)]
unsafe extern "C" {
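    // Bindings to the LLVM builtins that implement the operations in this
    // module which are not expressed via portable `simd_*` intrinsics; each
    // `link_name` is the corresponding `llvm.x86.avx512.*` intrinsic.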
    #[link_name = "llvm.x86.avx512.pmul.hr.sw.512"]
    fn vpmulhrsw(a: i16x32, b: i16x32) -> i16x32;

    #[link_name = "llvm.x86.avx512.pmaddw.d.512"]
    fn vpmaddwd(a: i16x32, b: i16x32) -> i32x16;
    #[link_name = "llvm.x86.avx512.pmaddubs.w.512"]
    fn vpmaddubsw(a: u8x64, b: i8x64) -> i16x32;

    #[link_name = "llvm.x86.avx512.packssdw.512"]
    fn vpackssdw(a: i32x16, b: i32x16) -> i16x32;
    #[link_name = "llvm.x86.avx512.packsswb.512"]
    fn vpacksswb(a: i16x32, b: i16x32) -> i8x64;
    #[link_name = "llvm.x86.avx512.packusdw.512"]
    fn vpackusdw(a: i32x16, b: i32x16) -> u16x32;
    #[link_name = "llvm.x86.avx512.packuswb.512"]
    fn vpackuswb(a: i16x32, b: i16x32) -> u8x64;

    #[link_name = "llvm.x86.avx512.psll.w.512"]
    fn vpsllw(a: i16x32, count: i16x8) -> i16x32;

    #[link_name = "llvm.x86.avx512.psrl.w.512"]
    fn vpsrlw(a: i16x32, count: i16x8) -> i16x32;

    #[link_name = "llvm.x86.avx512.psra.w.512"]
    fn vpsraw(a: i16x32, count: i16x8) -> i16x32;

    #[link_name = "llvm.x86.avx512.vpermi2var.hi.512"]
    fn vpermi2w(a: i16x32, idx: i16x32, b: i16x32) -> i16x32;
    #[link_name = "llvm.x86.avx512.vpermi2var.hi.256"]
    fn vpermi2w256(a: i16x16, idx: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx512.vpermi2var.hi.128"]
    fn vpermi2w128(a: i16x8, idx: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.avx512.permvar.hi.512"]
    fn vpermw(a: i16x32, idx: i16x32) -> i16x32;
    #[link_name = "llvm.x86.avx512.permvar.hi.256"]
    fn vpermw256(a: i16x16, idx: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx512.permvar.hi.128"]
    fn vpermw128(a: i16x8, idx: i16x8) -> i16x8;

    #[link_name = "llvm.x86.avx512.pshuf.b.512"]
    fn vpshufb(a: i8x64, b: i8x64) -> i8x64;

    #[link_name = "llvm.x86.avx512.psad.bw.512"]
    fn vpsadbw(a: u8x64, b: u8x64) -> u64x8;

    #[link_name = "llvm.x86.avx512.dbpsadbw.512"]
    fn vdbpsadbw(a: u8x64, b: u8x64, imm8: i32) -> u16x32;
    #[link_name = "llvm.x86.avx512.dbpsadbw.256"]
    fn vdbpsadbw256(a: u8x32, b: u8x32, imm8: i32) -> u16x16;
    #[link_name = "llvm.x86.avx512.dbpsadbw.128"]
    fn vdbpsadbw128(a: u8x16, b: u8x16, imm8: i32) -> u16x8;

    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.128"]
    fn vpmovswb128(a: i16x8, src: i8x16, mask: u8) -> i8x16;

    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.128"]
    fn vpmovuswb128(a: u16x8, src: u8x16, mask: u8) -> u8x16;

    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.512"]
    fn vpmovswbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.256"]
    fn vpmovswbmem256(mem_addr: *mut i8, a: i16x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.128"]
    fn vpmovswbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.512"]
    fn vpmovuswbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.256"]
    fn vpmovuswbmem256(mem_addr: *mut i8, a: i16x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.128"]
    fn vpmovuswbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);
}

#[cfg(test)]
mod tests {
    use crate::core_arch::assert_eq_const as assert_eq;

    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;
    use crate::hint::black_box;
    use crate::mem;

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_abs_epi16() {
        let a = _mm512_set1_epi16(-1);
        let r = _mm512_abs_epi16(a);
        let e = _mm512_set1_epi16(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_abs_epi16() {
        let a = _mm512_set1_epi16(-1);
        let r = _mm512_mask_abs_epi16(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_abs_epi16(a, 0b00000000_11111111_00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_abs_epi16() {
        let a = _mm512_set1_epi16(-1);
        let r = _mm512_maskz_abs_epi16(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_abs_epi16(0b00000000_11111111_00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_abs_epi16() {
        let a = _mm256_set1_epi16(-1);
        let r = _mm256_mask_abs_epi16(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_abs_epi16(a, 0b00000000_11111111, a);
        let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_abs_epi16() {
        let a = _mm256_set1_epi16(-1);
        let r = _mm256_maskz_abs_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_abs_epi16(0b00000000_11111111, a);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_abs_epi16() {
        let a = _mm_set1_epi16(-1);
        let r = _mm_mask_abs_epi16(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_abs_epi16(a, 0b00001111, a);
        let e = _mm_set_epi16(-1, -1, -1, -1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_abs_epi16() {
        let a = _mm_set1_epi16(-1);
        let r = _mm_maskz_abs_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_abs_epi16(0b00001111, a);
        let e = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_abs_epi8() {
        let a = _mm512_set1_epi8(-1);
        let r = _mm512_abs_epi8(a);
        let e = _mm512_set1_epi8(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_abs_epi8() {
        let a = _mm512_set1_epi8(-1);
        let r = _mm512_mask_abs_epi8(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_abs_epi8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_abs_epi8() {
        let a = _mm512_set1_epi8(-1);
        let r = _mm512_maskz_abs_epi8(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_abs_epi8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_abs_epi8() {
        let a = _mm256_set1_epi8(-1);
        let r = _mm256_mask_abs_epi8(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_abs_epi8(a, 0b00000000_11111111_00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_abs_epi8() {
        let a = _mm256_set1_epi8(-1);
        let r = _mm256_maskz_abs_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_abs_epi8(0b00000000_11111111_00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_abs_epi8() {
        let a = _mm_set1_epi8(-1);
        let r = _mm_mask_abs_epi8(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_abs_epi8(a, 0b00000000_11111111, a);
        let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_abs_epi8() {
        let a = _mm_set1_epi8(-1);
        let r = _mm_maskz_abs_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_abs_epi8(0b00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_add_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_add_epi16(a, b);
        let e = _mm512_set1_epi16(3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_add_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_mask_add_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_add_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
                                 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_add_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_maskz_add_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_add_epi16(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
                                 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_add_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_mask_add_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_add_epi16(a, 0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_add_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_maskz_add_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_add_epi16(0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_add_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(2);
        let r = _mm_mask_add_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_add_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, 3, 3, 3, 3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_add_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(2);
        let r = _mm_maskz_add_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_add_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 3, 3, 3, 3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_add_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_add_epi8(a, b);
        let e = _mm512_set1_epi8(3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_add_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_mask_add_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_add_epi8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_add_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_maskz_add_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_add_epi8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_add_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_mask_add_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_add_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_add_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_maskz_add_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_add_epi8(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_add_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(2);
        let r = _mm_mask_add_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_add_epi8(a, 0b00000000_11111111, a, b);
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_add_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(2);
        let r = _mm_maskz_add_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_add_epi8(0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_adds_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(u16::MAX as i16);
        let r = _mm512_adds_epu16(a, b);
        let e = _mm512_set1_epi16(u16::MAX as i16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_adds_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(u16::MAX as i16);
        let r = _mm512_mask_adds_epu16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_adds_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_adds_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(u16::MAX as i16);
        let r = _mm512_maskz_adds_epu16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_adds_epu16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_adds_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(u16::MAX as i16);
        let r = _mm256_mask_adds_epu16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_adds_epu16(a, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_adds_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(u16::MAX as i16);
        let r = _mm256_maskz_adds_epu16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_adds_epu16(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_adds_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(u16::MAX as i16);
        let r = _mm_mask_adds_epu16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_adds_epu16(a, 0b00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi16(1, 1, 1, 1, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_adds_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(u16::MAX as i16);
        let r = _mm_maskz_adds_epu16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_adds_epu16(0b00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi16(0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_adds_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(u8::MAX as i8);
        let r = _mm512_adds_epu8(a, b);
        let e = _mm512_set1_epi8(u8::MAX as i8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_adds_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(u8::MAX as i8);
        let r = _mm512_mask_adds_epu8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_adds_epu8(
            a,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_adds_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(u8::MAX as i8);
        let r = _mm512_maskz_adds_epu8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_adds_epu8(
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_adds_epu8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(u8::MAX as i8);
        let r = _mm256_mask_adds_epu8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_adds_epu8(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_adds_epu8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(u8::MAX as i8);
        let r = _mm256_maskz_adds_epu8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_adds_epu8(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_adds_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(u8::MAX as i8);
        let r = _mm_mask_adds_epu8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_adds_epu8(a, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_adds_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(u8::MAX as i8);
        let r = _mm_maskz_adds_epu8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_adds_epu8(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_adds_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_adds_epi16(a, b);
        let e = _mm512_set1_epi16(i16::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_adds_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_mask_adds_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_adds_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_adds_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_maskz_adds_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_adds_epi16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_adds_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_mask_adds_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_adds_epi16(a, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_adds_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_maskz_adds_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_adds_epi16(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_adds_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(i16::MAX);
        let r = _mm_mask_adds_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_adds_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_adds_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(i16::MAX);
        let r = _mm_maskz_adds_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_adds_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_adds_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(i8::MAX);
        let r = _mm512_adds_epi8(a, b);
        let e = _mm512_set1_epi8(i8::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_adds_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(i8::MAX);
        let r = _mm512_mask_adds_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_adds_epi8(
            a,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_adds_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(i8::MAX);
        let r = _mm512_maskz_adds_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_adds_epi8(
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_adds_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(i8::MAX);
        let r = _mm256_mask_adds_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_adds_epi8(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_adds_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(i8::MAX);
        let r = _mm256_maskz_adds_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_adds_epi8(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_adds_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(i8::MAX);
        let r = _mm_mask_adds_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_adds_epi8(a, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_adds_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(i8::MAX);
        let r = _mm_maskz_adds_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_adds_epi8(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_sub_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_sub_epi16(a, b);
        let e = _mm512_set1_epi16(-1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_sub_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_mask_sub_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_sub_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
                                 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_sub_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_maskz_sub_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_sub_epi16(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
                                 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_sub_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_mask_sub_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_sub_epi16(a, 0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_sub_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_maskz_sub_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_sub_epi16(0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_sub_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(2);
        let r = _mm_mask_sub_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_sub_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_sub_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(2);
        let r = _mm_maskz_sub_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_sub_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_sub_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_sub_epi8(a, b);
        let e = _mm512_set1_epi8(-1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_sub_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_mask_sub_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_sub_epi8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
                                1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
                                1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
                                1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_sub_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_maskz_sub_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_sub_epi8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
                                0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
                                0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
                                0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_sub_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_mask_sub_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_sub_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
                                1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_sub_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_maskz_sub_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_sub_epi8(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
                                0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_sub_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(2);
        let r = _mm_mask_sub_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_sub_epi8(a, 0b00000000_11111111, a, b);
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_sub_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(2);
        let r = _mm_maskz_sub_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_sub_epi8(0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_subs_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(u16::MAX as i16);
        let r = _mm512_subs_epu16(a, b);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_subs_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(u16::MAX as i16);
        let r = _mm512_mask_subs_epu16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_subs_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_subs_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(u16::MAX as i16);
        let r = _mm512_maskz_subs_epu16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_subs_epu16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_subs_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(u16::MAX as i16);
        let r = _mm256_mask_subs_epu16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_subs_epu16(a, 0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_subs_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(u16::MAX as i16);
        let r = _mm256_maskz_subs_epu16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_subs_epu16(0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_subs_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(u16::MAX as i16);
        let r = _mm_mask_subs_epu16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_subs_epu16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_subs_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(u16::MAX as i16);
        let r = _mm_maskz_subs_epu16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_subs_epu16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_subs_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(u8::MAX as i8);
        let r = _mm512_subs_epu8(a, b);
        let e = _mm512_set1_epi8(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_subs_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(u8::MAX as i8);
        let r = _mm512_mask_subs_epu8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_subs_epu8(
            a,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_subs_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(u8::MAX as i8);
        let r = _mm512_maskz_subs_epu8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_subs_epu8(
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_subs_epu8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(u8::MAX as i8);
        let r = _mm256_mask_subs_epu8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_subs_epu8(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_subs_epu8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(u8::MAX as i8);
        let r = _mm256_maskz_subs_epu8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_subs_epu8(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_subs_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(u8::MAX as i8);
        let r = _mm_mask_subs_epu8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_subs_epu8(a, 0b00000000_00001111, a, b);
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_subs_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(u8::MAX as i8);
        let r = _mm_maskz_subs_epu8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_subs_epu8(0b00000000_00001111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
13702
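    // Signed saturating subtraction clamps to [i16::MIN, i16::MAX]. Note that
    // -1 - i16::MAX is exactly i16::MIN, landing on the saturation boundary
    // without overflowing, so these expectations would hold under wrapping
    // arithmetic as well.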
13703    #[simd_test(enable = "avx512bw")]
13704    const fn test_mm512_subs_epi16() {
13705        let a = _mm512_set1_epi16(-1);
13706        let b = _mm512_set1_epi16(i16::MAX);
13707        let r = _mm512_subs_epi16(a, b);
13708        let e = _mm512_set1_epi16(i16::MIN);
13709        assert_eq_m512i(r, e);
13710    }
13711
13712    #[simd_test(enable = "avx512bw")]
13713    const fn test_mm512_mask_subs_epi16() {
13714        let a = _mm512_set1_epi16(-1);
13715        let b = _mm512_set1_epi16(i16::MAX);
13716        let r = _mm512_mask_subs_epi16(a, 0, a, b);
13717        assert_eq_m512i(r, a);
13718        let r = _mm512_mask_subs_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
13719        #[rustfmt::skip]
13720        let e = _mm512_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
13721                                 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
13722        assert_eq_m512i(r, e);
13723    }
13724
13725    #[simd_test(enable = "avx512bw")]
13726    const fn test_mm512_maskz_subs_epi16() {
13727        let a = _mm512_set1_epi16(-1);
13728        let b = _mm512_set1_epi16(i16::MAX);
13729        let r = _mm512_maskz_subs_epi16(0, a, b);
13730        assert_eq_m512i(r, _mm512_setzero_si512());
13731        let r = _mm512_maskz_subs_epi16(0b00000000_00000000_00000000_00001111, a, b);
13732        #[rustfmt::skip]
13733        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13734                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
13735        assert_eq_m512i(r, e);
13736    }
13737
13738    #[simd_test(enable = "avx512bw,avx512vl")]
13739    const fn test_mm256_mask_subs_epi16() {
13740        let a = _mm256_set1_epi16(-1);
13741        let b = _mm256_set1_epi16(i16::MAX);
13742        let r = _mm256_mask_subs_epi16(a, 0, a, b);
13743        assert_eq_m256i(r, a);
13744        let r = _mm256_mask_subs_epi16(a, 0b00000000_00001111, a, b);
13745        #[rustfmt::skip]
13746        let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
13747        assert_eq_m256i(r, e);
13748    }
13749
13750    #[simd_test(enable = "avx512bw,avx512vl")]
13751    const fn test_mm256_maskz_subs_epi16() {
13752        let a = _mm256_set1_epi16(-1);
13753        let b = _mm256_set1_epi16(i16::MAX);
13754        let r = _mm256_maskz_subs_epi16(0, a, b);
13755        assert_eq_m256i(r, _mm256_setzero_si256());
13756        let r = _mm256_maskz_subs_epi16(0b00000000_00001111, a, b);
13757        #[rustfmt::skip]
13758        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
13759        assert_eq_m256i(r, e);
13760    }
13761
13762    #[simd_test(enable = "avx512bw,avx512vl")]
13763    const fn test_mm_mask_subs_epi16() {
13764        let a = _mm_set1_epi16(-1);
13765        let b = _mm_set1_epi16(i16::MAX);
13766        let r = _mm_mask_subs_epi16(a, 0, a, b);
13767        assert_eq_m128i(r, a);
13768        let r = _mm_mask_subs_epi16(a, 0b00001111, a, b);
13769        let e = _mm_set_epi16(-1, -1, -1, -1, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
13770        assert_eq_m128i(r, e);
13771    }
13772
13773    #[simd_test(enable = "avx512bw,avx512vl")]
13774    const fn test_mm_maskz_subs_epi16() {
13775        let a = _mm_set1_epi16(-1);
13776        let b = _mm_set1_epi16(i16::MAX);
13777        let r = _mm_maskz_subs_epi16(0, a, b);
13778        assert_eq_m128i(r, _mm_setzero_si128());
13779        let r = _mm_maskz_subs_epi16(0b00001111, a, b);
13780        let e = _mm_set_epi16(0, 0, 0, 0, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
13781        assert_eq_m128i(r, e);
13782    }
13783
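    // Byte analogue of the signed tests above: -1 - i8::MAX is exactly i8::MIN.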
13784    #[simd_test(enable = "avx512bw")]
13785    const fn test_mm512_subs_epi8() {
13786        let a = _mm512_set1_epi8(-1);
13787        let b = _mm512_set1_epi8(i8::MAX);
13788        let r = _mm512_subs_epi8(a, b);
13789        let e = _mm512_set1_epi8(i8::MIN);
13790        assert_eq_m512i(r, e);
13791    }
13792
13793    #[simd_test(enable = "avx512bw")]
13794    const fn test_mm512_mask_subs_epi8() {
13795        let a = _mm512_set1_epi8(-1);
13796        let b = _mm512_set1_epi8(i8::MAX);
13797        let r = _mm512_mask_subs_epi8(a, 0, a, b);
13798        assert_eq_m512i(r, a);
13799        let r = _mm512_mask_subs_epi8(
13800            a,
13801            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
13802            a,
13803            b,
13804        );
13805        #[rustfmt::skip]
13806        let e = _mm512_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
13807                                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
13808                                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
13809                                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
13810        assert_eq_m512i(r, e);
13811    }
13812
13813    #[simd_test(enable = "avx512bw")]
13814    const fn test_mm512_maskz_subs_epi8() {
13815        let a = _mm512_set1_epi8(-1);
13816        let b = _mm512_set1_epi8(i8::MAX);
13817        let r = _mm512_maskz_subs_epi8(0, a, b);
13818        assert_eq_m512i(r, _mm512_setzero_si512());
13819        let r = _mm512_maskz_subs_epi8(
13820            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
13821            a,
13822            b,
13823        );
13824        #[rustfmt::skip]
13825        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13826                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13827                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13828                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
13829        assert_eq_m512i(r, e);
13830    }
13831
13832    #[simd_test(enable = "avx512bw,avx512vl")]
13833    const fn test_mm256_mask_subs_epi8() {
13834        let a = _mm256_set1_epi8(-1);
13835        let b = _mm256_set1_epi8(i8::MAX);
13836        let r = _mm256_mask_subs_epi8(a, 0, a, b);
13837        assert_eq_m256i(r, a);
13838        let r = _mm256_mask_subs_epi8(a, 0b00000000_00000000_00000000_00001111, a, b);
13839        #[rustfmt::skip]
13840        let e = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
13841                                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
13842        assert_eq_m256i(r, e);
13843    }
13844
13845    #[simd_test(enable = "avx512bw,avx512vl")]
13846    const fn test_mm256_maskz_subs_epi8() {
13847        let a = _mm256_set1_epi8(-1);
13848        let b = _mm256_set1_epi8(i8::MAX);
13849        let r = _mm256_maskz_subs_epi8(0, a, b);
13850        assert_eq_m256i(r, _mm256_setzero_si256());
13851        let r = _mm256_maskz_subs_epi8(0b00000000_00000000_00000000_00001111, a, b);
13852        #[rustfmt::skip]
13853        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13854                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
13855        assert_eq_m256i(r, e);
13856    }
13857
13858    #[simd_test(enable = "avx512bw,avx512vl")]
13859    const fn test_mm_mask_subs_epi8() {
13860        let a = _mm_set1_epi8(-1);
13861        let b = _mm_set1_epi8(i8::MAX);
13862        let r = _mm_mask_subs_epi8(a, 0, a, b);
13863        assert_eq_m128i(r, a);
13864        let r = _mm_mask_subs_epi8(a, 0b00000000_00001111, a, b);
13865        #[rustfmt::skip]
13866        let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
13867        assert_eq_m128i(r, e);
13868    }
13869
13870    #[simd_test(enable = "avx512bw,avx512vl")]
13871    const fn test_mm_maskz_subs_epi8() {
13872        let a = _mm_set1_epi8(-1);
13873        let b = _mm_set1_epi8(i8::MAX);
13874        let r = _mm_maskz_subs_epi8(0, a, b);
13875        assert_eq_m128i(r, _mm_setzero_si128());
13876        let r = _mm_maskz_subs_epi8(0b00000000_00001111, a, b);
13877        #[rustfmt::skip]
13878        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
13879        assert_eq_m128i(r, e);
13880    }
13881
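    // `mulhi_epu16` keeps the high 16 bits of each 32-bit product. With
    // 1 * 1 = 1 the high half is 0 in every lane, which is why the maskz
    // expectations below are all zeros regardless of the mask.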
13882    #[simd_test(enable = "avx512bw")]
13883    const fn test_mm512_mulhi_epu16() {
13884        let a = _mm512_set1_epi16(1);
13885        let b = _mm512_set1_epi16(1);
13886        let r = _mm512_mulhi_epu16(a, b);
13887        let e = _mm512_set1_epi16(0);
13888        assert_eq_m512i(r, e);
13889    }
13890
13891    #[simd_test(enable = "avx512bw")]
13892    const fn test_mm512_mask_mulhi_epu16() {
13893        let a = _mm512_set1_epi16(1);
13894        let b = _mm512_set1_epi16(1);
13895        let r = _mm512_mask_mulhi_epu16(a, 0, a, b);
13896        assert_eq_m512i(r, a);
13897        let r = _mm512_mask_mulhi_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
13898        #[rustfmt::skip]
13899        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13900                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
13901        assert_eq_m512i(r, e);
13902    }
13903
13904    #[simd_test(enable = "avx512bw")]
13905    const fn test_mm512_maskz_mulhi_epu16() {
13906        let a = _mm512_set1_epi16(1);
13907        let b = _mm512_set1_epi16(1);
13908        let r = _mm512_maskz_mulhi_epu16(0, a, b);
13909        assert_eq_m512i(r, _mm512_setzero_si512());
13910        let r = _mm512_maskz_mulhi_epu16(0b00000000_00000000_00000000_00001111, a, b);
13911        #[rustfmt::skip]
13912        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13913                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
13914        assert_eq_m512i(r, e);
13915    }
13916
13917    #[simd_test(enable = "avx512bw,avx512vl")]
13918    const fn test_mm256_mask_mulhi_epu16() {
13919        let a = _mm256_set1_epi16(1);
13920        let b = _mm256_set1_epi16(1);
13921        let r = _mm256_mask_mulhi_epu16(a, 0, a, b);
13922        assert_eq_m256i(r, a);
13923        let r = _mm256_mask_mulhi_epu16(a, 0b00000000_00001111, a, b);
13924        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
13925        assert_eq_m256i(r, e);
13926    }
13927
13928    #[simd_test(enable = "avx512bw,avx512vl")]
13929    const fn test_mm256_maskz_mulhi_epu16() {
13930        let a = _mm256_set1_epi16(1);
13931        let b = _mm256_set1_epi16(1);
13932        let r = _mm256_maskz_mulhi_epu16(0, a, b);
13933        assert_eq_m256i(r, _mm256_setzero_si256());
13934        let r = _mm256_maskz_mulhi_epu16(0b00000000_00001111, a, b);
13935        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
13936        assert_eq_m256i(r, e);
13937    }
13938
13939    #[simd_test(enable = "avx512bw,avx512vl")]
13940    const fn test_mm_mask_mulhi_epu16() {
13941        let a = _mm_set1_epi16(1);
13942        let b = _mm_set1_epi16(1);
13943        let r = _mm_mask_mulhi_epu16(a, 0, a, b);
13944        assert_eq_m128i(r, a);
13945        let r = _mm_mask_mulhi_epu16(a, 0b00001111, a, b);
13946        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
13947        assert_eq_m128i(r, e);
13948    }
13949
13950    #[simd_test(enable = "avx512bw,avx512vl")]
13951    const fn test_mm_maskz_mulhi_epu16() {
13952        let a = _mm_set1_epi16(1);
13953        let b = _mm_set1_epi16(1);
13954        let r = _mm_maskz_mulhi_epu16(0, a, b);
13955        assert_eq_m128i(r, _mm_setzero_si128());
13956        let r = _mm_maskz_mulhi_epu16(0b00001111, a, b);
13957        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
13958        assert_eq_m128i(r, e);
13959    }
13960
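    // Signed variant of `mulhi`; for these inputs the results are identical to
    // the unsigned tests above.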
13961    #[simd_test(enable = "avx512bw")]
13962    const fn test_mm512_mulhi_epi16() {
13963        let a = _mm512_set1_epi16(1);
13964        let b = _mm512_set1_epi16(1);
13965        let r = _mm512_mulhi_epi16(a, b);
13966        let e = _mm512_set1_epi16(0);
13967        assert_eq_m512i(r, e);
13968    }
13969
13970    #[simd_test(enable = "avx512bw")]
13971    const fn test_mm512_mask_mulhi_epi16() {
13972        let a = _mm512_set1_epi16(1);
13973        let b = _mm512_set1_epi16(1);
13974        let r = _mm512_mask_mulhi_epi16(a, 0, a, b);
13975        assert_eq_m512i(r, a);
13976        let r = _mm512_mask_mulhi_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
13977        #[rustfmt::skip]
13978        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13979                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
13980        assert_eq_m512i(r, e);
13981    }
13982
13983    #[simd_test(enable = "avx512bw")]
13984    const fn test_mm512_maskz_mulhi_epi16() {
13985        let a = _mm512_set1_epi16(1);
13986        let b = _mm512_set1_epi16(1);
13987        let r = _mm512_maskz_mulhi_epi16(0, a, b);
13988        assert_eq_m512i(r, _mm512_setzero_si512());
13989        let r = _mm512_maskz_mulhi_epi16(0b00000000_00000000_00000000_00001111, a, b);
13990        #[rustfmt::skip]
13991        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13992                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
13993        assert_eq_m512i(r, e);
13994    }
13995
13996    #[simd_test(enable = "avx512bw,avx512vl")]
13997    const fn test_mm256_mask_mulhi_epi16() {
13998        let a = _mm256_set1_epi16(1);
13999        let b = _mm256_set1_epi16(1);
14000        let r = _mm256_mask_mulhi_epi16(a, 0, a, b);
14001        assert_eq_m256i(r, a);
14002        let r = _mm256_mask_mulhi_epi16(a, 0b00000000_00001111, a, b);
14003        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
14004        assert_eq_m256i(r, e);
14005    }
14006
14007    #[simd_test(enable = "avx512bw,avx512vl")]
14008    const fn test_mm256_maskz_mulhi_epi16() {
14009        let a = _mm256_set1_epi16(1);
14010        let b = _mm256_set1_epi16(1);
14011        let r = _mm256_maskz_mulhi_epi16(0, a, b);
14012        assert_eq_m256i(r, _mm256_setzero_si256());
14013        let r = _mm256_maskz_mulhi_epi16(0b00000000_00001111, a, b);
14014        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
14015        assert_eq_m256i(r, e);
14016    }
14017
14018    #[simd_test(enable = "avx512bw,avx512vl")]
14019    const fn test_mm_mask_mulhi_epi16() {
14020        let a = _mm_set1_epi16(1);
14021        let b = _mm_set1_epi16(1);
14022        let r = _mm_mask_mulhi_epi16(a, 0, a, b);
14023        assert_eq_m128i(r, a);
14024        let r = _mm_mask_mulhi_epi16(a, 0b00001111, a, b);
14025        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
14026        assert_eq_m128i(r, e);
14027    }
14028
14029    #[simd_test(enable = "avx512bw,avx512vl")]
14030    const fn test_mm_maskz_mulhi_epi16() {
14031        let a = _mm_set1_epi16(1);
14032        let b = _mm_set1_epi16(1);
14033        let r = _mm_maskz_mulhi_epi16(0, a, b);
14034        assert_eq_m128i(r, _mm_setzero_si128());
14035        let r = _mm_maskz_mulhi_epi16(0b00001111, a, b);
14036        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
14037        assert_eq_m128i(r, e);
14038    }
14039
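    // `mulhrs_epi16` is the Q15 fixed-point multiply: each lane computes
    // (((a * b) >> 14) + 1) >> 1, i.e. the rounded high half, so 1 * 1 gives 0.
    // For example, 0x4000 * 0x4000 (0.5 * 0.5 in Q15) would yield 0x2000 (0.25).
    // These tests are plain `fn` rather than `const fn`, presumably because the
    // backing vpmulhrsw intrinsic cannot be evaluated in const contexts.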
14040    #[simd_test(enable = "avx512bw")]
14041    fn test_mm512_mulhrs_epi16() {
14042        let a = _mm512_set1_epi16(1);
14043        let b = _mm512_set1_epi16(1);
14044        let r = _mm512_mulhrs_epi16(a, b);
14045        let e = _mm512_set1_epi16(0);
14046        assert_eq_m512i(r, e);
14047    }
14048
14049    #[simd_test(enable = "avx512bw")]
14050    fn test_mm512_mask_mulhrs_epi16() {
14051        let a = _mm512_set1_epi16(1);
14052        let b = _mm512_set1_epi16(1);
14053        let r = _mm512_mask_mulhrs_epi16(a, 0, a, b);
14054        assert_eq_m512i(r, a);
14055        let r = _mm512_mask_mulhrs_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
14056        #[rustfmt::skip]
14057        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
14058                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
14059        assert_eq_m512i(r, e);
14060    }
14061
14062    #[simd_test(enable = "avx512bw")]
14063    fn test_mm512_maskz_mulhrs_epi16() {
14064        let a = _mm512_set1_epi16(1);
14065        let b = _mm512_set1_epi16(1);
14066        let r = _mm512_maskz_mulhrs_epi16(0, a, b);
14067        assert_eq_m512i(r, _mm512_setzero_si512());
14068        let r = _mm512_maskz_mulhrs_epi16(0b00000000_00000000_00000000_00001111, a, b);
14069        #[rustfmt::skip]
14070        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
14071                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
14072        assert_eq_m512i(r, e);
14073    }
14074
14075    #[simd_test(enable = "avx512bw,avx512vl")]
14076    fn test_mm256_mask_mulhrs_epi16() {
14077        let a = _mm256_set1_epi16(1);
14078        let b = _mm256_set1_epi16(1);
14079        let r = _mm256_mask_mulhrs_epi16(a, 0, a, b);
14080        assert_eq_m256i(r, a);
14081        let r = _mm256_mask_mulhrs_epi16(a, 0b00000000_00001111, a, b);
14082        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
14083        assert_eq_m256i(r, e);
14084    }
14085
14086    #[simd_test(enable = "avx512bw,avx512vl")]
14087    fn test_mm256_maskz_mulhrs_epi16() {
14088        let a = _mm256_set1_epi16(1);
14089        let b = _mm256_set1_epi16(1);
14090        let r = _mm256_maskz_mulhrs_epi16(0, a, b);
14091        assert_eq_m256i(r, _mm256_setzero_si256());
14092        let r = _mm256_maskz_mulhrs_epi16(0b00000000_00001111, a, b);
14093        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
14094        assert_eq_m256i(r, e);
14095    }
14096
14097    #[simd_test(enable = "avx512bw,avx512vl")]
14098    fn test_mm_mask_mulhrs_epi16() {
14099        let a = _mm_set1_epi16(1);
14100        let b = _mm_set1_epi16(1);
14101        let r = _mm_mask_mulhrs_epi16(a, 0, a, b);
14102        assert_eq_m128i(r, a);
14103        let r = _mm_mask_mulhrs_epi16(a, 0b00001111, a, b);
14104        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
14105        assert_eq_m128i(r, e);
14106    }
14107
14108    #[simd_test(enable = "avx512bw,avx512vl")]
14109    fn test_mm_maskz_mulhrs_epi16() {
14110        let a = _mm_set1_epi16(1);
14111        let b = _mm_set1_epi16(1);
14112        let r = _mm_maskz_mulhrs_epi16(0, a, b);
14113        assert_eq_m128i(r, _mm_setzero_si128());
14114        let r = _mm_maskz_mulhrs_epi16(0b00001111, a, b);
14115        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
14116        assert_eq_m128i(r, e);
14117    }
14118
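    // `mullo_epi16` keeps the low 16 bits of each product, so 1 * 1 yields 1
    // in every selected lane.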
14119    #[simd_test(enable = "avx512bw")]
14120    const fn test_mm512_mullo_epi16() {
14121        let a = _mm512_set1_epi16(1);
14122        let b = _mm512_set1_epi16(1);
14123        let r = _mm512_mullo_epi16(a, b);
14124        let e = _mm512_set1_epi16(1);
14125        assert_eq_m512i(r, e);
14126    }
14127
14128    #[simd_test(enable = "avx512bw")]
14129    const fn test_mm512_mask_mullo_epi16() {
14130        let a = _mm512_set1_epi16(1);
14131        let b = _mm512_set1_epi16(1);
14132        let r = _mm512_mask_mullo_epi16(a, 0, a, b);
14133        assert_eq_m512i(r, a);
14134        let r = _mm512_mask_mullo_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
14135        #[rustfmt::skip]
14136        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
14137                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
14138        assert_eq_m512i(r, e);
14139    }
14140
14141    #[simd_test(enable = "avx512bw")]
14142    const fn test_mm512_maskz_mullo_epi16() {
14143        let a = _mm512_set1_epi16(1);
14144        let b = _mm512_set1_epi16(1);
14145        let r = _mm512_maskz_mullo_epi16(0, a, b);
14146        assert_eq_m512i(r, _mm512_setzero_si512());
14147        let r = _mm512_maskz_mullo_epi16(0b00000000_00000000_00000000_00001111, a, b);
14148        #[rustfmt::skip]
14149        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
14150                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
14151        assert_eq_m512i(r, e);
14152    }
14153
14154    #[simd_test(enable = "avx512bw,avx512vl")]
14155    const fn test_mm256_mask_mullo_epi16() {
14156        let a = _mm256_set1_epi16(1);
14157        let b = _mm256_set1_epi16(1);
14158        let r = _mm256_mask_mullo_epi16(a, 0, a, b);
14159        assert_eq_m256i(r, a);
14160        let r = _mm256_mask_mullo_epi16(a, 0b00000000_00001111, a, b);
14161        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
14162        assert_eq_m256i(r, e);
14163    }
14164
14165    #[simd_test(enable = "avx512bw,avx512vl")]
14166    const fn test_mm256_maskz_mullo_epi16() {
14167        let a = _mm256_set1_epi16(1);
14168        let b = _mm256_set1_epi16(1);
14169        let r = _mm256_maskz_mullo_epi16(0, a, b);
14170        assert_eq_m256i(r, _mm256_setzero_si256());
14171        let r = _mm256_maskz_mullo_epi16(0b00000000_00001111, a, b);
14172        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
14173        assert_eq_m256i(r, e);
14174    }
14175
14176    #[simd_test(enable = "avx512bw,avx512vl")]
14177    const fn test_mm_mask_mullo_epi16() {
14178        let a = _mm_set1_epi16(1);
14179        let b = _mm_set1_epi16(1);
14180        let r = _mm_mask_mullo_epi16(a, 0, a, b);
14181        assert_eq_m128i(r, a);
14182        let r = _mm_mask_mullo_epi16(a, 0b00001111, a, b);
14183        let e = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 1);
14184        assert_eq_m128i(r, e);
14185    }
14186
14187    #[simd_test(enable = "avx512bw,avx512vl")]
14188    const fn test_mm_maskz_mullo_epi16() {
14189        let a = _mm_set1_epi16(1);
14190        let b = _mm_set1_epi16(1);
14191        let r = _mm_maskz_mullo_epi16(0, a, b);
14192        assert_eq_m128i(r, _mm_setzero_si128());
14193        let r = _mm_maskz_mullo_epi16(0b00001111, a, b);
14194        let e = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1);
14195        assert_eq_m128i(r, e);
14196    }
14197
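    // The max/min tests below use non-uniform inputs, so the mask layout
    // matters: mask bit i selects lane i, and `_mm512_set_epi16` lists the
    // highest lane first. A mask such as 0b00000000_11111111_... therefore
    // affects the eight values listed last in each 16-value row.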
14198    #[simd_test(enable = "avx512bw")]
14199    const fn test_mm512_max_epu16() {
14200        #[rustfmt::skip]
14201        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14202                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14203        #[rustfmt::skip]
14204        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14205                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14206        let r = _mm512_max_epu16(a, b);
14207        #[rustfmt::skip]
14208        let e = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
14209                                 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
14210        assert_eq_m512i(r, e);
14211    }
14212
14213    #[simd_test(enable = "avx512bw")]
14214    const fn test_mm512_mask_max_epu16() {
14215        #[rustfmt::skip]
14216        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14217                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14218        #[rustfmt::skip]
14219        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14220                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14221        let r = _mm512_mask_max_epu16(a, 0, a, b);
14222        assert_eq_m512i(r, a);
14223        let r = _mm512_mask_max_epu16(a, 0b00000000_11111111_00000000_11111111, a, b);
14224        #[rustfmt::skip]
14225        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14226                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14227        assert_eq_m512i(r, e);
14228    }
14229
14230    #[simd_test(enable = "avx512bw")]
14231    const fn test_mm512_maskz_max_epu16() {
14232        #[rustfmt::skip]
14233        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14234                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14235        #[rustfmt::skip]
14236        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14237                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14238        let r = _mm512_maskz_max_epu16(0, a, b);
14239        assert_eq_m512i(r, _mm512_setzero_si512());
14240        let r = _mm512_maskz_max_epu16(0b00000000_11111111_00000000_11111111, a, b);
14241        #[rustfmt::skip]
14242        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
14243                                 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
14244        assert_eq_m512i(r, e);
14245    }
14246
14247    #[simd_test(enable = "avx512bw,avx512vl")]
14248    const fn test_mm256_mask_max_epu16() {
14249        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14250        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14251        let r = _mm256_mask_max_epu16(a, 0, a, b);
14252        assert_eq_m256i(r, a);
14253        let r = _mm256_mask_max_epu16(a, 0b00000000_11111111, a, b);
14254        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14255        assert_eq_m256i(r, e);
14256    }
14257
14258    #[simd_test(enable = "avx512bw,avx512vl")]
14259    const fn test_mm256_maskz_max_epu16() {
14260        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14261        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14262        let r = _mm256_maskz_max_epu16(0, a, b);
14263        assert_eq_m256i(r, _mm256_setzero_si256());
14264        let r = _mm256_maskz_max_epu16(0b00000000_11111111, a, b);
14265        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
14266        assert_eq_m256i(r, e);
14267    }
14268
14269    #[simd_test(enable = "avx512bw,avx512vl")]
14270    const fn test_mm_mask_max_epu16() {
14271        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
14272        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
14273        let r = _mm_mask_max_epu16(a, 0, a, b);
14274        assert_eq_m128i(r, a);
14275        let r = _mm_mask_max_epu16(a, 0b00001111, a, b);
14276        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
14277        assert_eq_m128i(r, e);
14278    }
14279
14280    #[simd_test(enable = "avx512bw,avx512vl")]
14281    const fn test_mm_maskz_max_epu16() {
14282        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
14283        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
14284        let r = _mm_maskz_max_epu16(0, a, b);
14285        assert_eq_m128i(r, _mm_setzero_si128());
14286        let r = _mm_maskz_max_epu16(0b00001111, a, b);
14287        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
14288        assert_eq_m128i(r, e);
14289    }
14290
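    // Byte analogue: 64 lanes, so the repeating 0..=15 pattern and the 64-bit
    // mask cover four 16-value rows instead of two.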
14291    #[simd_test(enable = "avx512bw")]
14292    const fn test_mm512_max_epu8() {
14293        #[rustfmt::skip]
14294        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14295                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14296                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14297                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14298        #[rustfmt::skip]
14299        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14300                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14301                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14302                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14303        let r = _mm512_max_epu8(a, b);
14304        #[rustfmt::skip]
14305        let e = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
14306                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
14307                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
14308                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
14309        assert_eq_m512i(r, e);
14310    }
14311
14312    #[simd_test(enable = "avx512bw")]
14313    const fn test_mm512_mask_max_epu8() {
14314        #[rustfmt::skip]
14315        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14316                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14317                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14318                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14319        #[rustfmt::skip]
14320        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14321                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14322                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14323                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14324        let r = _mm512_mask_max_epu8(a, 0, a, b);
14325        assert_eq_m512i(r, a);
14326        let r = _mm512_mask_max_epu8(
14327            a,
14328            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
14329            a,
14330            b,
14331        );
14332        #[rustfmt::skip]
14333        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14334                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14335                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14336                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14337        assert_eq_m512i(r, e);
14338    }
14339
14340    #[simd_test(enable = "avx512bw")]
14341    const fn test_mm512_maskz_max_epu8() {
14342        #[rustfmt::skip]
14343        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14344                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14345                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14346                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14347        #[rustfmt::skip]
14348        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14349                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14350                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14351                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14352        let r = _mm512_maskz_max_epu8(0, a, b);
14353        assert_eq_m512i(r, _mm512_setzero_si512());
14354        let r = _mm512_maskz_max_epu8(
14355            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
14356            a,
14357            b,
14358        );
14359        #[rustfmt::skip]
14360        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
14361                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
14362                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
14363                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
14364        assert_eq_m512i(r, e);
14365    }
14366
14367    #[simd_test(enable = "avx512bw,avx512vl")]
14368    const fn test_mm256_mask_max_epu8() {
14369        #[rustfmt::skip]
14370        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14371                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14372        #[rustfmt::skip]
14373        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14374                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14375        let r = _mm256_mask_max_epu8(a, 0, a, b);
14376        assert_eq_m256i(r, a);
14377        let r = _mm256_mask_max_epu8(a, 0b00000000_11111111_00000000_11111111, a, b);
14378        #[rustfmt::skip]
14379        let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14380                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14381        assert_eq_m256i(r, e);
14382    }
14383
14384    #[simd_test(enable = "avx512bw,avx512vl")]
14385    const fn test_mm256_maskz_max_epu8() {
14386        #[rustfmt::skip]
14387        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14388                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14389        #[rustfmt::skip]
14390        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14391                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14392        let r = _mm256_maskz_max_epu8(0, a, b);
14393        assert_eq_m256i(r, _mm256_setzero_si256());
14394        let r = _mm256_maskz_max_epu8(0b00000000_11111111_00000000_11111111, a, b);
14395        #[rustfmt::skip]
14396        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
14397                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
14398        assert_eq_m256i(r, e);
14399    }
14400
14401    #[simd_test(enable = "avx512bw,avx512vl")]
14402    const fn test_mm_mask_max_epu8() {
14403        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14404        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14405        let r = _mm_mask_max_epu8(a, 0, a, b);
14406        assert_eq_m128i(r, a);
14407        let r = _mm_mask_max_epu8(a, 0b00000000_11111111, a, b);
14408        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14409        assert_eq_m128i(r, e);
14410    }
14411
14412    #[simd_test(enable = "avx512bw,avx512vl")]
14413    const fn test_mm_maskz_max_epu8() {
14414        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14415        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14416        let r = _mm_maskz_max_epu8(0, a, b);
14417        assert_eq_m128i(r, _mm_setzero_si128());
14418        let r = _mm_maskz_max_epu8(0b00000000_11111111, a, b);
14419        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
14420        assert_eq_m128i(r, e);
14421    }
14422
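    // Signed max: every input lane here is non-negative, so the expected
    // values are identical to the unsigned tests above.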
14423    #[simd_test(enable = "avx512bw")]
14424    const fn test_mm512_max_epi16() {
14425        #[rustfmt::skip]
14426        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14427                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14428        #[rustfmt::skip]
14429        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14430                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14431        let r = _mm512_max_epi16(a, b);
14432        #[rustfmt::skip]
14433        let e = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
14434                                 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
14435        assert_eq_m512i(r, e);
14436    }
14437
14438    #[simd_test(enable = "avx512bw")]
14439    const fn test_mm512_mask_max_epi16() {
14440        #[rustfmt::skip]
14441        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14442                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14443        #[rustfmt::skip]
14444        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14445                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14446        let r = _mm512_mask_max_epi16(a, 0, a, b);
14447        assert_eq_m512i(r, a);
14448        let r = _mm512_mask_max_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
14449        #[rustfmt::skip]
14450        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14451                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14452        assert_eq_m512i(r, e);
14453    }
14454
14455    #[simd_test(enable = "avx512bw")]
14456    const fn test_mm512_maskz_max_epi16() {
14457        #[rustfmt::skip]
14458        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14459                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14460        #[rustfmt::skip]
14461        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14462                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14463        let r = _mm512_maskz_max_epi16(0, a, b);
14464        assert_eq_m512i(r, _mm512_setzero_si512());
14465        let r = _mm512_maskz_max_epi16(0b00000000_11111111_00000000_11111111, a, b);
14466        #[rustfmt::skip]
14467        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
14468                                 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
14469        assert_eq_m512i(r, e);
14470    }
14471
14472    #[simd_test(enable = "avx512bw,avx512vl")]
14473    const fn test_mm256_mask_max_epi16() {
14474        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14475        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14476        let r = _mm256_mask_max_epi16(a, 0, a, b);
14477        assert_eq_m256i(r, a);
14478        let r = _mm256_mask_max_epi16(a, 0b00000000_11111111, a, b);
14479        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14480        assert_eq_m256i(r, e);
14481    }
14482
14483    #[simd_test(enable = "avx512bw,avx512vl")]
14484    const fn test_mm256_maskz_max_epi16() {
14485        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14486        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14487        let r = _mm256_maskz_max_epi16(0, a, b);
14488        assert_eq_m256i(r, _mm256_setzero_si256());
14489        let r = _mm256_maskz_max_epi16(0b00000000_11111111, a, b);
14490        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
14491        assert_eq_m256i(r, e);
14492    }
14493
14494    #[simd_test(enable = "avx512bw,avx512vl")]
14495    const fn test_mm_mask_max_epi16() {
14496        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
14497        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
14498        let r = _mm_mask_max_epi16(a, 0, a, b);
14499        assert_eq_m128i(r, a);
14500        let r = _mm_mask_max_epi16(a, 0b00001111, a, b);
14501        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
14502        assert_eq_m128i(r, e);
14503    }
14504
14505    #[simd_test(enable = "avx512bw,avx512vl")]
14506    const fn test_mm_maskz_max_epi16() {
14507        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
14508        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
14509        let r = _mm_maskz_max_epi16(0, a, b);
14510        assert_eq_m128i(r, _mm_setzero_si128());
14511        let r = _mm_maskz_max_epi16(0b00001111, a, b);
14512        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
14513        assert_eq_m128i(r, e);
14514    }
14515
14516    #[simd_test(enable = "avx512bw")]
14517    const fn test_mm512_max_epi8() {
14518        #[rustfmt::skip]
14519        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14520                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14521                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14522                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14523        #[rustfmt::skip]
14524        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14525                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14526                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14527                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14528        let r = _mm512_max_epi8(a, b);
14529        #[rustfmt::skip]
14530        let e = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
14531                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
14532                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
14533                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
14534        assert_eq_m512i(r, e);
14535    }
14536
14537    #[simd_test(enable = "avx512bw")]
14538    const fn test_mm512_mask_max_epi8() {
14539        #[rustfmt::skip]
14540        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14541                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14542                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14543                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14544        #[rustfmt::skip]
14545        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14546                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14547                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14548                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14549        let r = _mm512_mask_max_epi8(a, 0, a, b);
14550        assert_eq_m512i(r, a);
14551        let r = _mm512_mask_max_epi8(
14552            a,
14553            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
14554            a,
14555            b,
14556        );
14557        #[rustfmt::skip]
14558        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14559                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14560                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14561                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14562        assert_eq_m512i(r, e);
14563    }
14564
14565    #[simd_test(enable = "avx512bw")]
14566    const fn test_mm512_maskz_max_epi8() {
14567        #[rustfmt::skip]
14568        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14569                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14570                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14571                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14572        #[rustfmt::skip]
14573        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14574                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14575                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14576                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14577        let r = _mm512_maskz_max_epi8(0, a, b);
14578        assert_eq_m512i(r, _mm512_setzero_si512());
14579        let r = _mm512_maskz_max_epi8(
14580            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
14581            a,
14582            b,
14583        );
14584        #[rustfmt::skip]
14585        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
14586                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
14587                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
14588                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
14589        assert_eq_m512i(r, e);
14590    }
14591
14592    #[simd_test(enable = "avx512bw,avx512vl")]
14593    const fn test_mm256_mask_max_epi8() {
14594        #[rustfmt::skip]
14595        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14596                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14597        #[rustfmt::skip]
14598        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14599                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14600        let r = _mm256_mask_max_epi8(a, 0, a, b);
14601        assert_eq_m256i(r, a);
14602        let r = _mm256_mask_max_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
14603        #[rustfmt::skip]
14604        let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14605                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14606        assert_eq_m256i(r, e);
14607    }
14608
14609    #[simd_test(enable = "avx512bw,avx512vl")]
14610    const fn test_mm256_maskz_max_epi8() {
14611        #[rustfmt::skip]
14612        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14613                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14614        #[rustfmt::skip]
14615        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14616                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14617        let r = _mm256_maskz_max_epi8(0, a, b);
14618        assert_eq_m256i(r, _mm256_setzero_si256());
14619        let r = _mm256_maskz_max_epi8(0b00000000_11111111_00000000_11111111, a, b);
14620        #[rustfmt::skip]
14621        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
14622                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
14623        assert_eq_m256i(r, e);
14624    }
14625
14626    #[simd_test(enable = "avx512bw,avx512vl")]
14627    const fn test_mm_mask_max_epi8() {
14628        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14629        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14630        let r = _mm_mask_max_epi8(a, 0, a, b);
14631        assert_eq_m128i(r, a);
14632        let r = _mm_mask_max_epi8(a, 0b00000000_11111111, a, b);
14633        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14634        assert_eq_m128i(r, e);
14635    }
14636
14637    #[simd_test(enable = "avx512bw,avx512vl")]
14638    const fn test_mm_maskz_max_epi8() {
14639        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14640        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14641        let r = _mm_maskz_max_epi8(0, a, b);
14642        assert_eq_m128i(r, _mm_setzero_si128());
14643        let r = _mm_maskz_max_epi8(0b00000000_11111111, a, b);
14644        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
14645        assert_eq_m128i(r, e);
14646    }
14647
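    // Unsigned min mirrors the max tests: each lane keeps the smaller value,
    // giving 0..=7 then 7..=0 across each 16-value row.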
14648    #[simd_test(enable = "avx512bw")]
14649    const fn test_mm512_min_epu16() {
14650        #[rustfmt::skip]
14651        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14652                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14653        #[rustfmt::skip]
14654        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14655                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14656        let r = _mm512_min_epu16(a, b);
14657        #[rustfmt::skip]
14658        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14659                                 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14660        assert_eq_m512i(r, e);
14661    }
14662
14663    #[simd_test(enable = "avx512bw")]
14664    const fn test_mm512_mask_min_epu16() {
14665        #[rustfmt::skip]
14666        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14667                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14668        #[rustfmt::skip]
14669        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14670                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14671        let r = _mm512_mask_min_epu16(a, 0, a, b);
14672        assert_eq_m512i(r, a);
14673        let r = _mm512_mask_min_epu16(a, 0b00000000_11111111_00000000_11111111, a, b);
14674        #[rustfmt::skip]
14675        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14676                                 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14677        assert_eq_m512i(r, e);
14678    }
14679
14680    #[simd_test(enable = "avx512bw")]
14681    const fn test_mm512_maskz_min_epu16() {
14682        #[rustfmt::skip]
14683        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14684                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14685        #[rustfmt::skip]
14686        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14687                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14688        let r = _mm512_maskz_min_epu16(0, a, b);
14689        assert_eq_m512i(r, _mm512_setzero_si512());
14690        let r = _mm512_maskz_min_epu16(0b00000000_11111111_00000000_11111111, a, b);
14691        #[rustfmt::skip]
14692        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
14693                                 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
14694        assert_eq_m512i(r, e);
14695    }
14696
14697    #[simd_test(enable = "avx512bw,avx512vl")]
14698    const fn test_mm256_mask_min_epu16() {
14699        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14700        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14701        let r = _mm256_mask_min_epu16(a, 0, a, b);
14702        assert_eq_m256i(r, a);
14703        let r = _mm256_mask_min_epu16(a, 0b00000000_11111111, a, b);
14704        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14705        assert_eq_m256i(r, e);
14706    }
14707
14708    #[simd_test(enable = "avx512bw,avx512vl")]
14709    const fn test_mm256_maskz_min_epu16() {
14710        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14711        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14712        let r = _mm256_maskz_min_epu16(0, a, b);
14713        assert_eq_m256i(r, _mm256_setzero_si256());
14714        let r = _mm256_maskz_min_epu16(0b00000000_11111111, a, b);
14715        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
14716        assert_eq_m256i(r, e);
14717    }
14718
14719    #[simd_test(enable = "avx512bw,avx512vl")]
14720    const fn test_mm_mask_min_epu16() {
14721        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
14722        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
14723        let r = _mm_mask_min_epu16(a, 0, a, b);
14724        assert_eq_m128i(r, a);
14725        let r = _mm_mask_min_epu16(a, 0b00001111, a, b);
14726        let e = _mm_set_epi16(0, 1, 2, 3, 3, 2, 1, 0);
14727        assert_eq_m128i(r, e);
14728    }
14729
14730    #[simd_test(enable = "avx512bw,avx512vl")]
14731    const fn test_mm_maskz_min_epu16() {
14732        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
14733        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
14734        let r = _mm_maskz_min_epu16(0, a, b);
14735        assert_eq_m128i(r, _mm_setzero_si128());
14736        let r = _mm_maskz_min_epu16(0b00001111, a, b);
14737        let e = _mm_set_epi16(0, 0, 0, 0, 3, 2, 1, 0);
14738        assert_eq_m128i(r, e);
14739    }
14740
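    // Byte analogue of the unsigned min tests above.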
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_min_epu8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_min_epu8(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_min_epu8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_min_epu8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_min_epu8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_min_epu8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_min_epu8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_min_epu8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_min_epu8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_mask_min_epu8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_min_epu8(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_min_epu8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_maskz_min_epu8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_min_epu8(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_min_epu8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_mask_min_epu8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_min_epu8(a, 0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_min_epu8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_maskz_min_epu8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_min_epu8(0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m128i(r, e);
    }

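    // `min_epi*` is the signed counterpart of `min_epu*`. The inputs below are
    // all non-negative, so the per-element results match the unsigned tests;
    // what these cases exercise is the mask and maskz handling at each width.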
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_min_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_min_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_min_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_min_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_min_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_min_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_min_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_min_epi16(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_min_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_mask_min_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_min_epi16(a, 0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_min_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_maskz_min_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_min_epi16(0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_min_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_mask_min_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_min_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(0, 1, 2, 3, 3, 2, 1, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_min_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_maskz_min_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_min_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 3, 2, 1, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_min_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_min_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_min_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_min_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_min_epi8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_min_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_min_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_min_epi8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_min_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_mask_min_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_min_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_min_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_maskz_min_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_min_epi8(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_min_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_mask_min_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_min_epi8(a, 0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_min_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_maskz_min_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_min_epi8(0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m128i(r, e);
    }

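    // The `cmplt_epu*` comparisons use the unsigned reinterpretation of each
    // lane: -2 and -1 become 0xFFFE and 0xFFFF (0xFE and 0xFF for bytes), so
    // a < b holds in every lane and the full mask is set.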
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_cmplt_epu16_mask() {
        let a = _mm512_set1_epi16(-2);
        let b = _mm512_set1_epi16(-1);
        let m = _mm512_cmplt_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cmplt_epu16_mask() {
        let a = _mm512_set1_epi16(-2);
        let b = _mm512_set1_epi16(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmplt_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_cmplt_epu16_mask() {
        let a = _mm256_set1_epi16(-2);
        let b = _mm256_set1_epi16(-1);
        let m = _mm256_cmplt_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cmplt_epu16_mask() {
        let a = _mm256_set1_epi16(-2);
        let b = _mm256_set1_epi16(-1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmplt_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_cmplt_epu16_mask() {
        let a = _mm_set1_epi16(-2);
        let b = _mm_set1_epi16(-1);
        let m = _mm_cmplt_epu16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cmplt_epu16_mask() {
        let a = _mm_set1_epi16(-2);
        let b = _mm_set1_epi16(-1);
        let mask = 0b01010101;
        let r = _mm_mask_cmplt_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_cmplt_epu8_mask() {
        let a = _mm512_set1_epi8(-2);
        let b = _mm512_set1_epi8(-1);
        let m = _mm512_cmplt_epu8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cmplt_epu8_mask() {
        let a = _mm512_set1_epi8(-2);
        let b = _mm512_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmplt_epu8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_cmplt_epu8_mask() {
        let a = _mm256_set1_epi8(-2);
        let b = _mm256_set1_epi8(-1);
        let m = _mm256_cmplt_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cmplt_epu8_mask() {
        let a = _mm256_set1_epi8(-2);
        let b = _mm256_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmplt_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_cmplt_epu8_mask() {
        let a = _mm_set1_epi8(-2);
        let b = _mm_set1_epi8(-1);
        let m = _mm_cmplt_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cmplt_epu8_mask() {
        let a = _mm_set1_epi8(-2);
        let b = _mm_set1_epi8(-1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmplt_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

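    // The signed `cmplt_epi*` variants compare the same operands as -2 < -1,
    // so the expected masks are identical to the unsigned cases above.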
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_cmplt_epi16_mask() {
        let a = _mm512_set1_epi16(-2);
        let b = _mm512_set1_epi16(-1);
        let m = _mm512_cmplt_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cmplt_epi16_mask() {
        let a = _mm512_set1_epi16(-2);
        let b = _mm512_set1_epi16(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmplt_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_cmplt_epi16_mask() {
        let a = _mm256_set1_epi16(-2);
        let b = _mm256_set1_epi16(-1);
        let m = _mm256_cmplt_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cmplt_epi16_mask() {
        let a = _mm256_set1_epi16(-2);
        let b = _mm256_set1_epi16(-1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmplt_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_cmplt_epi16_mask() {
        let a = _mm_set1_epi16(-2);
        let b = _mm_set1_epi16(-1);
        let m = _mm_cmplt_epi16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cmplt_epi16_mask() {
        let a = _mm_set1_epi16(-2);
        let b = _mm_set1_epi16(-1);
        let mask = 0b01010101;
        let r = _mm_mask_cmplt_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_cmplt_epi8_mask() {
        let a = _mm512_set1_epi8(-2);
        let b = _mm512_set1_epi8(-1);
        let m = _mm512_cmplt_epi8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cmplt_epi8_mask() {
        let a = _mm512_set1_epi8(-2);
        let b = _mm512_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmplt_epi8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_cmplt_epi8_mask() {
        let a = _mm256_set1_epi8(-2);
        let b = _mm256_set1_epi8(-1);
        let m = _mm256_cmplt_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cmplt_epi8_mask() {
        let a = _mm256_set1_epi8(-2);
        let b = _mm256_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmplt_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_cmplt_epi8_mask() {
        let a = _mm_set1_epi8(-2);
        let b = _mm_set1_epi8(-1);
        let m = _mm_cmplt_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cmplt_epi8_mask() {
        let a = _mm_set1_epi8(-2);
        let b = _mm_set1_epi8(-1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmplt_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

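    // Unsigned greater-than on small positive operands: 2 > 1 in every lane,
    // so all mask bits are set.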
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_cmpgt_epu16_mask() {
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(1);
        let m = _mm512_cmpgt_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cmpgt_epu16_mask() {
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpgt_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_cmpgt_epu16_mask() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(1);
        let m = _mm256_cmpgt_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cmpgt_epu16_mask() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmpgt_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_cmpgt_epu16_mask() {
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(1);
        let m = _mm_cmpgt_epu16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cmpgt_epu16_mask() {
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(1);
        let mask = 0b01010101;
        let r = _mm_mask_cmpgt_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_cmpgt_epu8_mask() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(1);
        let m = _mm512_cmpgt_epu8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cmpgt_epu8_mask() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpgt_epu8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_cmpgt_epu8_mask() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(1);
        let m = _mm256_cmpgt_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cmpgt_epu8_mask() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmpgt_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_cmpgt_epu8_mask() {
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(1);
        let m = _mm_cmpgt_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cmpgt_epu8_mask() {
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmpgt_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

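    // Signed greater-than: 2 > -1 holds in every lane. This is a case where
    // signedness actually matters: reinterpreted as unsigned, -1 would be the
    // maximum value and the comparison would fail.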
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_cmpgt_epi16_mask() {
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(-1);
        let m = _mm512_cmpgt_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cmpgt_epi16_mask() {
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpgt_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_cmpgt_epi16_mask() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(-1);
        let m = _mm256_cmpgt_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cmpgt_epi16_mask() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(-1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmpgt_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_cmpgt_epi16_mask() {
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(-1);
        let m = _mm_cmpgt_epi16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cmpgt_epi16_mask() {
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(-1);
        let mask = 0b01010101;
        let r = _mm_mask_cmpgt_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_cmpgt_epi8_mask() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(-1);
        let m = _mm512_cmpgt_epi8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cmpgt_epi8_mask() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpgt_epi8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_cmpgt_epi8_mask() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(-1);
        let m = _mm256_cmpgt_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cmpgt_epi8_mask() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmpgt_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_cmpgt_epi8_mask() {
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(-1);
        let m = _mm_cmpgt_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cmpgt_epi8_mask() {
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(-1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmpgt_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

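    // Less-than-or-equal on identical operands (-1 vs. -1) sets every mask
    // bit, for both the unsigned and the signed variants.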
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_cmple_epu16_mask() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(-1);
        let m = _mm512_cmple_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cmple_epu16_mask() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmple_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_cmple_epu16_mask() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let m = _mm256_cmple_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cmple_epu16_mask() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmple_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_cmple_epu16_mask() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let m = _mm_cmple_epu16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cmple_epu16_mask() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let mask = 0b01010101;
        let r = _mm_mask_cmple_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_cmple_epu8_mask() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let m = _mm512_cmple_epu8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cmple_epu8_mask() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmple_epu8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_cmple_epu8_mask() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let m = _mm256_cmple_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cmple_epu8_mask() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmple_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_cmple_epu8_mask() {
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let m = _mm_cmple_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cmple_epu8_mask() {
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmple_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_cmple_epi16_mask() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(-1);
        let m = _mm512_cmple_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cmple_epi16_mask() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmple_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_cmple_epi16_mask() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let m = _mm256_cmple_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cmple_epi16_mask() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmple_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_cmple_epi16_mask() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let m = _mm_cmple_epi16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cmple_epi16_mask() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let mask = 0b01010101;
        let r = _mm_mask_cmple_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_cmple_epi8_mask() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let m = _mm512_cmple_epi8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cmple_epi8_mask() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmple_epi8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_cmple_epi8_mask() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let m = _mm256_cmple_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cmple_epi8_mask() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmple_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_cmple_epi8_mask() {
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let m = _mm_cmple_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cmple_epi8_mask() {
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmple_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

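    // Greater-than-or-equal on equal operands likewise sets every mask bit.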
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_cmpge_epu16_mask() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let m = _mm512_cmpge_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cmpge_epu16_mask() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpge_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_cmpge_epu16_mask() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let m = _mm256_cmpge_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cmpge_epu16_mask() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmpge_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_cmpge_epu16_mask() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let m = _mm_cmpge_epu16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cmpge_epu16_mask() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let mask = 0b01010101;
        let r = _mm_mask_cmpge_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_cmpge_epu8_mask() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let m = _mm512_cmpge_epu8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cmpge_epu8_mask() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpge_epu8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_cmpge_epu8_mask() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        let m = _mm256_cmpge_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cmpge_epu8_mask() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmpge_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_cmpge_epu8_mask() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let m = _mm_cmpge_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cmpge_epu8_mask() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmpge_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_cmpge_epi16_mask() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(-1);
        let m = _mm512_cmpge_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cmpge_epi16_mask() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpge_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_cmpge_epi16_mask() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let m = _mm256_cmpge_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cmpge_epi16_mask() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmpge_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_cmpge_epi16_mask() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let m = _mm_cmpge_epi16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cmpge_epi16_mask() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let mask = 0b01010101;
        let r = _mm_mask_cmpge_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_cmpge_epi8_mask() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let m = _mm512_cmpge_epi8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cmpge_epi8_mask() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpge_epi8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_cmpge_epi8_mask() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let m = _mm256_cmpge_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cmpge_epi8_mask() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmpge_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_cmpge_epi8_mask() {
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let m = _mm_cmpge_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cmpge_epi8_mask() {
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmpge_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

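    // Equality is independent of signedness, so the `cmpeq_epu*` and
    // `cmpeq_epi*` variants produce identical masks for the same bit patterns.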
15962    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_cmpeq_epu16_mask() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let m = _mm512_cmpeq_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cmpeq_epu16_mask() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpeq_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_cmpeq_epu16_mask() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let m = _mm256_cmpeq_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cmpeq_epu16_mask() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmpeq_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_cmpeq_epu16_mask() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let m = _mm_cmpeq_epu16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cmpeq_epu16_mask() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let mask = 0b01010101;
        let r = _mm_mask_cmpeq_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_cmpeq_epu8_mask() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let m = _mm512_cmpeq_epu8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cmpeq_epu8_mask() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpeq_epu8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_cmpeq_epu8_mask() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        let m = _mm256_cmpeq_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cmpeq_epu8_mask() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmpeq_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_cmpeq_epu8_mask() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let m = _mm_cmpeq_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cmpeq_epu8_mask() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmpeq_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

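    // The signed `cmpeq_epi*` tests below mirror the unsigned ones above but use
    // -1 operands, so lane equality is also exercised on values with the sign bit
    // set.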
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_cmpeq_epi16_mask() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(-1);
        let m = _mm512_cmpeq_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cmpeq_epi16_mask() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpeq_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_cmpeq_epi16_mask() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let m = _mm256_cmpeq_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cmpeq_epi16_mask() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmpeq_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_cmpeq_epi16_mask() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let m = _mm_cmpeq_epi16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cmpeq_epi16_mask() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let mask = 0b01010101;
        let r = _mm_mask_cmpeq_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_cmpeq_epi8_mask() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let m = _mm512_cmpeq_epi8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cmpeq_epi8_mask() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpeq_epi8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_cmpeq_epi8_mask() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let m = _mm256_cmpeq_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cmpeq_epi8_mask() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmpeq_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_cmpeq_epi8_mask() {
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let m = _mm_cmpeq_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cmpeq_epi8_mask() {
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmpeq_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

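    // `cmpneq` tests: the operands differ in every lane, so the unmasked variants
    // are expected to return an all-ones mask.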
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_cmpneq_epu16_mask() {
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(1);
        let m = _mm512_cmpneq_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cmpneq_epu16_mask() {
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpneq_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_cmpneq_epu16_mask() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(1);
        let m = _mm256_cmpneq_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cmpneq_epu16_mask() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmpneq_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_cmpneq_epu16_mask() {
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(1);
        let m = _mm_cmpneq_epu16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cmpneq_epu16_mask() {
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(1);
        let mask = 0b01010101;
        let r = _mm_mask_cmpneq_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_cmpneq_epu8_mask() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(1);
        let m = _mm512_cmpneq_epu8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cmpneq_epu8_mask() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpneq_epu8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_cmpneq_epu8_mask() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(1);
        let m = _mm256_cmpneq_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cmpneq_epu8_mask() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmpneq_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_cmpneq_epu8_mask() {
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(1);
        let m = _mm_cmpneq_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cmpneq_epu8_mask() {
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmpneq_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_cmpneq_epi16_mask() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(-1);
        let m = _mm512_cmpneq_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cmpneq_epi16_mask() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpneq_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_cmpneq_epi16_mask() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(-1);
        let m = _mm256_cmpneq_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cmpneq_epi16_mask() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(-1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmpneq_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_cmpneq_epi16_mask() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(-1);
        let m = _mm_cmpneq_epi16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cmpneq_epi16_mask() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(-1);
        let mask = 0b01010101;
        let r = _mm_mask_cmpneq_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_cmpneq_epi8_mask() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(-1);
        let m = _mm512_cmpneq_epi8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cmpneq_epi8_mask() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpneq_epi8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_cmpneq_epi8_mask() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(-1);
        let m = _mm256_cmpneq_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cmpneq_epi8_mask() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmpneq_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_cmpneq_epi8_mask() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(-1);
        let m = _mm_cmpneq_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cmpneq_epi8_mask() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(-1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmpneq_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

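    // The `cmp_*` tests below exercise the predicate-generic compare intrinsics,
    // which take the comparison operator as a const generic parameter;
    // `_MM_CMPINT_LT` selects less-than, so zero lanes compared against one lanes
    // set every mask bit.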
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_cmp_epu16_mask() {
        let a = _mm512_set1_epi16(0);
        let b = _mm512_set1_epi16(1);
        let m = _mm512_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cmp_epu16_mask() {
        let a = _mm512_set1_epi16(0);
        let b = _mm512_set1_epi16(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_cmp_epu16_mask() {
        let a = _mm256_set1_epi16(0);
        let b = _mm256_set1_epi16(1);
        let m = _mm256_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cmp_epu16_mask() {
        let a = _mm256_set1_epi16(0);
        let b = _mm256_set1_epi16(1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_cmp_epu16_mask() {
        let a = _mm_set1_epi16(0);
        let b = _mm_set1_epi16(1);
        let m = _mm_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cmp_epu16_mask() {
        let a = _mm_set1_epi16(0);
        let b = _mm_set1_epi16(1);
        let mask = 0b01010101;
        let r = _mm_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_cmp_epu8_mask() {
        let a = _mm512_set1_epi8(0);
        let b = _mm512_set1_epi8(1);
        let m = _mm512_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cmp_epu8_mask() {
        let a = _mm512_set1_epi8(0);
        let b = _mm512_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_cmp_epu8_mask() {
        let a = _mm256_set1_epi8(0);
        let b = _mm256_set1_epi8(1);
        let m = _mm256_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cmp_epu8_mask() {
        let a = _mm256_set1_epi8(0);
        let b = _mm256_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_cmp_epu8_mask() {
        let a = _mm_set1_epi8(0);
        let b = _mm_set1_epi8(1);
        let m = _mm_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cmp_epu8_mask() {
        let a = _mm_set1_epi8(0);
        let b = _mm_set1_epi8(1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_cmp_epi16_mask() {
        let a = _mm512_set1_epi16(0);
        let b = _mm512_set1_epi16(1);
        let m = _mm512_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cmp_epi16_mask() {
        let a = _mm512_set1_epi16(0);
        let b = _mm512_set1_epi16(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_cmp_epi16_mask() {
        let a = _mm256_set1_epi16(0);
        let b = _mm256_set1_epi16(1);
        let m = _mm256_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cmp_epi16_mask() {
        let a = _mm256_set1_epi16(0);
        let b = _mm256_set1_epi16(1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_cmp_epi16_mask() {
        let a = _mm_set1_epi16(0);
        let b = _mm_set1_epi16(1);
        let m = _mm_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cmp_epi16_mask() {
        let a = _mm_set1_epi16(0);
        let b = _mm_set1_epi16(1);
        let mask = 0b01010101;
        let r = _mm_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_cmp_epi8_mask() {
        let a = _mm512_set1_epi8(0);
        let b = _mm512_set1_epi8(1);
        let m = _mm512_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cmp_epi8_mask() {
        let a = _mm512_set1_epi8(0);
        let b = _mm512_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_cmp_epi8_mask() {
        let a = _mm256_set1_epi8(0);
        let b = _mm256_set1_epi8(1);
        let m = _mm256_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cmp_epi8_mask() {
        let a = _mm256_set1_epi8(0);
        let b = _mm256_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_cmp_epi8_mask() {
        let a = _mm_set1_epi8(0);
        let b = _mm_set1_epi8(1);
        let m = _mm_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cmp_epi8_mask() {
        let a = _mm_set1_epi8(0);
        let b = _mm_set1_epi8(1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

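    // Horizontal reduction tests. Each `reduce_*` intrinsic folds every lane into
    // a single scalar; the `mask_` variants first substitute the operation's
    // identity element (0 for add/or, 1 for mul, all-ones for and, the type's
    // extremum for min/max) into lanes whose mask bit is clear, so e.g. summing
    // sixteen 1s under an eight-bit mask yields 8.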
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_reduce_add_epi16() {
        let a = _mm256_set1_epi16(1);
        let e = _mm256_reduce_add_epi16(a);
        assert_eq!(16, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_reduce_add_epi16() {
        let a = _mm256_set1_epi16(1);
        let e = _mm256_mask_reduce_add_epi16(0b11111111_00000000, a);
        assert_eq!(8, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_reduce_add_epi16() {
        let a = _mm_set1_epi16(1);
        let e = _mm_reduce_add_epi16(a);
        assert_eq!(8, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_reduce_add_epi16() {
        let a = _mm_set1_epi16(1);
        let e = _mm_mask_reduce_add_epi16(0b11110000, a);
        assert_eq!(4, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_reduce_add_epi8() {
        let a = _mm256_set1_epi8(1);
        let e = _mm256_reduce_add_epi8(a);
        assert_eq!(32, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_reduce_add_epi8() {
        let a = _mm256_set1_epi8(1);
        let e = _mm256_mask_reduce_add_epi8(0b11111111_00000000_11111111_00000000, a);
        assert_eq!(16, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_reduce_add_epi8() {
        let a = _mm_set1_epi8(1);
        let e = _mm_reduce_add_epi8(a);
        assert_eq!(16, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_reduce_add_epi8() {
        let a = _mm_set1_epi8(1);
        let e = _mm_mask_reduce_add_epi8(0b11111111_00000000, a);
        assert_eq!(8, e);
    }

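    // Note on lane order: `_mm*_set_epi*` takes its arguments from the highest
    // lane down to lane 0, while mask bit n always refers to lane n. A mask of
    // 0b11111111_00000000 therefore selects lanes 8..=15, i.e. the *first* eight
    // arguments of a 16-lane `set` call.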
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_reduce_and_epi16() {
        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm256_reduce_and_epi16(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_reduce_and_epi16() {
        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm256_mask_reduce_and_epi16(0b11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_reduce_and_epi16() {
        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
        let e = _mm_reduce_and_epi16(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_reduce_and_epi16() {
        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
        let e = _mm_mask_reduce_and_epi16(0b11110000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_reduce_and_epi8() {
        let a = _mm256_set_epi8(
            1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
            2, 2, 2,
        );
        let e = _mm256_reduce_and_epi8(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_reduce_and_epi8() {
        let a = _mm256_set_epi8(
            1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
            2, 2, 2,
        );
        let e = _mm256_mask_reduce_and_epi8(0b11111111_00000000_11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_reduce_and_epi8() {
        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm_reduce_and_epi8(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_reduce_and_epi8() {
        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm_mask_reduce_and_epi8(0b11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_reduce_mul_epi16() {
        let a = _mm256_set_epi16(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
        let e = _mm256_reduce_mul_epi16(a);
        assert_eq!(256, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_reduce_mul_epi16() {
        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm256_mask_reduce_mul_epi16(0b11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_reduce_mul_epi16() {
        let a = _mm_set_epi16(2, 2, 2, 2, 1, 1, 1, 1);
        let e = _mm_reduce_mul_epi16(a);
        assert_eq!(16, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_reduce_mul_epi16() {
        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
        let e = _mm_mask_reduce_mul_epi16(0b11110000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_reduce_mul_epi8() {
        let a = _mm256_set_epi8(
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
            2, 2, 2,
        );
        let e = _mm256_reduce_mul_epi8(a);
        assert_eq!(64, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_reduce_mul_epi8() {
        let a = _mm256_set_epi8(
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
            2, 2, 2,
        );
        let e = _mm256_mask_reduce_mul_epi8(0b11111111_00000000_11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_reduce_mul_epi8() {
        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2);
        let e = _mm_reduce_mul_epi8(a);
        assert_eq!(8, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_reduce_mul_epi8() {
        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2);
        let e = _mm_mask_reduce_mul_epi8(0b11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_reduce_max_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i16 = _mm256_reduce_max_epi16(a);
        assert_eq!(15, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_reduce_max_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i16 = _mm256_mask_reduce_max_epi16(0b11111111_00000000, a);
        assert_eq!(7, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_reduce_max_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: i16 = _mm_reduce_max_epi16(a);
        assert_eq!(7, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_reduce_max_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: i16 = _mm_mask_reduce_max_epi16(0b11110000, a);
        assert_eq!(3, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_reduce_max_epi8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: i8 = _mm256_reduce_max_epi8(a);
        assert_eq!(31, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_reduce_max_epi8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: i8 = _mm256_mask_reduce_max_epi8(0b1111111111111111_0000000000000000, a);
        assert_eq!(15, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_reduce_max_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i8 = _mm_reduce_max_epi8(a);
        assert_eq!(15, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_reduce_max_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i8 = _mm_mask_reduce_max_epi8(0b11111111_00000000, a);
        assert_eq!(7, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_reduce_max_epu16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u16 = _mm256_reduce_max_epu16(a);
        assert_eq!(15, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_reduce_max_epu16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u16 = _mm256_mask_reduce_max_epu16(0b11111111_00000000, a);
        assert_eq!(7, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_reduce_max_epu16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: u16 = _mm_reduce_max_epu16(a);
        assert_eq!(7, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_reduce_max_epu16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: u16 = _mm_mask_reduce_max_epu16(0b11110000, a);
        assert_eq!(3, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_reduce_max_epu8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: u8 = _mm256_reduce_max_epu8(a);
        assert_eq!(31, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_reduce_max_epu8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: u8 = _mm256_mask_reduce_max_epu8(0b1111111111111111_0000000000000000, a);
        assert_eq!(15, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_reduce_max_epu8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u8 = _mm_reduce_max_epu8(a);
        assert_eq!(15, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_reduce_max_epu8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u8 = _mm_mask_reduce_max_epu8(0b11111111_00000000, a);
        assert_eq!(7, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_reduce_min_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i16 = _mm256_reduce_min_epi16(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_reduce_min_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i16 = _mm256_mask_reduce_min_epi16(0b11111111_00000000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_reduce_min_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: i16 = _mm_reduce_min_epi16(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_reduce_min_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: i16 = _mm_mask_reduce_min_epi16(0b11110000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_reduce_min_epi8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: i8 = _mm256_reduce_min_epi8(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_reduce_min_epi8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: i8 = _mm256_mask_reduce_min_epi8(0b1111111111111111_0000000000000000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_reduce_min_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i8 = _mm_reduce_min_epi8(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_reduce_min_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i8 = _mm_mask_reduce_min_epi8(0b11111111_00000000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_reduce_min_epu16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u16 = _mm256_reduce_min_epu16(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_reduce_min_epu16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u16 = _mm256_mask_reduce_min_epu16(0b11111111_00000000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_reduce_min_epu16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: u16 = _mm_reduce_min_epu16(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_reduce_min_epu16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: u16 = _mm_mask_reduce_min_epu16(0b11110000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_reduce_min_epu8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: u8 = _mm256_reduce_min_epu8(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_reduce_min_epu8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: u8 = _mm256_mask_reduce_min_epu8(0b1111111111111111_0000000000000000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_reduce_min_epu8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u8 = _mm_reduce_min_epu8(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_reduce_min_epu8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u8 = _mm_mask_reduce_min_epu8(0b11111111_00000000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_reduce_or_epi16() {
        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm256_reduce_or_epi16(a);
        assert_eq!(3, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_reduce_or_epi16() {
        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm256_mask_reduce_or_epi16(0b11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_reduce_or_epi16() {
        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
        let e = _mm_reduce_or_epi16(a);
        assert_eq!(3, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_reduce_or_epi16() {
        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
        let e = _mm_mask_reduce_or_epi16(0b11110000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_reduce_or_epi8() {
        let a = _mm256_set_epi8(
            1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
            2, 2, 2,
        );
        let e = _mm256_reduce_or_epi8(a);
        assert_eq!(3, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_reduce_or_epi8() {
        let a = _mm256_set_epi8(
            1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
            2, 2, 2,
        );
        let e = _mm256_mask_reduce_or_epi8(0b11111111_00000000_11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_reduce_or_epi8() {
        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm_reduce_or_epi8(a);
        assert_eq!(3, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_reduce_or_epi8() {
        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm_mask_reduce_or_epi8(0b11111111_00000000, a);
        assert_eq!(1, e);
    }

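    // Unaligned load/store tests. These intrinsics take raw pointers, so unlike
    // the tests above they are `unsafe`; the pointers need only be valid for the
    // full vector width, not aligned to it.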
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_loadu_epi16() {
        #[rustfmt::skip]
        let a: [i16; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
        let r = _mm512_loadu_epi16(&a[0]);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_loadu_epi16() {
        let a: [i16; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let r = _mm256_loadu_epi16(&a[0]);
        let e = _mm256_set_epi16(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_loadu_epi16() {
        let a: [i16; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
        let r = _mm_loadu_epi16(&a[0]);
        let e = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_loadu_epi8() {
        #[rustfmt::skip]
        let a: [i8; 64] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                           1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
        let r = _mm512_loadu_epi8(&a[0]);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
                                32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_loadu_epi8() {
        #[rustfmt::skip]
        let a: [i8; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
        let r = _mm256_loadu_epi8(&a[0]);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_loadu_epi8() {
        let a: [i8; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let r = _mm_loadu_epi8(&a[0]);
        let e = _mm_set_epi8(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_storeu_epi16() {
        let a = _mm512_set1_epi16(9);
        let mut r = _mm512_undefined_epi32();
        _mm512_storeu_epi16(&mut r as *mut _ as *mut i16, a);
        assert_eq_m512i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_storeu_epi16() {
        let a = _mm256_set1_epi16(9);
        let mut r = _mm256_set1_epi32(0);
        _mm256_storeu_epi16(&mut r as *mut _ as *mut i16, a);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_storeu_epi16() {
        let a = _mm_set1_epi16(9);
        let mut r = _mm_set1_epi32(0);
        _mm_storeu_epi16(&mut r as *mut _ as *mut i16, a);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_storeu_epi8() {
        let a = _mm512_set1_epi8(9);
        let mut r = _mm512_undefined_epi32();
        _mm512_storeu_epi8(&mut r as *mut _ as *mut i8, a);
        assert_eq_m512i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_storeu_epi8() {
        let a = _mm256_set1_epi8(9);
        let mut r = _mm256_set1_epi32(0);
        _mm256_storeu_epi8(&mut r as *mut _ as *mut i8, a);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_storeu_epi8() {
        let a = _mm_set1_epi8(9);
        let mut r = _mm_set1_epi32(0);
        _mm_storeu_epi8(&mut r as *mut _ as *mut i8, a);
        assert_eq_m128i(r, a);
    }

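    // Masked load/store tests. `mask_loadu` fills lanes with a clear mask bit from
    // `src`, `maskz_loadu` zeroes them, and `mask_storeu` leaves the corresponding
    // destination memory untouched. `black_box` hides the pointer from the
    // optimizer so the masked access itself is exercised rather than const-folded.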
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_loadu_epi16() {
        let src = _mm512_set1_epi16(42);
        let a = &[
            1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b10101010_11001100_11101000_11001010;
        let r = _mm512_mask_loadu_epi16(src, m, black_box(p));
        let e = &[
            42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
        ];
        let e = _mm512_loadu_epi16(e.as_ptr());
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_maskz_loadu_epi16() {
        let a = &[
            1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b10101010_11001100_11101000_11001010;
        let r = _mm512_maskz_loadu_epi16(m, black_box(p));
        let e = &[
            0_i16, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0,
            26, 0, 28, 0, 30, 0, 32,
        ];
        let e = _mm512_loadu_epi16(e.as_ptr());
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_storeu_epi16() {
        let mut r = [42_i16; 32];
        let a = &[
            1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let a = _mm512_loadu_epi16(a.as_ptr());
        let m = 0b10101010_11001100_11101000_11001010;
        _mm512_mask_storeu_epi16(r.as_mut_ptr(), m, a);
        let e = &[
            42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
        ];
        let e = _mm512_loadu_epi16(e.as_ptr());
        assert_eq_m512i(_mm512_loadu_epi16(r.as_ptr()), e);
    }

    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_loadu_epi8() {
        let src = _mm512_set1_epi8(42);
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        ];
        let p = a.as_ptr();
        let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010;
        let r = _mm512_mask_loadu_epi8(src, m, black_box(p));
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32, 42, 42, 42, 42, 42, 42, 42, 42, 41, 42, 43, 44,
            45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 42, 42, 42, 42, 42, 42, 42, 42,
        ];
        let e = _mm512_loadu_epi8(e.as_ptr());
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_maskz_loadu_epi8() {
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        ];
        let p = a.as_ptr();
        let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010;
        let r = _mm512_maskz_loadu_epi8(m, black_box(p));
        let e = &[
            0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0,
            26, 0, 28, 0, 30, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 41, 42, 43, 44, 45, 46, 47, 48, 49,
            50, 51, 52, 53, 54, 55, 56, 0, 0, 0, 0, 0, 0, 0, 0,
        ];
        let e = _mm512_loadu_epi8(e.as_ptr());
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_storeu_epi8() {
        let mut r = [42_i8; 64];
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        ];
        let a = _mm512_loadu_epi8(a.as_ptr());
        let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010;
        _mm512_mask_storeu_epi8(r.as_mut_ptr(), m, a);
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32, 42, 42, 42, 42, 42, 42, 42, 42, 41, 42, 43, 44,
            45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 42, 42, 42, 42, 42, 42, 42, 42,
        ];
        let e = _mm512_loadu_epi8(e.as_ptr());
        assert_eq_m512i(_mm512_loadu_epi8(r.as_ptr()), e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_mask_loadu_epi16() {
        let src = _mm256_set1_epi16(42);
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm256_mask_loadu_epi16(src, m, black_box(p));
        let e = &[
            42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
        ];
        let e = _mm256_loadu_epi16(e.as_ptr());
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_maskz_loadu_epi16() {
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm256_maskz_loadu_epi16(m, black_box(p));
        let e = &[0_i16, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16];
        let e = _mm256_loadu_epi16(e.as_ptr());
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_mask_storeu_epi16() {
        let mut r = [42_i16; 16];
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let a = _mm256_loadu_epi16(a.as_ptr());
        let m = 0b11101000_11001010;
        _mm256_mask_storeu_epi16(r.as_mut_ptr(), m, a);
        let e = &[
            42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
        ];
        let e = _mm256_loadu_epi16(e.as_ptr());
        assert_eq_m256i(_mm256_loadu_epi16(r.as_ptr()), e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_mask_loadu_epi8() {
        let src = _mm256_set1_epi8(42);
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b10101010_11001100_11101000_11001010;
        let r = _mm256_mask_loadu_epi8(src, m, black_box(p));
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
        ];
        let e = _mm256_loadu_epi8(e.as_ptr());
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_maskz_loadu_epi8() {
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b10101010_11001100_11101000_11001010;
        let r = _mm256_maskz_loadu_epi8(m, black_box(p));
        let e = &[
            0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0,
            26, 0, 28, 0, 30, 0, 32,
        ];
        let e = _mm256_loadu_epi8(e.as_ptr());
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_mask_storeu_epi8() {
        let mut r = [42_i8; 32];
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let a = _mm256_loadu_epi8(a.as_ptr());
        let m = 0b10101010_11001100_11101000_11001010;
        _mm256_mask_storeu_epi8(r.as_mut_ptr(), m, a);
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
        ];
        let e = _mm256_loadu_epi8(e.as_ptr());
        assert_eq_m256i(_mm256_loadu_epi8(r.as_ptr()), e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_mask_loadu_epi16() {
        let src = _mm_set1_epi16(42);
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11001010;
        let r = _mm_mask_loadu_epi16(src, m, black_box(p));
        let e = &[42_i16, 2, 42, 4, 42, 42, 7, 8];
        let e = _mm_loadu_epi16(e.as_ptr());
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_maskz_loadu_epi16() {
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11001010;
        let r = _mm_maskz_loadu_epi16(m, black_box(p));
        let e = &[0_i16, 2, 0, 4, 0, 0, 7, 8];
        let e = _mm_loadu_epi16(e.as_ptr());
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_mask_storeu_epi16() {
        let mut r = [42_i16; 8];
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
        let a = _mm_loadu_epi16(a.as_ptr());
        let m = 0b11001010;
        _mm_mask_storeu_epi16(r.as_mut_ptr(), m, a);
        let e = &[42_i16, 2, 42, 4, 42, 42, 7, 8];
        let e = _mm_loadu_epi16(e.as_ptr());
        assert_eq_m128i(_mm_loadu_epi16(r.as_ptr()), e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_mask_loadu_epi8() {
        let src = _mm_set1_epi8(42);
        let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm_mask_loadu_epi8(src, m, black_box(p));
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
        ];
        let e = _mm_loadu_epi8(e.as_ptr());
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_maskz_loadu_epi8() {
        let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm_maskz_loadu_epi8(m, black_box(p));
        let e = &[0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16];
        let e = _mm_loadu_epi8(e.as_ptr());
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_mask_storeu_epi8() {
        let mut r = [42_i8; 16];
        let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let a = _mm_loadu_epi8(a.as_ptr());
        let m = 0b11101000_11001010;
        _mm_mask_storeu_epi8(r.as_mut_ptr(), m, a);
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
        ];
        let e = _mm_loadu_epi8(e.as_ptr());
        assert_eq_m128i(_mm_loadu_epi8(r.as_ptr()), e);
17480    }
17481
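    // The madd tests below exercise vpmaddwd: adjacent pairs of signed 16-bit
    // elements are multiplied and the two products are summed into one signed
    // 32-bit lane, so with a == b == 1 every result lane is 1*1 + 1*1 = 2.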
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_madd_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_madd_epi16(a, b);
        let e = _mm512_set1_epi32(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    fn test_mm512_mask_madd_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_mask_madd_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_madd_epi16(a, 0b00000000_00001111, a, b);
        let e = _mm512_set_epi32(
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            2,
            2,
            2,
            2,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    fn test_mm512_maskz_madd_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_maskz_madd_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_madd_epi16(0b00000000_00001111, a, b);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_mask_madd_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_mask_madd_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_madd_epi16(a, 0b00001111, a, b);
        let e = _mm256_set_epi32(
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            2,
            2,
            2,
            2,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_maskz_madd_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_maskz_madd_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_madd_epi16(0b00001111, a, b);
        let e = _mm256_set_epi32(0, 0, 0, 0, 2, 2, 2, 2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_mask_madd_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_mask_madd_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_madd_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi32(2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_maskz_madd_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_maskz_madd_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_madd_epi16(0b00001111, a, b);
        let e = _mm_set_epi32(2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }

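    // The maddubs tests exercise vpmaddubsw: unsigned 8-bit elements of `a`
    // are multiplied with the corresponding signed 8-bit elements of `b`, and
    // adjacent products are summed with signed saturation into 16-bit lanes,
    // so with a == b == 1 every result lane is again 1*1 + 1*1 = 2.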
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_maddubs_epi16() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_maddubs_epi16(a, b);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    fn test_mm512_mask_maddubs_epi16() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let src = _mm512_set1_epi16(1);
        let r = _mm512_mask_maddubs_epi16(src, 0, a, b);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_maddubs_epi16(src, 0b00000000_00000000_00000000_00000001, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    fn test_mm512_maskz_maddubs_epi16() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_maskz_maddubs_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_maddubs_epi16(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2,
                                 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_mask_maddubs_epi16() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        let src = _mm256_set1_epi16(1);
        let r = _mm256_mask_maddubs_epi16(src, 0, a, b);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_maddubs_epi16(src, 0b00000000_00000001, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_maskz_maddubs_epi16() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_maskz_maddubs_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_maddubs_epi16(0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_mask_maddubs_epi16() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let src = _mm_set1_epi16(1);
        let r = _mm_mask_maddubs_epi16(src, 0, a, b);
        assert_eq_m128i(r, src);
        let r = _mm_mask_maddubs_epi16(src, 0b00000001, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_maskz_maddubs_epi16() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let r = _mm_maskz_maddubs_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_maddubs_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }

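    // The packs_epi32 tests exercise vpackssdw: 32-bit lanes are narrowed to
    // 16 bits with signed saturation (i32::MAX clamps to i16::MAX), and `a`
    // and `b` are interleaved per 128-bit lane, which is why the expected
    // vectors alternate in groups of four.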
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_packs_epi32() {
        let a = _mm512_set1_epi32(i32::MAX);
        let b = _mm512_set1_epi32(1);
        let r = _mm512_packs_epi32(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX,
                                 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    fn test_mm512_mask_packs_epi32() {
        let a = _mm512_set1_epi32(i32::MAX);
        let b = _mm512_set1_epi32(1 << 16 | 1);
        let r = _mm512_mask_packs_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_packs_epi32(b, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    fn test_mm512_maskz_packs_epi32() {
        let a = _mm512_set1_epi32(i32::MAX);
        let b = _mm512_set1_epi32(1);
        let r = _mm512_maskz_packs_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_packs_epi32(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_mask_packs_epi32() {
        let a = _mm256_set1_epi32(i32::MAX);
        let b = _mm256_set1_epi32(1 << 16 | 1);
        let r = _mm256_mask_packs_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_packs_epi32(b, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_maskz_packs_epi32() {
        let a = _mm256_set1_epi32(i32::MAX);
        let b = _mm256_set1_epi32(1);
        let r = _mm256_maskz_packs_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_packs_epi32(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_mask_packs_epi32() {
        let a = _mm_set1_epi32(i32::MAX);
        let b = _mm_set1_epi32(1 << 16 | 1);
        let r = _mm_mask_packs_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_packs_epi32(b, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_maskz_packs_epi32() {
        let a = _mm_set1_epi32(i32::MAX);
        let b = _mm_set1_epi32(1);
        let r = _mm_maskz_packs_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_packs_epi32(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m128i(r, e);
    }

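    // packs_epi16 (vpacksswb) narrows 16-bit lanes to 8 bits with signed
    // saturation, interleaving `a` and `b` in groups of eight per 128-bit lane.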
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_packs_epi16() {
        let a = _mm512_set1_epi16(i16::MAX);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_packs_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
                                1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
                                1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
                                1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    fn test_mm512_mask_packs_epi16() {
        let a = _mm512_set1_epi16(i16::MAX);
        let b = _mm512_set1_epi16(1 << 8 | 1);
        let r = _mm512_mask_packs_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_packs_epi16(
            b,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    fn test_mm512_maskz_packs_epi16() {
        let a = _mm512_set1_epi16(i16::MAX);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_maskz_packs_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_packs_epi16(
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_mask_packs_epi16() {
        let a = _mm256_set1_epi16(i16::MAX);
        let b = _mm256_set1_epi16(1 << 8 | 1);
        let r = _mm256_mask_packs_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_packs_epi16(b, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_maskz_packs_epi16() {
        let a = _mm256_set1_epi16(i16::MAX);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_maskz_packs_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_packs_epi16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_mask_packs_epi16() {
        let a = _mm_set1_epi16(i16::MAX);
        let b = _mm_set1_epi16(1 << 8 | 1);
        let r = _mm_mask_packs_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_packs_epi16(b, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_maskz_packs_epi16() {
        let a = _mm_set1_epi16(i16::MAX);
        let b = _mm_set1_epi16(1);
        let r = _mm_maskz_packs_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_packs_epi16(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }

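    // packus_epi32 (vpackusdw) narrows with *unsigned* saturation, so the -1
    // inputs below clamp to 0 rather than wrapping.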
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_packus_epi32() {
        let a = _mm512_set1_epi32(-1);
        let b = _mm512_set1_epi32(1);
        let r = _mm512_packus_epi32(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0,
                                 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    fn test_mm512_mask_packus_epi32() {
        let a = _mm512_set1_epi32(-1);
        let b = _mm512_set1_epi32(1 << 16 | 1);
        let r = _mm512_mask_packus_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_packus_epi32(b, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    fn test_mm512_maskz_packus_epi32() {
        let a = _mm512_set1_epi32(-1);
        let b = _mm512_set1_epi32(1);
        let r = _mm512_maskz_packus_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_packus_epi32(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_mask_packus_epi32() {
        let a = _mm256_set1_epi32(-1);
        let b = _mm256_set1_epi32(1 << 16 | 1);
        let r = _mm256_mask_packus_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_packus_epi32(b, 0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_maskz_packus_epi32() {
        let a = _mm256_set1_epi32(-1);
        let b = _mm256_set1_epi32(1);
        let r = _mm256_maskz_packus_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_packus_epi32(0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_mask_packus_epi32() {
        let a = _mm_set1_epi32(-1);
        let b = _mm_set1_epi32(1 << 16 | 1);
        let r = _mm_mask_packus_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_packus_epi32(b, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_maskz_packus_epi32() {
        let a = _mm_set1_epi32(-1);
        let b = _mm_set1_epi32(1);
        let r = _mm_maskz_packus_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_packus_epi32(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

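    // packus_epi16 (vpackuswb) is the 16-to-8-bit unsigned-saturating variant;
    // again the -1 inputs clamp to 0.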
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_packus_epi16() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_packus_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
                                1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
                                1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
                                1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    fn test_mm512_mask_packus_epi16() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(1 << 8 | 1);
        let r = _mm512_mask_packus_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_packus_epi16(
            b,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    fn test_mm512_maskz_packus_epi16() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_maskz_packus_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_packus_epi16(
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_mask_packus_epi16() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(1 << 8 | 1);
        let r = _mm256_mask_packus_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_packus_epi16(b, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_maskz_packus_epi16() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_maskz_packus_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_packus_epi16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_mask_packus_epi16() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(1 << 8 | 1);
        let r = _mm_mask_packus_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_packus_epi16(b, 0b00000000_00001111, a, b);
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_maskz_packus_epi16() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(1);
        let r = _mm_maskz_packus_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_packus_epi16(0b00000000_00001111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

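    // avg_epu16 (vpavgw) computes the rounding average (a + b + 1) >> 1 on
    // unsigned 16-bit lanes, so avg(1, 1) == 1.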
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_avg_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_avg_epu16(a, b);
        let e = _mm512_set1_epi16(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_avg_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_mask_avg_epu16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_avg_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_avg_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_maskz_avg_epu16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_avg_epu16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_avg_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_mask_avg_epu16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_avg_epu16(a, 0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_avg_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_maskz_avg_epu16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_avg_epu16(0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_avg_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_mask_avg_epu16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_avg_epu16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_avg_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_maskz_avg_epu16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_avg_epu16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

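    // avg_epu8 (vpavgb) applies the same rounding average to unsigned bytes.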
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_avg_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_avg_epu8(a, b);
        let e = _mm512_set1_epi8(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_avg_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_mask_avg_epu8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_avg_epu8(
            a,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_avg_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_maskz_avg_epu8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_avg_epu8(
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_avg_epu8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_mask_avg_epu8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_avg_epu8(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_avg_epu8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_maskz_avg_epu8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_avg_epu8(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_avg_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let r = _mm_mask_avg_epu8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_avg_epu8(a, 0b00000000_00001111, a, b);
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_avg_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let r = _mm_maskz_avg_epu8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_avg_epu8(0b00000000_00001111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

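    // For the sll/srl/sra tests the shift count is taken from the low 64 bits
    // of the __m128i `count` argument, so _mm_set1_epi16(2) encodes a count of
    // 0x0002000200020002, far larger than 15; every lane is therefore shifted
    // out entirely and the expected result is zero.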
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_sll_epi16() {
        let a = _mm512_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm512_sll_epi16(a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    fn test_mm512_mask_sll_epi16() {
        let a = _mm512_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm512_mask_sll_epi16(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_sll_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    fn test_mm512_maskz_sll_epi16() {
        let a = _mm512_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm512_maskz_sll_epi16(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_sll_epi16(0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_mask_sll_epi16() {
        let a = _mm256_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm256_mask_sll_epi16(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_sll_epi16(a, 0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_maskz_sll_epi16() {
        let a = _mm256_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm256_maskz_sll_epi16(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_sll_epi16(0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_mask_sll_epi16() {
        let a = _mm_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm_mask_sll_epi16(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_sll_epi16(a, 0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_maskz_sll_epi16() {
        let a = _mm_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm_maskz_sll_epi16(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_sll_epi16(0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

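    // slli shifts by an immediate; shifting the sign bit (1 << 15) left by one
    // moves it out of the 16-bit lane, leaving zero.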
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_slli_epi16() {
        let a = _mm512_set1_epi16(1 << 15);
        let r = _mm512_slli_epi16::<1>(a);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_slli_epi16() {
        let a = _mm512_set1_epi16(1 << 15);
        let r = _mm512_mask_slli_epi16::<1>(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_slli_epi16::<1>(a, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_slli_epi16() {
        let a = _mm512_set1_epi16(1 << 15);
        let r = _mm512_maskz_slli_epi16::<1>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_slli_epi16::<1>(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_slli_epi16() {
        let a = _mm256_set1_epi16(1 << 15);
        let r = _mm256_mask_slli_epi16::<1>(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_slli_epi16::<1>(a, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_slli_epi16() {
        let a = _mm256_set1_epi16(1 << 15);
        let r = _mm256_maskz_slli_epi16::<1>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_slli_epi16::<1>(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_slli_epi16() {
        let a = _mm_set1_epi16(1 << 15);
        let r = _mm_mask_slli_epi16::<1>(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_slli_epi16::<1>(a, 0b11111111, a);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_slli_epi16() {
        let a = _mm_set1_epi16(1 << 15);
        let r = _mm_maskz_slli_epi16::<1>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_slli_epi16::<1>(0b11111111, a);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

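    // sllv (vpsllvw) shifts each 16-bit lane by the count in the corresponding
    // lane of `count`, unlike sll, which uses a single shared count.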
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_sllv_epi16() {
        let a = _mm512_set1_epi16(1 << 15);
        let count = _mm512_set1_epi16(2);
        let r = _mm512_sllv_epi16(a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_sllv_epi16() {
        let a = _mm512_set1_epi16(1 << 15);
        let count = _mm512_set1_epi16(2);
        let r = _mm512_mask_sllv_epi16(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_sllv_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_sllv_epi16() {
        let a = _mm512_set1_epi16(1 << 15);
        let count = _mm512_set1_epi16(2);
        let r = _mm512_maskz_sllv_epi16(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_sllv_epi16(0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_sllv_epi16() {
        let a = _mm256_set1_epi16(1 << 15);
        let count = _mm256_set1_epi16(2);
        let r = _mm256_sllv_epi16(a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_sllv_epi16() {
        let a = _mm256_set1_epi16(1 << 15);
        let count = _mm256_set1_epi16(2);
        let r = _mm256_mask_sllv_epi16(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_sllv_epi16(a, 0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_sllv_epi16() {
        let a = _mm256_set1_epi16(1 << 15);
        let count = _mm256_set1_epi16(2);
        let r = _mm256_maskz_sllv_epi16(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_sllv_epi16(0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_sllv_epi16() {
        let a = _mm_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm_sllv_epi16(a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_sllv_epi16() {
        let a = _mm_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm_mask_sllv_epi16(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_sllv_epi16(a, 0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_sllv_epi16() {
        let a = _mm_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm_maskz_sllv_epi16(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_sllv_epi16(0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

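    // srl shifts in zeros from the left; as with sll, the oversized 64-bit
    // count encoded by _mm_set1_epi16(2) clears every lane.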
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_srl_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        let r = _mm512_srl_epi16(a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    fn test_mm512_mask_srl_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        let r = _mm512_mask_srl_epi16(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_srl_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    fn test_mm512_maskz_srl_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        let r = _mm512_maskz_srl_epi16(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_srl_epi16(0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_mask_srl_epi16() {
        let a = _mm256_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        let r = _mm256_mask_srl_epi16(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_srl_epi16(a, 0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_maskz_srl_epi16() {
        let a = _mm256_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        let r = _mm256_maskz_srl_epi16(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_srl_epi16(0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_mask_srl_epi16() {
        let a = _mm_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        let r = _mm_mask_srl_epi16(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_srl_epi16(a, 0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_maskz_srl_epi16() {
        let a = _mm_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        let r = _mm_maskz_srl_epi16(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_srl_epi16(0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

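    // srli shifts right by an immediate: (1 << 1) >> 2 == 0.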
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_srli_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        let r = _mm512_srli_epi16::<2>(a);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_srli_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        let r = _mm512_mask_srli_epi16::<2>(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_srli_epi16::<2>(a, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_srli_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        let r = _mm512_maskz_srli_epi16::<2>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_srli_epi16::<2>(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_srli_epi16() {
        let a = _mm256_set1_epi16(1 << 1);
        let r = _mm256_mask_srli_epi16::<2>(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_srli_epi16::<2>(a, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_srli_epi16() {
        let a = _mm256_set1_epi16(1 << 1);
        let r = _mm256_maskz_srli_epi16::<2>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_srli_epi16::<2>(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_srli_epi16() {
        let a = _mm_set1_epi16(1 << 1);
        let r = _mm_mask_srli_epi16::<2>(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_srli_epi16::<2>(a, 0b11111111, a);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_srli_epi16() {
        let a = _mm_set1_epi16(1 << 1);
        let r = _mm_maskz_srli_epi16::<2>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_srli_epi16::<2>(0b11111111, a);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

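    // srlv is the per-lane variable-count logical right shift.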
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_srlv_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        let count = _mm512_set1_epi16(2);
        let r = _mm512_srlv_epi16(a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_srlv_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        let count = _mm512_set1_epi16(2);
        let r = _mm512_mask_srlv_epi16(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_srlv_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_srlv_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        let count = _mm512_set1_epi16(2);
        let r = _mm512_maskz_srlv_epi16(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_srlv_epi16(0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_srlv_epi16() {
        let a = _mm256_set1_epi16(1 << 1);
        let count = _mm256_set1_epi16(2);
        let r = _mm256_srlv_epi16(a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_srlv_epi16() {
        let a = _mm256_set1_epi16(1 << 1);
        let count = _mm256_set1_epi16(2);
        let r = _mm256_mask_srlv_epi16(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_srlv_epi16(a, 0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_srlv_epi16() {
        let a = _mm256_set1_epi16(1 << 1);
        let count = _mm256_set1_epi16(2);
        let r = _mm256_maskz_srlv_epi16(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_srlv_epi16(0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_srlv_epi16() {
        let a = _mm_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        let r = _mm_srlv_epi16(a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_srlv_epi16() {
        let a = _mm_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        let r = _mm_mask_srlv_epi16(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_srlv_epi16(a, 0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_srlv_epi16() {
        let a = _mm_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        let r = _mm_maskz_srlv_epi16(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_srlv_epi16(0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

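    // sra shifts in copies of the sign bit; for the non-negative inputs below
    // the oversized count again produces zero.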
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_sra_epi16() {
        let a = _mm512_set1_epi16(8);
        let count = _mm_set1_epi16(1);
        let r = _mm512_sra_epi16(a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    fn test_mm512_mask_sra_epi16() {
        let a = _mm512_set1_epi16(8);
        let count = _mm_set1_epi16(1);
        let r = _mm512_mask_sra_epi16(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_sra_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    fn test_mm512_maskz_sra_epi16() {
        let a = _mm512_set1_epi16(8);
        let count = _mm_set1_epi16(1);
        let r = _mm512_maskz_sra_epi16(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_sra_epi16(0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_mask_sra_epi16() {
        let a = _mm256_set1_epi16(8);
        let count = _mm_set1_epi16(1);
        let r = _mm256_mask_sra_epi16(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_sra_epi16(a, 0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_maskz_sra_epi16() {
        let a = _mm256_set1_epi16(8);
        let count = _mm_set1_epi16(1);
        let r = _mm256_maskz_sra_epi16(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_sra_epi16(0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_mask_sra_epi16() {
        let a = _mm_set1_epi16(8);
        let count = _mm_set1_epi16(1);
        let r = _mm_mask_sra_epi16(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_sra_epi16(a, 0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_maskz_sra_epi16() {
        let a = _mm_set1_epi16(8);
        let count = _mm_set1_epi16(1);
        let r = _mm_maskz_sra_epi16(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_sra_epi16(0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

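    // `_mm*_srai_epi16` shifts every element right arithmetically by the
    // immediate: 8 >> 2 == 2.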
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_srai_epi16() {
        let a = _mm512_set1_epi16(8);
        let r = _mm512_srai_epi16::<2>(a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_srai_epi16() {
        let a = _mm512_set1_epi16(8);
        let r = _mm512_mask_srai_epi16::<2>(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_srai_epi16::<2>(a, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_srai_epi16() {
        let a = _mm512_set1_epi16(8);
        let r = _mm512_maskz_srai_epi16::<2>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_srai_epi16::<2>(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_srai_epi16() {
        let a = _mm256_set1_epi16(8);
        let r = _mm256_mask_srai_epi16::<2>(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_srai_epi16::<2>(a, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_srai_epi16() {
        let a = _mm256_set1_epi16(8);
        let r = _mm256_maskz_srai_epi16::<2>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_srai_epi16::<2>(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_srai_epi16() {
        let a = _mm_set1_epi16(8);
        let r = _mm_mask_srai_epi16::<2>(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_srai_epi16::<2>(a, 0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_srai_epi16() {
        let a = _mm_set1_epi16(8);
        let r = _mm_maskz_srai_epi16::<2>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_srai_epi16::<2>(0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

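    // `_mm*_srav_epi16` shifts each element right arithmetically by the count in
    // the corresponding element of `count`: 8 >> 2 == 2 in every lane.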
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_srav_epi16() {
        let a = _mm512_set1_epi16(8);
        let count = _mm512_set1_epi16(2);
        let r = _mm512_srav_epi16(a, count);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_srav_epi16() {
        let a = _mm512_set1_epi16(8);
        let count = _mm512_set1_epi16(2);
        let r = _mm512_mask_srav_epi16(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_srav_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_srav_epi16() {
        let a = _mm512_set1_epi16(8);
        let count = _mm512_set1_epi16(2);
        let r = _mm512_maskz_srav_epi16(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_srav_epi16(0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_srav_epi16() {
        let a = _mm256_set1_epi16(8);
        let count = _mm256_set1_epi16(2);
        let r = _mm256_srav_epi16(a, count);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_srav_epi16() {
        let a = _mm256_set1_epi16(8);
        let count = _mm256_set1_epi16(2);
        let r = _mm256_mask_srav_epi16(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_srav_epi16(a, 0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_srav_epi16() {
        let a = _mm256_set1_epi16(8);
        let count = _mm256_set1_epi16(2);
        let r = _mm256_maskz_srav_epi16(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_srav_epi16(0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_srav_epi16() {
        let a = _mm_set1_epi16(8);
        let count = _mm_set1_epi16(2);
        let r = _mm_srav_epi16(a, count);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_srav_epi16() {
        let a = _mm_set1_epi16(8);
        let count = _mm_set1_epi16(2);
        let r = _mm_mask_srav_epi16(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_srav_epi16(a, 0b11111111, a, count);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_srav_epi16() {
        let a = _mm_set1_epi16(8);
        let count = _mm_set1_epi16(2);
        let r = _mm_maskz_srav_epi16(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_srav_epi16(0b11111111, a, count);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

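    // `permutex2var_epi16` selects lanes from the concatenation of `a` and `b`:
    // the low index bits pick the lane and the next bit picks the source vector
    // (1 << 5 for 32 lanes, 1 << 4 for 16, 1 << 3 for 8), so indices with that
    // bit set pull the 100s from `b`.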
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_permutex2var_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                   9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm512_set1_epi16(100);
        let r = _mm512_permutex2var_epi16(a, idx, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    fn test_mm512_mask_permutex2var_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                   9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm512_set1_epi16(100);
        let r = _mm512_mask_permutex2var_epi16(a, 0, idx, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_permutex2var_epi16(a, 0b11111111_11111111_11111111_11111111, idx, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    fn test_mm512_maskz_permutex2var_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                   9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm512_set1_epi16(100);
        let r = _mm512_maskz_permutex2var_epi16(0, a, idx, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_permutex2var_epi16(0b11111111_11111111_11111111_11111111, a, idx, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    fn test_mm512_mask2_permutex2var_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                   9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm512_set1_epi16(100);
        let r = _mm512_mask2_permutex2var_epi16(a, idx, 0, b);
        assert_eq_m512i(r, idx);
        let r = _mm512_mask2_permutex2var_epi16(a, idx, 0b11111111_11111111_11111111_11111111, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_permutex2var_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
        let b = _mm256_set1_epi16(100);
        let r = _mm256_permutex2var_epi16(a, idx, b);
        let e = _mm256_set_epi16(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_mask_permutex2var_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
        let b = _mm256_set1_epi16(100);
        let r = _mm256_mask_permutex2var_epi16(a, 0, idx, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_permutex2var_epi16(a, 0b11111111_11111111, idx, b);
        let e = _mm256_set_epi16(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_maskz_permutex2var_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
        let b = _mm256_set1_epi16(100);
        let r = _mm256_maskz_permutex2var_epi16(0, a, idx, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_permutex2var_epi16(0b11111111_11111111, a, idx, b);
        let e = _mm256_set_epi16(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_mask2_permutex2var_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
        let b = _mm256_set1_epi16(100);
        let r = _mm256_mask2_permutex2var_epi16(a, idx, 0, b);
        assert_eq_m256i(r, idx);
        let r = _mm256_mask2_permutex2var_epi16(a, idx, 0b11111111_11111111, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_permutex2var_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm_set1_epi16(100);
        let r = _mm_permutex2var_epi16(a, idx, b);
        let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_mask_permutex2var_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm_set1_epi16(100);
        let r = _mm_mask_permutex2var_epi16(a, 0, idx, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_permutex2var_epi16(a, 0b11111111, idx, b);
        let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_maskz_permutex2var_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm_set1_epi16(100);
        let r = _mm_maskz_permutex2var_epi16(0, a, idx, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_permutex2var_epi16(0b11111111, a, idx, b);
        let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_mask2_permutex2var_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm_set1_epi16(100);
        let r = _mm_mask2_permutex2var_epi16(a, idx, 0, b);
        assert_eq_m128i(r, idx);
        let r = _mm_mask2_permutex2var_epi16(a, idx, 0b11111111, b);
        let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m128i(r, e);
    }

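    // `permutexvar_epi16` gathers `a[idx[i]]` into lane `i`; with every index
    // equal to 1, the result broadcasts lane 1 of `a`.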
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_permutexvar_epi16() {
        let idx = _mm512_set1_epi16(1);
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let r = _mm512_permutexvar_epi16(idx, a);
        let e = _mm512_set1_epi16(30);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    fn test_mm512_mask_permutexvar_epi16() {
        let idx = _mm512_set1_epi16(1);
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let r = _mm512_mask_permutexvar_epi16(a, 0, idx, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_permutexvar_epi16(a, 0b11111111_11111111_11111111_11111111, idx, a);
        let e = _mm512_set1_epi16(30);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    fn test_mm512_maskz_permutexvar_epi16() {
        let idx = _mm512_set1_epi16(1);
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let r = _mm512_maskz_permutexvar_epi16(0, idx, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_permutexvar_epi16(0b11111111_11111111_11111111_11111111, idx, a);
        let e = _mm512_set1_epi16(30);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_permutexvar_epi16() {
        let idx = _mm256_set1_epi16(1);
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_permutexvar_epi16(idx, a);
        let e = _mm256_set1_epi16(14);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_mask_permutexvar_epi16() {
        let idx = _mm256_set1_epi16(1);
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_mask_permutexvar_epi16(a, 0, idx, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_permutexvar_epi16(a, 0b11111111_11111111, idx, a);
        let e = _mm256_set1_epi16(14);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_maskz_permutexvar_epi16() {
        let idx = _mm256_set1_epi16(1);
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_maskz_permutexvar_epi16(0, idx, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_permutexvar_epi16(0b11111111_11111111, idx, a);
        let e = _mm256_set1_epi16(14);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_permutexvar_epi16() {
        let idx = _mm_set1_epi16(1);
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_permutexvar_epi16(idx, a);
        let e = _mm_set1_epi16(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_mask_permutexvar_epi16() {
        let idx = _mm_set1_epi16(1);
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_mask_permutexvar_epi16(a, 0, idx, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_permutexvar_epi16(a, 0b11111111, idx, a);
        let e = _mm_set1_epi16(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_maskz_permutexvar_epi16() {
        let idx = _mm_set1_epi16(1);
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_maskz_permutexvar_epi16(0, idx, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_permutexvar_epi16(0b11111111, idx, a);
        let e = _mm_set1_epi16(6);
        assert_eq_m128i(r, e);
    }

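    // `mask_blend` takes each element from `b` where the mask bit is set and from
    // `a` where it is clear; mask bit `i` controls lane `i`, the rightmost
    // argument of `_mm*_set_epi16`/`_mm*_set_epi8` being lane 0.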
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_blend_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_mask_blend_epi16(0b11111111_00000000_11111111_00000000, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                                 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_blend_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_mask_blend_epi16(0b11111111_00000000, a, b);
        let e = _mm256_set_epi16(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_blend_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(2);
        let r = _mm_mask_blend_epi16(0b11110000, a, b);
        let e = _mm_set_epi16(2, 2, 2, 2, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_blend_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_mask_blend_epi8(
            0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_blend_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_mask_blend_epi8(0b11111111_00000000_11111111_00000000, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_blend_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(2);
        let r = _mm_mask_blend_epi8(0b11111111_00000000, a, b);
        let e = _mm_set_epi8(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

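    // `broadcastw_epi16` replicates the lowest 16-bit lane of `a` (here 24) to
    // all lanes of the destination.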
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_broadcastw_epi16() {
        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm512_broadcastw_epi16(a);
        let e = _mm512_set1_epi16(24);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_broadcastw_epi16() {
        let src = _mm512_set1_epi16(1);
        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm512_mask_broadcastw_epi16(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_broadcastw_epi16(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(24);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_broadcastw_epi16() {
        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm512_maskz_broadcastw_epi16(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_broadcastw_epi16(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(24);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_broadcastw_epi16() {
        let src = _mm256_set1_epi16(1);
        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm256_mask_broadcastw_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_broadcastw_epi16(src, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(24);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_broadcastw_epi16() {
        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm256_maskz_broadcastw_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_broadcastw_epi16(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(24);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_broadcastw_epi16() {
        let src = _mm_set1_epi16(1);
        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm_mask_broadcastw_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_broadcastw_epi16(src, 0b11111111, a);
        let e = _mm_set1_epi16(24);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_broadcastw_epi16() {
        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm_maskz_broadcastw_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_broadcastw_epi16(0b11111111, a);
        let e = _mm_set1_epi16(24);
        assert_eq_m128i(r, e);
    }

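    // `broadcastb_epi8` replicates the lowest byte of `a` (here 32) to all lanes
    // of the destination.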
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_broadcastb_epi8() {
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_broadcastb_epi8(a);
        let e = _mm512_set1_epi8(32);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_broadcastb_epi8() {
        let src = _mm512_set1_epi8(1);
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_mask_broadcastb_epi8(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_broadcastb_epi8(
            src,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm512_set1_epi8(32);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_broadcastb_epi8() {
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_maskz_broadcastb_epi8(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_broadcastb_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm512_set1_epi8(32);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_broadcastb_epi8() {
        let src = _mm256_set1_epi8(1);
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm256_mask_broadcastb_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_broadcastb_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(32);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_broadcastb_epi8() {
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm256_maskz_broadcastb_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_broadcastb_epi8(0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(32);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_broadcastb_epi8() {
        let src = _mm_set1_epi8(1);
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm_mask_broadcastb_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_broadcastb_epi8(src, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(32);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_broadcastb_epi8() {
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm_maskz_broadcastb_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_broadcastb_epi8(0b11111111_11111111, a);
        let e = _mm_set1_epi8(32);
        assert_eq_m128i(r, e);
    }

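    // `unpackhi_epi16`/`unpackhi_epi8` interleave elements from the upper half of
    // each 128-bit block of `a` and `b`, so the expected pattern repeats per
    // 128-bit block.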
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_unpackhi_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        let r = _mm512_unpackhi_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(33, 1,  34, 2,  35, 3,  36, 4,  41, 9,  42, 10, 43, 11, 44, 12,
                                 49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_unpackhi_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        let r = _mm512_mask_unpackhi_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_unpackhi_epi16(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(33, 1,  34, 2,  35, 3,  36, 4,  41, 9,  42, 10, 43, 11, 44, 12,
                                 49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_unpackhi_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        let r = _mm512_maskz_unpackhi_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_unpackhi_epi16(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(33, 1,  34, 2,  35, 3,  36, 4,  41, 9,  42, 10, 43, 11, 44, 12,
                                 49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_unpackhi_epi16() {
        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm256_set_epi16(
            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
        );
        let r = _mm256_mask_unpackhi_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_unpackhi_epi16(a, 0b11111111_11111111, a, b);
        let e = _mm256_set_epi16(33, 1, 34, 2, 35, 3, 36, 4, 41, 9, 42, 10, 43, 11, 44, 12);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_unpackhi_epi16() {
        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm256_set_epi16(
            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
        );
        let r = _mm256_maskz_unpackhi_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_unpackhi_epi16(0b11111111_11111111, a, b);
        let e = _mm256_set_epi16(33, 1, 34, 2, 35, 3, 36, 4, 41, 9, 42, 10, 43, 11, 44, 12);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_unpackhi_epi16() {
        let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
        let r = _mm_mask_unpackhi_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_unpackhi_epi16(a, 0b11111111, a, b);
        let e = _mm_set_epi16(33, 1, 34, 2, 35, 3, 36, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_unpackhi_epi16() {
        let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
        let r = _mm_maskz_unpackhi_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_unpackhi_epi16(0b11111111, a, b);
        let e = _mm_set_epi16(33, 1, 34, 2, 35, 3, 36, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        let r = _mm512_unpackhi_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24,
                                97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40,
                                113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        let r = _mm512_mask_unpackhi_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_unpackhi_epi8(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24,
                                97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40,
                                113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        let r = _mm512_maskz_unpackhi_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_unpackhi_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24,
                                97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40,
                                113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96);
        let r = _mm256_mask_unpackhi_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_unpackhi_epi8(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96);
        let r = _mm256_maskz_unpackhi_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_unpackhi_epi8(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_unpackhi_epi8() {
        let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm_set_epi8(
            65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
        );
        let r = _mm_mask_unpackhi_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_unpackhi_epi8(a, 0b11111111_11111111, a, b);
        let e = _mm_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_unpackhi_epi8() {
        let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm_set_epi8(
            65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
        );
        let r = _mm_maskz_unpackhi_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_unpackhi_epi8(0b11111111_11111111, a, b);
        let e = _mm_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8);
        assert_eq_m128i(r, e);
    }

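    // `unpacklo_epi16`/`unpacklo_epi8` interleave elements from the lower half of
    // each 128-bit block of `a` and `b`.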
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_unpacklo_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        let r = _mm512_unpacklo_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(37, 5,  38, 6,  39, 7,  40, 8,  45, 13, 46, 14, 47, 15, 48, 16,
                                 53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_unpacklo_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        let r = _mm512_mask_unpacklo_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_unpacklo_epi16(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(37, 5,  38, 6,  39, 7,  40, 8,  45, 13, 46, 14, 47, 15, 48, 16,
                                 53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_unpacklo_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        let r = _mm512_maskz_unpacklo_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_unpacklo_epi16(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(37, 5,  38, 6,  39, 7,  40, 8,  45, 13, 46, 14, 47, 15, 48, 16,
                                 53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_unpacklo_epi16() {
        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm256_set_epi16(
            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
        );
        let r = _mm256_mask_unpacklo_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_unpacklo_epi16(a, 0b11111111_11111111, a, b);
        let e = _mm256_set_epi16(37, 5, 38, 6, 39, 7, 40, 8, 45, 13, 46, 14, 47, 15, 48, 16);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_unpacklo_epi16() {
        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm256_set_epi16(
            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
        );
        let r = _mm256_maskz_unpacklo_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_unpacklo_epi16(0b11111111_11111111, a, b);
        let e = _mm256_set_epi16(37, 5, 38, 6, 39, 7, 40, 8, 45, 13, 46, 14, 47, 15, 48, 16);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_unpacklo_epi16() {
        let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
        let r = _mm_mask_unpacklo_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_unpacklo_epi16(a, 0b11111111, a, b);
        let e = _mm_set_epi16(37, 5, 38, 6, 39, 7, 40, 8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_unpacklo_epi16() {
        let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
        let r = _mm_maskz_unpacklo_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_unpacklo_epi16(0b11111111, a, b);
        let e = _mm_set_epi16(37, 5, 38, 6, 39, 7, 40, 8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_unpacklo_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        let r = _mm512_unpacklo_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32,
                                105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48,
                                121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0,   64);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_unpacklo_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        let r = _mm512_mask_unpacklo_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_unpacklo_epi8(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32,
                                105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48,
                                121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0,   64);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_unpacklo_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        let r = _mm512_maskz_unpacklo_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_unpacklo_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32,
                                105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48,
                                121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0,   64);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_unpacklo_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96);
        let r = _mm256_mask_unpacklo_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_unpacklo_epi8(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_unpacklo_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96);
        let r = _mm256_maskz_unpacklo_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_unpacklo_epi8(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_unpacklo_epi8() {
        let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm_set_epi8(
            65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
        );
        let r = _mm_mask_unpacklo_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_unpacklo_epi8(a, 0b11111111_11111111, a, b);
        let e = _mm_set_epi8(
            73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_unpacklo_epi8() {
        let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
19883        let b = _mm_set_epi8(
19884            65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
19885        );
19886        let r = _mm_maskz_unpacklo_epi8(0, a, b);
19887        assert_eq_m128i(r, _mm_setzero_si128());
19888        let r = _mm_maskz_unpacklo_epi8(0b11111111_11111111, a, b);
19889        let e = _mm_set_epi8(
19890            73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16,
19891        );
19892        assert_eq_m128i(r, e);
19893    }
19894
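    // The mask/maskz `mov` intrinsics are pure blends: each element of the
    // result comes from `a` when its mask bit is set, and from `src` (mask
    // variant) or zero (maskz variant) otherwise.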
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_mov_epi16() {
        let src = _mm512_set1_epi16(1);
        let a = _mm512_set1_epi16(2);
        let r = _mm512_mask_mov_epi16(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_mov_epi16(src, 0b11111111_11111111_11111111_11111111, a);
        assert_eq_m512i(r, a);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_mov_epi16() {
        let a = _mm512_set1_epi16(2);
        let r = _mm512_maskz_mov_epi16(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_mov_epi16(0b11111111_11111111_11111111_11111111, a);
        assert_eq_m512i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_mov_epi16() {
        let src = _mm256_set1_epi16(1);
        let a = _mm256_set1_epi16(2);
        let r = _mm256_mask_mov_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_mov_epi16(src, 0b11111111_11111111, a);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_mov_epi16() {
        let a = _mm256_set1_epi16(2);
        let r = _mm256_maskz_mov_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_mov_epi16(0b11111111_11111111, a);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_mov_epi16() {
        let src = _mm_set1_epi16(1);
        let a = _mm_set1_epi16(2);
        let r = _mm_mask_mov_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_mov_epi16(src, 0b11111111, a);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_mov_epi16() {
        let a = _mm_set1_epi16(2);
        let r = _mm_maskz_mov_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_mov_epi16(0b11111111, a);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_mov_epi8() {
        let src = _mm512_set1_epi8(1);
        let a = _mm512_set1_epi8(2);
        let r = _mm512_mask_mov_epi8(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_mov_epi8(
            src,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
        );
        assert_eq_m512i(r, a);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_mov_epi8() {
        let a = _mm512_set1_epi8(2);
        let r = _mm512_maskz_mov_epi8(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_mov_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
        );
        assert_eq_m512i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_mov_epi8() {
        let src = _mm256_set1_epi8(1);
        let a = _mm256_set1_epi8(2);
        let r = _mm256_mask_mov_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_mov_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_mov_epi8() {
        let a = _mm256_set1_epi8(2);
        let r = _mm256_maskz_mov_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_mov_epi8(0b11111111_11111111_11111111_11111111, a);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_mov_epi8() {
        let src = _mm_set1_epi8(1);
        let a = _mm_set1_epi8(2);
        let r = _mm_mask_mov_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_mov_epi8(src, 0b11111111_11111111, a);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_mov_epi8() {
        let a = _mm_set1_epi8(2);
        let r = _mm_maskz_mov_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_mov_epi8(0b11111111_11111111, a);
        assert_eq_m128i(r, a);
    }

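    // The mask/maskz `set1` intrinsics broadcast the scalar into the selected
    // elements only; unselected elements keep `src` or become zero.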
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_set1_epi16() {
        let src = _mm512_set1_epi16(2);
        let a: i16 = 11;
        let r = _mm512_mask_set1_epi16(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_set1_epi16(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(11);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_set1_epi16() {
        let a: i16 = 11;
        let r = _mm512_maskz_set1_epi16(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_set1_epi16(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(11);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_set1_epi16() {
        let src = _mm256_set1_epi16(2);
        let a: i16 = 11;
        let r = _mm256_mask_set1_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_set1_epi16(src, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(11);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_set1_epi16() {
        let a: i16 = 11;
        let r = _mm256_maskz_set1_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_set1_epi16(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(11);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_set1_epi16() {
        let src = _mm_set1_epi16(2);
        let a: i16 = 11;
        let r = _mm_mask_set1_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_set1_epi16(src, 0b11111111, a);
        let e = _mm_set1_epi16(11);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_set1_epi16() {
        let a: i16 = 11;
        let r = _mm_maskz_set1_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_set1_epi16(0b11111111, a);
        let e = _mm_set1_epi16(11);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_set1_epi8() {
        let src = _mm512_set1_epi8(2);
        let a: i8 = 11;
        let r = _mm512_mask_set1_epi8(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_set1_epi8(
            src,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm512_set1_epi8(11);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_set1_epi8() {
        let a: i8 = 11;
        let r = _mm512_maskz_set1_epi8(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_set1_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm512_set1_epi8(11);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_set1_epi8() {
        let src = _mm256_set1_epi8(2);
        let a: i8 = 11;
        let r = _mm256_mask_set1_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_set1_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(11);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_set1_epi8() {
        let a: i8 = 11;
        let r = _mm256_maskz_set1_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_set1_epi8(0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(11);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_set1_epi8() {
        let src = _mm_set1_epi8(2);
        let a: i8 = 11;
        let r = _mm_mask_set1_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_set1_epi8(src, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(11);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_set1_epi8() {
        let a: i8 = 11;
        let r = _mm_maskz_set1_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_set1_epi8(0b11111111_11111111, a);
        let e = _mm_set1_epi8(11);
        assert_eq_m128i(r, e);
    }

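    // `shufflelo` permutes the low four 16-bit words of each 128-bit lane
    // using the four 2-bit fields of IMM8; the high four words pass through
    // unchanged. IMM8 = 0b00_01_01_11 decodes (low field first) to source
    // word indices 3, 1, 1, 0, which is what the expected vectors encode.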
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_shufflelo_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12,
            16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28,
        );
        let r = _mm512_shufflelo_epi16::<0b00_01_01_11>(a);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_shufflelo_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm512_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shufflelo_epi16::<0b00_01_01_11>(
            a,
            0b11111111_11111111_11111111_11111111,
            a,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12,
            16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_shufflelo_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm512_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r =
            _mm512_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111_11111111_11111111_11111111, a);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12,
            16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_shufflelo_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0b11111111_11111111, a);
        let e = _mm256_set_epi16(0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_shufflelo_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111_11111111, a);
        let e = _mm256_set_epi16(0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_shufflelo_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0b11111111, a);
        let e = _mm_set_epi16(0, 1, 2, 3, 7, 6, 6, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_shufflelo_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111, a);
        let e = _mm_set_epi16(0, 1, 2, 3, 7, 6, 6, 4);
        assert_eq_m128i(r, e);
    }

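    // `shufflehi` is the counterpart of `shufflelo`: the same 2-bit IMM8
    // fields select among the high four words of each 128-bit lane while the
    // low four words pass through unchanged.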
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_shufflehi_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15,
            19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31,
        );
        let r = _mm512_shufflehi_epi16::<0b00_01_01_11>(a);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_shufflehi_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm512_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shufflehi_epi16::<0b00_01_01_11>(
            a,
            0b11111111_11111111_11111111_11111111,
            a,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15,
            19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_shufflehi_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm512_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r =
            _mm512_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111_11111111_11111111_11111111, a);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15,
            19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_shufflehi_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0b11111111_11111111, a);
        let e = _mm256_set_epi16(3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_shufflehi_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111_11111111, a);
        let e = _mm256_set_epi16(3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_shufflehi_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0b11111111, a);
        let e = _mm_set_epi16(3, 2, 2, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_shufflehi_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111, a);
        let e = _mm_set_epi16(3, 2, 2, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

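    // `shuffle_epi8` (vpshufb) is lane-local: every byte of `b` selects a
    // byte from its own 128-bit lane of `a` (low four bits index, a set bit 7
    // zeroes the result byte). With `b = set1(1)` each lane is filled with
    // its byte 1; given `_mm512_set_epi8`'s high-to-low argument order the
    // expected vectors hold 14, 30, 46 and 62 for the four lanes as printed.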
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_shuffle_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
                                46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
                                62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    fn test_mm512_mask_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_mask_shuffle_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shuffle_epi8(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
                                46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
                                62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    fn test_mm512_maskz_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_maskz_shuffle_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shuffle_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
                                46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
                                62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_mask_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_mask_shuffle_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shuffle_epi8(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_maskz_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_maskz_shuffle_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shuffle_epi8(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_mask_shuffle_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_set1_epi8(1);
        let r = _mm_mask_shuffle_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shuffle_epi8(a, 0b11111111_11111111, a, b);
        let e = _mm_set_epi8(
            14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_maskz_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15);
        let b = _mm_set1_epi8(1);
        let r = _mm_maskz_shuffle_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shuffle_epi8(0b11111111_11111111, a, b);
        let e = _mm_set_epi8(
            14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
        );
        assert_eq_m128i(r, e);
    }

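    // The `test_epi*_mask` intrinsics AND the two operands elementwise and
    // set the corresponding mask bit when the result is non-zero. Here every
    // element of `a` and `b` shares bit 0, so every mask bit is set.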
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_test_epi16_mask() {
        let a = _mm512_set1_epi16(1 << 0);
        let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm512_test_epi16_mask(a, b);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_test_epi16_mask() {
        let a = _mm512_set1_epi16(1 << 0);
        let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm512_mask_test_epi16_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm512_mask_test_epi16_mask(0b11111111_11111111_11111111_11111111, a, b);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_test_epi16_mask() {
        let a = _mm256_set1_epi16(1 << 0);
        let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm256_test_epi16_mask(a, b);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_test_epi16_mask() {
        let a = _mm256_set1_epi16(1 << 0);
        let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm256_mask_test_epi16_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm256_mask_test_epi16_mask(0b11111111_11111111, a, b);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_test_epi16_mask() {
        let a = _mm_set1_epi16(1 << 0);
        let b = _mm_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm_test_epi16_mask(a, b);
        let e: __mmask8 = 0b11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_test_epi16_mask() {
        let a = _mm_set1_epi16(1 << 0);
        let b = _mm_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm_mask_test_epi16_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm_mask_test_epi16_mask(0b11111111, a, b);
        let e: __mmask8 = 0b11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_test_epi8_mask() {
        let a = _mm512_set1_epi8(1 << 0);
        let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm512_test_epi8_mask(a, b);
        let e: __mmask64 =
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_test_epi8_mask() {
        let a = _mm512_set1_epi8(1 << 0);
        let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm512_mask_test_epi8_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm512_mask_test_epi8_mask(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        let e: __mmask64 =
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_test_epi8_mask() {
        let a = _mm256_set1_epi8(1 << 0);
        let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm256_test_epi8_mask(a, b);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_test_epi8_mask() {
        let a = _mm256_set1_epi8(1 << 0);
        let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm256_mask_test_epi8_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm256_mask_test_epi8_mask(0b11111111_11111111_11111111_11111111, a, b);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_test_epi8_mask() {
        let a = _mm_set1_epi8(1 << 0);
        let b = _mm_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm_test_epi8_mask(a, b);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_test_epi8_mask() {
        let a = _mm_set1_epi8(1 << 0);
        let b = _mm_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm_mask_test_epi8_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm_mask_test_epi8_mask(0b11111111_11111111, a, b);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }

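    // `testn_epi*_mask` is the negated form: a mask bit is set only when the
    // elementwise AND is zero, so the same operands as above yield an
    // all-zero mask.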
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_testn_epi16_mask() {
        let a = _mm512_set1_epi16(1 << 0);
        let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm512_testn_epi16_mask(a, b);
        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_testn_epi16_mask() {
        let a = _mm512_set1_epi16(1 << 0);
        let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm512_mask_testn_epi16_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm512_mask_testn_epi16_mask(0b11111111_11111111_11111111_11111111, a, b);
        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_testn_epi16_mask() {
        let a = _mm256_set1_epi16(1 << 0);
        let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm256_testn_epi16_mask(a, b);
        let e: __mmask16 = 0b00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_testn_epi16_mask() {
        let a = _mm256_set1_epi16(1 << 0);
        let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm256_mask_testn_epi16_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm256_mask_testn_epi16_mask(0b11111111_11111111, a, b);
        let e: __mmask16 = 0b00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_testn_epi16_mask() {
        let a = _mm_set1_epi16(1 << 0);
        let b = _mm_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm_testn_epi16_mask(a, b);
        let e: __mmask8 = 0b00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_testn_epi16_mask() {
        let a = _mm_set1_epi16(1 << 0);
        let b = _mm_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm_mask_testn_epi16_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm_mask_testn_epi16_mask(0b11111111, a, b);
        let e: __mmask8 = 0b00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_testn_epi8_mask() {
        let a = _mm512_set1_epi8(1 << 0);
        let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm512_testn_epi8_mask(a, b);
        let e: __mmask64 =
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_testn_epi8_mask() {
        let a = _mm512_set1_epi8(1 << 0);
        let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm512_mask_testn_epi8_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm512_mask_testn_epi8_mask(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        let e: __mmask64 =
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_testn_epi8_mask() {
        let a = _mm256_set1_epi8(1 << 0);
        let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm256_testn_epi8_mask(a, b);
        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_testn_epi8_mask() {
        let a = _mm256_set1_epi8(1 << 0);
        let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm256_mask_testn_epi8_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm256_mask_testn_epi8_mask(0b11111111_11111111_11111111_11111111, a, b);
        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_testn_epi8_mask() {
        let a = _mm_set1_epi8(1 << 0);
        let b = _mm_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm_testn_epi8_mask(a, b);
        let e: __mmask16 = 0b00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_testn_epi8_mask() {
        let a = _mm_set1_epi8(1 << 0);
        let b = _mm_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm_mask_testn_epi8_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm_mask_testn_epi8_mask(0b11111111_11111111, a, b);
        let e: __mmask16 = 0b00000000_00000000;
        assert_eq!(r, e);
    }

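    // `_store_mask*` / `_load_mask*` move a mask value through a raw pointer,
    // which is why these tests are `unsafe`.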
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_store_mask64() {
        let a: __mmask64 =
            0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
        let mut r = 0;
        _store_mask64(&mut r, a);
        assert_eq!(r, a);
    }

    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_store_mask32() {
        let a: __mmask32 = 0b11111111_00000000_11111111_00000000;
        let mut r = 0;
        _store_mask32(&mut r, a);
        assert_eq!(r, a);
    }

    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_load_mask64() {
        let p: __mmask64 =
            0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
        let r = _load_mask64(&p);
        let e: __mmask64 =
            0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_load_mask32() {
        let p: __mmask32 = 0b11111111_00000000_11111111_00000000;
        let r = _load_mask32(&p);
        let e: __mmask32 = 0b11111111_00000000_11111111_00000000;
        assert_eq!(r, e);
    }

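    // `sad_epu8` (vpsadbw): each 64-bit element receives the sum of absolute
    // differences of its eight unsigned bytes, here 8 * |2 - 4| = 16.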
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_sad_epu8() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(4);
        let r = _mm512_sad_epu8(a, b);
        let e = _mm512_set1_epi64(16);
        assert_eq_m512i(r, e);
    }

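    // `dbsad_epu8` (vdbpsadbw) computes SADs over four-byte groups after an
    // IMM8-controlled dword shuffle of `b`; with IMM8 = 0 and uniform inputs
    // every 16-bit result is 4 * |2 - 4| = 8.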
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_dbsad_epu8() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(4);
        let r = _mm512_dbsad_epu8::<0>(a, b);
        let e = _mm512_set1_epi16(8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    fn test_mm512_mask_dbsad_epu8() {
        let src = _mm512_set1_epi16(1);
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(4);
        let r = _mm512_mask_dbsad_epu8::<0>(src, 0, a, b);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_dbsad_epu8::<0>(src, 0b11111111_11111111_11111111_11111111, a, b);
        let e = _mm512_set1_epi16(8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    fn test_mm512_maskz_dbsad_epu8() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(4);
        let r = _mm512_maskz_dbsad_epu8::<0>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_dbsad_epu8::<0>(0b11111111_11111111_11111111_11111111, a, b);
        let e = _mm512_set1_epi16(8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_dbsad_epu8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_dbsad_epu8::<0>(a, b);
        let e = _mm256_set1_epi16(8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_mask_dbsad_epu8() {
        let src = _mm256_set1_epi16(1);
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_mask_dbsad_epu8::<0>(src, 0, a, b);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_dbsad_epu8::<0>(src, 0b11111111_11111111, a, b);
        let e = _mm256_set1_epi16(8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_maskz_dbsad_epu8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_maskz_dbsad_epu8::<0>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_dbsad_epu8::<0>(0b11111111_11111111, a, b);
        let e = _mm256_set1_epi16(8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_dbsad_epu8() {
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(4);
        let r = _mm_dbsad_epu8::<0>(a, b);
        let e = _mm_set1_epi16(8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_mask_dbsad_epu8() {
        let src = _mm_set1_epi16(1);
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(4);
        let r = _mm_mask_dbsad_epu8::<0>(src, 0, a, b);
        assert_eq_m128i(r, src);
        let r = _mm_mask_dbsad_epu8::<0>(src, 0b11111111, a, b);
        let e = _mm_set1_epi16(8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_maskz_dbsad_epu8() {
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(4);
        let r = _mm_maskz_dbsad_epu8::<0>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_dbsad_epu8::<0>(0b11111111, a, b);
        let e = _mm_set1_epi16(8);
        assert_eq_m128i(r, e);
    }

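    // `movepi*_mask` collects the sign (most significant) bit of every
    // element into a bitmask, so inputs of `1 << 15` / `1 << 7` produce
    // all-ones masks.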
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_movepi16_mask() {
        let a = _mm512_set1_epi16(1 << 15);
        let r = _mm512_movepi16_mask(a);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_movepi16_mask() {
        let a = _mm256_set1_epi16(1 << 15);
        let r = _mm256_movepi16_mask(a);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_movepi16_mask() {
        let a = _mm_set1_epi16(1 << 15);
        let r = _mm_movepi16_mask(a);
        let e: __mmask8 = 0b11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_movepi8_mask() {
        let a = _mm512_set1_epi8(1 << 7);
        let r = _mm512_movepi8_mask(a);
        let e: __mmask64 =
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_movepi8_mask() {
        let a = _mm256_set1_epi8(1 << 7);
        let r = _mm256_movepi8_mask(a);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_movepi8_mask() {
        let a = _mm_set1_epi8(1 << 7);
        let r = _mm_movepi8_mask(a);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }

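    // `movm_epi*` is the reverse direction: every set mask bit becomes an
    // all-ones element (the OR chains below spell out the 0xFFFF / 0xFF bit
    // patterns), and every clear bit becomes zero.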
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_movm_epi16() {
        let a: __mmask32 = 0b11111111_11111111_11111111_11111111;
        let r = _mm512_movm_epi16(a);
        let e = _mm512_set1_epi16(
            1 << 15
                | 1 << 14
                | 1 << 13
                | 1 << 12
                | 1 << 11
                | 1 << 10
                | 1 << 9
                | 1 << 8
                | 1 << 7
                | 1 << 6
                | 1 << 5
                | 1 << 4
                | 1 << 3
                | 1 << 2
                | 1 << 1
                | 1 << 0,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_movm_epi16() {
        let a: __mmask16 = 0b11111111_11111111;
        let r = _mm256_movm_epi16(a);
        let e = _mm256_set1_epi16(
            1 << 15
                | 1 << 14
                | 1 << 13
                | 1 << 12
                | 1 << 11
                | 1 << 10
                | 1 << 9
                | 1 << 8
                | 1 << 7
                | 1 << 6
                | 1 << 5
                | 1 << 4
                | 1 << 3
                | 1 << 2
                | 1 << 1
                | 1 << 0,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_movm_epi16() {
        let a: __mmask8 = 0b11111111;
        let r = _mm_movm_epi16(a);
        let e = _mm_set1_epi16(
            1 << 15
                | 1 << 14
                | 1 << 13
                | 1 << 12
                | 1 << 11
                | 1 << 10
                | 1 << 9
                | 1 << 8
                | 1 << 7
                | 1 << 6
                | 1 << 5
                | 1 << 4
                | 1 << 3
                | 1 << 2
                | 1 << 1
                | 1 << 0,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_movm_epi8() {
        let a: __mmask64 =
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
        let r = _mm512_movm_epi8(a);
        let e =
            _mm512_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_movm_epi8() {
        let a: __mmask32 = 0b11111111_11111111_11111111_11111111;
        let r = _mm256_movm_epi8(a);
        let e =
            _mm256_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_movm_epi8() {
        let a: __mmask16 = 0b11111111_11111111;
        let r = _mm_movm_epi8(a);
        let e =
            _mm_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0);
        assert_eq_m128i(r, e);
    }

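    // `__mmask32` is a plain `u32` under the hood, so the `_cvtmask32_u32` /
    // `_cvtu32_mask32` pair is a bit-for-bit identity conversion.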
    #[simd_test(enable = "avx512bw")]
    const fn test_cvtmask32_u32() {
        let a: __mmask32 = 0b11001100_00110011_01100110_10011001;
        let r = _cvtmask32_u32(a);
        let e: u32 = 0b11001100_00110011_01100110_10011001;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_cvtu32_mask32() {
        let a: u32 = 0b11001100_00110011_01100110_10011001;
        let r = _cvtu32_mask32(a);
        let e: __mmask32 = 0b11001100_00110011_01100110_10011001;
        assert_eq!(r, e);
    }

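    // The `_k*_mask32`/`_k*_mask64` helpers treat mask registers as integers:
    // `kadd` is a plain wrapping addition, while `kand`, `knot`, `kandn`
    // (`!a & b`), `kor`, `kxor` and `kxnor` (`!(a ^ b)`) are the usual
    // bitwise operations.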
21024    #[simd_test(enable = "avx512bw")]
21025    const fn test_kadd_mask32() {
21026        let a: __mmask32 = 11;
21027        let b: __mmask32 = 22;
21028        let r = _kadd_mask32(a, b);
21029        let e: __mmask32 = 33;
21030        assert_eq!(r, e);
21031    }
21032
21033    #[simd_test(enable = "avx512bw")]
21034    const fn test_kadd_mask64() {
21035        let a: __mmask64 = 11;
21036        let b: __mmask64 = 22;
21037        let r = _kadd_mask64(a, b);
21038        let e: __mmask64 = 33;
21039        assert_eq!(r, e);
21040    }
21041
21042    #[simd_test(enable = "avx512bw")]
21043    const fn test_kand_mask32() {
21044        let a: __mmask32 = 0b11001100_00110011_11001100_00110011;
21045        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
21046        let r = _kand_mask32(a, b);
21047        let e: __mmask32 = 0b11001100_00110011_11001100_00110011;
21048        assert_eq!(r, e);
21049    }
21050
21051    #[simd_test(enable = "avx512bw")]
21052    const fn test_kand_mask64() {
21053        let a: __mmask64 =
21054            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
21055        let b: __mmask64 =
21056            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
21057        let r = _kand_mask64(a, b);
21058        let e: __mmask64 =
21059            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
21060        assert_eq!(r, e);
21061    }
21062
21063    #[simd_test(enable = "avx512bw")]
21064    const fn test_knot_mask32() {
21065        let a: __mmask32 = 0b11001100_00110011_11001100_00110011;
21066        let r = _knot_mask32(a);
21067        let e: __mmask32 = 0b00110011_11001100_00110011_11001100;
21068        assert_eq!(r, e);
21069    }
21070
21071    #[simd_test(enable = "avx512bw")]
21072    const fn test_knot_mask64() {
21073        let a: __mmask64 =
21074            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
21075        let r = _knot_mask64(a);
21076        let e: __mmask64 =
21077            0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
21078        assert_eq!(r, e);
21079    }
21080
    #[simd_test(enable = "avx512bw")]
    const fn test_kandn_mask32() {
        let a: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let r = _kandn_mask32(a, b);
        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_kandn_mask64() {
        let a: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let b: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let r = _kandn_mask64(a, b);
        let e: __mmask64 =
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_kor_mask32() {
        let a: __mmask32 = 0b00110011_11001100_00110011_11001100;
        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let r = _kor_mask32(a, b);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_kor_mask64() {
        let a: __mmask64 =
            0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
        let b: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let r = _kor_mask64(a, b);
        let e: __mmask64 =
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_kxor_mask32() {
        let a: __mmask32 = 0b00110011_11001100_00110011_11001100;
        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let r = _kxor_mask32(a, b);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_kxor_mask64() {
        let a: __mmask64 =
            0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
        let b: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let r = _kxor_mask64(a, b);
        let e: __mmask64 =
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_kxnor_mask32() {
        let a: __mmask32 = 0b00110011_11001100_00110011_11001100;
        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let r = _kxnor_mask32(a, b);
        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_kxnor_mask64() {
        let a: __mmask64 =
            0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
        let b: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let r = _kxnor_mask64(a, b);
        let e: __mmask64 =
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

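    // The `_kortest*` intrinsics OR the two masks: the return value is 1 only
    // when the result is all zeros, and the out-parameter is set to 1 only
    // when the result is all ones. In the 32-bit test `a | b` is all ones; in
    // the 64-bit test the same bit patterns leave the upper 32 bits clear, so
    // the out-parameter stays 0 there.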
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_kortest_mask32_u8() {
        let a: __mmask32 = 0b0110100101101001_0110100101101001;
        let b: __mmask32 = 0b1011011010110110_1011011010110110;
        let mut all_ones: u8 = 0;
        let r = _kortest_mask32_u8(a, b, &mut all_ones);
        assert_eq!(r, 0);
        assert_eq!(all_ones, 1);
    }

    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_kortest_mask64_u8() {
        let a: __mmask64 = 0b0110100101101001_0110100101101001;
        let b: __mmask64 = 0b1011011010110110_1011011010110110;
        let mut all_ones: u8 = 0;
        let r = _kortest_mask64_u8(a, b, &mut all_ones);
        assert_eq!(r, 0);
        assert_eq!(all_ones, 0);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_kortestc_mask32_u8() {
        let a: __mmask32 = 0b0110100101101001_0110100101101001;
        let b: __mmask32 = 0b1011011010110110_1011011010110110;
        let r = _kortestc_mask32_u8(a, b);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_kortestc_mask64_u8() {
        let a: __mmask64 = 0b0110100101101001_0110100101101001;
        let b: __mmask64 = 0b1011011010110110_1011011010110110;
        let r = _kortestc_mask64_u8(a, b);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_kortestz_mask32_u8() {
        let a: __mmask32 = 0b0110100101101001_0110100101101001;
        let b: __mmask32 = 0b1011011010110110_1011011010110110;
        let r = _kortestz_mask32_u8(a, b);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_kortestz_mask64_u8() {
        let a: __mmask64 = 0b0110100101101001_0110100101101001;
        let b: __mmask64 = 0b1011011010110110_1011011010110110;
        let r = _kortestz_mask64_u8(a, b);
        assert_eq!(r, 0);
    }

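    // The mask shift intrinsics take the count as a const generic and shift
    // in zeros; counts at or beyond the mask width yield an all-zero mask
    // rather than wrapping or overflowing, which the `<32>`/`<33>` and
    // `<64>`/`<65>` cases below pin down.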
    #[simd_test(enable = "avx512bw")]
    const fn test_kshiftli_mask32() {
        let a: __mmask32 = 0b0110100101101001_0110100101101001;
        let r = _kshiftli_mask32::<3>(a);
        let e: __mmask32 = 0b0100101101001011_0100101101001000;
        assert_eq!(r, e);

        let r = _kshiftli_mask32::<31>(a);
        let e: __mmask32 = 0b1000000000000000_0000000000000000;
        assert_eq!(r, e);

        let r = _kshiftli_mask32::<32>(a);
        let e: __mmask32 = 0b0000000000000000_0000000000000000;
        assert_eq!(r, e);

        let r = _kshiftli_mask32::<33>(a);
        let e: __mmask32 = 0b0000000000000000_0000000000000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_kshiftli_mask64() {
        let a: __mmask64 = 0b0110100101101001_0110100101101001;
        let r = _kshiftli_mask64::<3>(a);
        let e: __mmask64 = 0b011_0100101101001011_0100101101001000;
        assert_eq!(r, e);

        let r = _kshiftli_mask64::<63>(a);
        let e: __mmask64 = 0b1000000000000000_0000000000000000_0000000000000000_0000000000000000;
        assert_eq!(r, e);

        let r = _kshiftli_mask64::<64>(a);
        let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000000;
        assert_eq!(r, e);

        let r = _kshiftli_mask64::<65>(a);
        let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_kshiftri_mask32() {
        let a: __mmask32 = 0b1010100101101001_0110100101101001;
        let r = _kshiftri_mask32::<3>(a);
        let e: __mmask32 = 0b0001010100101101_0010110100101101;
        assert_eq!(r, e);

        let r = _kshiftri_mask32::<31>(a);
        let e: __mmask32 = 0b0000000000000000_0000000000000001;
        assert_eq!(r, e);

        let r = _kshiftri_mask32::<32>(a);
        let e: __mmask32 = 0b0000000000000000_0000000000000000;
        assert_eq!(r, e);

        let r = _kshiftri_mask32::<33>(a);
        let e: __mmask32 = 0b0000000000000000_0000000000000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_kshiftri_mask64() {
        let a: __mmask64 = 0b101_0100101101001011_0100101101001000;
        let r = _kshiftri_mask64::<3>(a);
        let e: __mmask64 = 0b1010100101101001_0110100101101001;
        assert_eq!(r, e);

        let r = _kshiftri_mask64::<34>(a);
        let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000001;
        assert_eq!(r, e);

        let r = _kshiftri_mask64::<35>(a);
        let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000000;
        assert_eq!(r, e);

        let r = _kshiftri_mask64::<64>(a);
        let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000000;
        assert_eq!(r, e);

        let r = _kshiftri_mask64::<65>(a);
        let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000000;
        assert_eq!(r, e);
    }

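    // `_ktest*` report two results: the return value is 1 only when `a & b`
    // is all zeros, and the out-parameter is 1 only when `!a & b` is all
    // zeros. Here `a` and `b` are exact complements, so `a & b == 0` (return
    // value 1) while `!a & b == b != 0` (out-parameter 0).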
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_ktest_mask32_u8() {
        let a: __mmask32 = 0b0110100100111100_0110100100111100;
        let b: __mmask32 = 0b1001011011000011_1001011011000011;
        let mut and_not: u8 = 0;
        let r = _ktest_mask32_u8(a, b, &mut and_not);
        assert_eq!(r, 1);
        assert_eq!(and_not, 0);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_ktestc_mask32_u8() {
        let a: __mmask32 = 0b0110100100111100_0110100100111100;
        let b: __mmask32 = 0b1001011011000011_1001011011000011;
        let r = _ktestc_mask32_u8(a, b);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_ktestz_mask32_u8() {
        let a: __mmask32 = 0b0110100100111100_0110100100111100;
        let b: __mmask32 = 0b1001011011000011_1001011011000011;
        let r = _ktestz_mask32_u8(a, b);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_ktest_mask64_u8() {
        let a: __mmask64 = 0b0110100100111100_0110100100111100;
        let b: __mmask64 = 0b1001011011000011_1001011011000011;
        let mut and_not: u8 = 0;
        let r = _ktest_mask64_u8(a, b, &mut and_not);
        assert_eq!(r, 1);
        assert_eq!(and_not, 0);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_ktestc_mask64_u8() {
        let a: __mmask64 = 0b0110100100111100_0110100100111100;
        let b: __mmask64 = 0b1001011011000011_1001011011000011;
        let r = _ktestc_mask64_u8(a, b);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_ktestz_mask64_u8() {
        let a: __mmask64 = 0b0110100100111100_0110100100111100;
        let b: __mmask64 = 0b1001011011000011_1001011011000011;
        let r = _ktestz_mask64_u8(a, b);
        assert_eq!(r, 1);
    }

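    // `_mm512_kunpackw` keeps the low 16 bits of each operand: `b` supplies
    // the low half of the result and `a` the high half. `_mm512_kunpackd`
    // does the same with 32-bit halves.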
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_kunpackw() {
        let a: u32 = 0x00110011;
        let b: u32 = 0x00001011;
        let r = _mm512_kunpackw(a, b);
        let e: u32 = 0x00111011;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_kunpackd() {
        let a: u64 = 0x11001100_00110011;
        let b: u64 = 0x00101110_00001011;
        let r = _mm512_kunpackd(a, b);
        let e: u64 = 0x00110011_00001011;
        assert_eq!(r, e);
    }

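    // `_mm512_cvtepi16_epi8` and its smaller variants truncate (`vpmovwb`):
    // each 16-bit lane keeps only its low byte. The 128-bit variant writes
    // the eight result bytes into the low half of the destination and zeroes
    // the rest.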
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_cvtepi16_epi8() {
        let a = _mm512_set1_epi16(2);
        let r = _mm512_cvtepi16_epi8(a);
        let e = _mm256_set1_epi8(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cvtepi16_epi8() {
        let src = _mm256_set1_epi8(1);
        let a = _mm512_set1_epi16(2);
        let r = _mm512_mask_cvtepi16_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvtepi16_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_cvtepi16_epi8() {
        let a = _mm512_set1_epi16(2);
        let r = _mm512_maskz_cvtepi16_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvtepi16_epi8(0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_cvtepi16_epi8() {
        let a = _mm256_set1_epi16(2);
        let r = _mm256_cvtepi16_epi8(a);
        let e = _mm_set1_epi8(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cvtepi16_epi8() {
        let src = _mm_set1_epi8(1);
        let a = _mm256_set1_epi16(2);
        let r = _mm256_mask_cvtepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtepi16_epi8(src, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_cvtepi16_epi8() {
        let a = _mm256_set1_epi16(2);
        let r = _mm256_maskz_cvtepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtepi16_epi8(0b11111111_11111111, a);
        let e = _mm_set1_epi8(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_cvtepi16_epi8() {
        let a = _mm_set1_epi16(2);
        let r = _mm_cvtepi16_epi8(a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cvtepi16_epi8() {
        let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        let a = _mm_set1_epi16(2);
        let r = _mm_mask_cvtepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepi16_epi8(src, 0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_cvtepi16_epi8() {
        let a = _mm_set1_epi16(2);
        let r = _mm_maskz_cvtepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepi16_epi8(0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }

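    // `_mm512_cvtsepi16_epi8` and its variants narrow with signed saturation
    // (`vpmovswb`): values are clamped to the `i8` range, so `i16::MAX`
    // becomes `i8::MAX`.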
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_cvtsepi16_epi8() {
        let a = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_cvtsepi16_epi8(a);
        let e = _mm256_set1_epi8(i8::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    fn test_mm512_mask_cvtsepi16_epi8() {
        let src = _mm256_set1_epi8(1);
        let a = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_mask_cvtsepi16_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvtsepi16_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(i8::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_cvtsepi16_epi8() {
        let a = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_cvtsepi16_epi8(a);
        let e = _mm_set1_epi8(i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_mask_cvtsepi16_epi8() {
        let src = _mm_set1_epi8(1);
        let a = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_mask_cvtsepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtsepi16_epi8(src, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_maskz_cvtsepi16_epi8() {
        let a = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_maskz_cvtsepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtsepi16_epi8(0b11111111_11111111, a);
        let e = _mm_set1_epi8(i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_cvtsepi16_epi8() {
        let a = _mm_set1_epi16(i16::MAX);
        let r = _mm_cvtsepi16_epi8(a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_mask_cvtsepi16_epi8() {
        let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        let a = _mm_set1_epi16(i16::MAX);
        let r = _mm_mask_cvtsepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtsepi16_epi8(src, 0b11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_maskz_cvtsepi16_epi8() {
        let a = _mm_set1_epi16(i16::MAX);
        let r = _mm_maskz_cvtsepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtsepi16_epi8(0b11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    fn test_mm512_maskz_cvtsepi16_epi8() {
        let a = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_maskz_cvtsepi16_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvtsepi16_epi8(0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(i8::MAX);
        assert_eq_m256i(r, e);
    }

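    // `_mm512_cvtusepi16_epi8` and its variants narrow with unsigned
    // saturation (`vpmovuswb`): lanes are treated as unsigned, so `i16::MIN`
    // (0x8000 = 32768) clamps to 0xFF, which reads back as -1 through the
    // signed `i8` accessors used here.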
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_cvtusepi16_epi8() {
        let a = _mm512_set1_epi16(i16::MIN);
        let r = _mm512_cvtusepi16_epi8(a);
        let e = _mm256_set1_epi8(-1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    fn test_mm512_mask_cvtusepi16_epi8() {
        let src = _mm256_set1_epi8(1);
        let a = _mm512_set1_epi16(i16::MIN);
        let r = _mm512_mask_cvtusepi16_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvtusepi16_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(-1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    fn test_mm512_maskz_cvtusepi16_epi8() {
        let a = _mm512_set1_epi16(i16::MIN);
        let r = _mm512_maskz_cvtusepi16_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvtusepi16_epi8(0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(-1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_cvtusepi16_epi8() {
        let a = _mm256_set1_epi16(i16::MIN);
        let r = _mm256_cvtusepi16_epi8(a);
        let e = _mm_set1_epi8(-1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_mask_cvtusepi16_epi8() {
        let src = _mm_set1_epi8(1);
        let a = _mm256_set1_epi16(i16::MIN);
        let r = _mm256_mask_cvtusepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtusepi16_epi8(src, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(-1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_maskz_cvtusepi16_epi8() {
        let a = _mm256_set1_epi16(i16::MIN);
        let r = _mm256_maskz_cvtusepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtusepi16_epi8(0b11111111_11111111, a);
        let e = _mm_set1_epi8(-1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_cvtusepi16_epi8() {
        let a = _mm_set1_epi16(i16::MIN);
        let r = _mm_cvtusepi16_epi8(a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_mask_cvtusepi16_epi8() {
        let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        let a = _mm_set1_epi16(i16::MIN);
        let r = _mm_mask_cvtusepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtusepi16_epi8(src, 0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_maskz_cvtusepi16_epi8() {
        let a = _mm_set1_epi16(i16::MIN);
        let r = _mm_maskz_cvtusepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtusepi16_epi8(0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

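    // `_mm512_cvtepi8_epi16` sign-extends (`vpmovsxbw`) and the `cvtepu8`
    // group below zero-extends (`vpmovzxbw`). With the non-negative input 2
    // used in these tests the two agree; see the contrast sketch after these
    // groups for an input where they differ.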
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_cvtepi8_epi16() {
        let a = _mm256_set1_epi8(2);
        let r = _mm512_cvtepi8_epi16(a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cvtepi8_epi16() {
        let src = _mm512_set1_epi16(1);
        let a = _mm256_set1_epi8(2);
        let r = _mm512_mask_cvtepi8_epi16(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvtepi8_epi16(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_cvtepi8_epi16() {
        let a = _mm256_set1_epi8(2);
        let r = _mm512_maskz_cvtepi8_epi16(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvtepi8_epi16(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cvtepi8_epi16() {
        let src = _mm256_set1_epi16(1);
        let a = _mm_set1_epi8(2);
        let r = _mm256_mask_cvtepi8_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_cvtepi8_epi16(src, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_cvtepi8_epi16() {
        let a = _mm_set1_epi8(2);
        let r = _mm256_maskz_cvtepi8_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_cvtepi8_epi16(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cvtepi8_epi16() {
        let src = _mm_set1_epi16(1);
        let a = _mm_set1_epi8(2);
        let r = _mm_mask_cvtepi8_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepi8_epi16(src, 0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_cvtepi8_epi16() {
        let a = _mm_set1_epi8(2);
        let r = _mm_maskz_cvtepi8_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepi8_epi16(0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_cvtepu8_epi16() {
        let a = _mm256_set1_epi8(2);
        let r = _mm512_cvtepu8_epi16(a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cvtepu8_epi16() {
        let src = _mm512_set1_epi16(1);
        let a = _mm256_set1_epi8(2);
        let r = _mm512_mask_cvtepu8_epi16(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvtepu8_epi16(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_cvtepu8_epi16() {
        let a = _mm256_set1_epi8(2);
        let r = _mm512_maskz_cvtepu8_epi16(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvtepu8_epi16(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cvtepu8_epi16() {
        let src = _mm256_set1_epi16(1);
        let a = _mm_set1_epi8(2);
        let r = _mm256_mask_cvtepu8_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_cvtepu8_epi16(src, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_cvtepu8_epi16() {
        let a = _mm_set1_epi8(2);
        let r = _mm256_maskz_cvtepu8_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_cvtepu8_epi16(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cvtepu8_epi16() {
        let src = _mm_set1_epi16(1);
        let a = _mm_set1_epi8(2);
        let r = _mm_mask_cvtepu8_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepu8_epi16(src, 0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_cvtepu8_epi16() {
        let a = _mm_set1_epi8(2);
        let r = _mm_maskz_cvtepu8_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepu8_epi16(0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

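    // A minimal contrast check (a sketch added here, not from the original
    // suite): with a negative input, sign extension and zero extension
    // produce different words, which the all-`2` inputs above cannot show.
    #[simd_test(enable = "avx512bw")]
    const fn test_cvtepi8_epi16_extension_contrast() {
        let a = _mm256_set1_epi8(-1);
        // Sign extension preserves the value: 0xFF as i8 is -1.
        assert_eq_m512i(_mm512_cvtepi8_epi16(a), _mm512_set1_epi16(-1));
        // Zero extension preserves the bits: 0xFF widens to 0x00FF = 255.
        assert_eq_m512i(_mm512_cvtepu8_epi16(a), _mm512_set1_epi16(255));
    }

    // `_mm512_bslli_epi128` and `_mm512_bsrli_epi128` shift by whole bytes
    // within each 128-bit lane independently; bytes never cross lanes.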
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_bslli_epi128() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let r = _mm512_bslli_epi128::<9>(a);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_bsrli_epi128() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
            49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        );
        let r = _mm512_bsrli_epi128::<3>(a);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
            0, 0, 0, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
            0, 0, 0, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            0, 0, 0, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
        );
        assert_eq_m512i(r, e);
    }

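    // Like the byte shifts above, `_mm512_alignr_epi8` works per 128-bit
    // lane: each lane of `a` is concatenated above the matching lane of `b`,
    // the 32-byte window is shifted right by IMM8 bytes, and the low 16
    // bytes are kept.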
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let b = _mm512_set1_epi8(1);
        let r = _mm512_alignr_epi8::<14>(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let b = _mm512_set1_epi8(1);
        let r = _mm512_mask_alignr_epi8::<14>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_alignr_epi8::<14>(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let b = _mm512_set1_epi8(1);
        let r = _mm512_maskz_alignr_epi8::<14>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_alignr_epi8::<14>(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let b = _mm256_set1_epi8(1);
        let r = _mm256_mask_alignr_epi8::<14>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_alignr_epi8::<14>(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let b = _mm256_set1_epi8(1);
        let r = _mm256_maskz_alignr_epi8::<14>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_alignr_epi8::<14>(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_alignr_epi8() {
        let a = _mm_set_epi8(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0);
        let b = _mm_set1_epi8(1);
        let r = _mm_mask_alignr_epi8::<14>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_alignr_epi8::<14>(a, 0b11111111_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_alignr_epi8() {
        let a = _mm_set_epi8(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0);
        let b = _mm_set1_epi8(1);
        let r = _mm_maskz_alignr_epi8::<14>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_alignr_epi8::<14>(0b11111111_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1);
        assert_eq_m128i(r, e);
    }

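    // The `storeu` conversion intrinsics write the narrowed bytes through a
    // raw pointer (no alignment requirement). With an all-ones mask every
    // destination byte is written, so starting from an undefined register
    // cannot leak into the comparison; the 128-bit variants write only the
    // low eight bytes, so those tests start from zero instead.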
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtsepi16_storeu_epi8() {
        let a = _mm512_set1_epi16(i16::MAX);
        let mut r = _mm256_undefined_si256();
        _mm512_mask_cvtsepi16_storeu_epi8(
            &mut r as *mut _ as *mut i8,
            0b11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm256_set1_epi8(i8::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtsepi16_storeu_epi8() {
        let a = _mm256_set1_epi16(i16::MAX);
        let mut r = _mm_undefined_si128();
        _mm256_mask_cvtsepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtsepi16_storeu_epi8() {
        let a = _mm_set1_epi16(i16::MAX);
        let mut r = _mm_set1_epi8(0);
        _mm_mask_cvtsepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0, 0, 0, 0, 0,
            i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtepi16_storeu_epi8() {
        let a = _mm512_set1_epi16(8);
        let mut r = _mm256_undefined_si256();
        _mm512_mask_cvtepi16_storeu_epi8(
            &mut r as *mut _ as *mut i8,
            0b11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm256_set1_epi8(8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtepi16_storeu_epi8() {
        let a = _mm256_set1_epi16(8);
        let mut r = _mm_undefined_si128();
        _mm256_mask_cvtepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtepi16_storeu_epi8() {
        let a = _mm_set1_epi16(8);
        let mut r = _mm_set1_epi8(0);
        _mm_mask_cvtepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtusepi16_storeu_epi8() {
        let a = _mm512_set1_epi16(i16::MAX);
        let mut r = _mm256_undefined_si256();
        _mm512_mask_cvtusepi16_storeu_epi8(
            &mut r as *mut _ as *mut i8,
            0b11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm256_set1_epi8(u8::MAX as i8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtusepi16_storeu_epi8() {
        let a = _mm256_set1_epi16(i16::MAX);
        let mut r = _mm_undefined_si128();
        _mm256_mask_cvtusepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(u8::MAX as i8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtusepi16_storeu_epi8() {
        let a = _mm_set1_epi16(i16::MAX);
        let mut r = _mm_set1_epi8(0);
        _mm_mask_cvtusepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
        );
        assert_eq_m128i(r, e);
    }
}