core/stdarch/crates/core_arch/src/x86/avx512vbmi2.rs

use crate::{
    core_arch::{simd::*, x86::*},
    intrinsics::simd::*,
};

#[cfg(test)]
use stdarch_test::assert_instr;

/// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi16)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[cfg_attr(test, assert_instr(vpexpandw))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_expandloadu_epi16(
    src: __m512i,
    k: __mmask32,
    mem_addr: *const i16,
) -> __m512i {
    transmute(expandloadw_512(mem_addr, src.as_i16x32(), k))
}

/// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi16)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[cfg_attr(test, assert_instr(vpexpandw))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_maskz_expandloadu_epi16(k: __mmask32, mem_addr: *const i16) -> __m512i {
    _mm512_mask_expandloadu_epi16(_mm512_setzero_si512(), k, mem_addr)
}

/// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi16)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandw))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_expandloadu_epi16(
    src: __m256i,
    k: __mmask16,
    mem_addr: *const i16,
) -> __m256i {
    transmute(expandloadw_256(mem_addr, src.as_i16x16(), k))
}

/// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi16)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandw))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_maskz_expandloadu_epi16(k: __mmask16, mem_addr: *const i16) -> __m256i {
    _mm256_mask_expandloadu_epi16(_mm256_setzero_si256(), k, mem_addr)
}

/// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi16)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandw))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_expandloadu_epi16(
    src: __m128i,
    k: __mmask8,
    mem_addr: *const i16,
) -> __m128i {
    transmute(expandloadw_128(mem_addr, src.as_i16x8(), k))
}

/// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi16)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandw))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_expandloadu_epi16(k: __mmask8, mem_addr: *const i16) -> __m128i {
    _mm_mask_expandloadu_epi16(_mm_setzero_si128(), k, mem_addr)
}
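
// Illustrative sketch (hypothetical helper, not part of the intrinsic set):
// the expand-load intrinsics read `k.count_ones()` contiguous elements from
// memory and scatter them, in order, into the lanes selected by `k`. The
// caller is assumed to have verified `avx512vbmi2`/`avx512vl` support.
//
//     #[target_feature(enable = "avx512vbmi2,avx512vl")]
//     unsafe fn scatter_two_words(src: __m128i, packed: *const i16) -> __m128i {
//         // Mask 0b0000_1010 has two set bits, so two i16 values are read from
//         // `packed` and placed into lanes 1 and 3; the other lanes keep `src`.
//         _mm_mask_expandloadu_epi16(src, 0b0000_1010, packed)
//     }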

/// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi8)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[cfg_attr(test, assert_instr(vpexpandb))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_expandloadu_epi8(
    src: __m512i,
    k: __mmask64,
    mem_addr: *const i8,
) -> __m512i {
    transmute(expandloadb_512(mem_addr, src.as_i8x64(), k))
}

/// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi8)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[cfg_attr(test, assert_instr(vpexpandb))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_maskz_expandloadu_epi8(k: __mmask64, mem_addr: *const i8) -> __m512i {
    _mm512_mask_expandloadu_epi8(_mm512_setzero_si512(), k, mem_addr)
}

/// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi8)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandb))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_expandloadu_epi8(
    src: __m256i,
    k: __mmask32,
    mem_addr: *const i8,
) -> __m256i {
    transmute(expandloadb_256(mem_addr, src.as_i8x32(), k))
}

/// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi8)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandb))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_maskz_expandloadu_epi8(k: __mmask32, mem_addr: *const i8) -> __m256i {
    _mm256_mask_expandloadu_epi8(_mm256_setzero_si256(), k, mem_addr)
}

/// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi8)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandb))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_expandloadu_epi8(
    src: __m128i,
    k: __mmask16,
    mem_addr: *const i8,
) -> __m128i {
    transmute(expandloadb_128(mem_addr, src.as_i8x16(), k))
}

/// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi8)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandb))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_expandloadu_epi8(k: __mmask16, mem_addr: *const i8) -> __m128i {
    _mm_mask_expandloadu_epi8(_mm_setzero_si128(), k, mem_addr)
}
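
// Illustrative sketch (hypothetical helper): the zero-masked byte form reads
// `k.count_ones()` bytes and zeroes every unselected lane, which is a common
// way to re-inflate a byte stream that was previously compressed with the same
// mask. Assumes `avx512vbmi2` support.
//
//     #[target_feature(enable = "avx512vbmi2")]
//     unsafe fn inflate_bytes(packed: *const i8, k: __mmask64) -> __m512i {
//         // Reads k.count_ones() bytes from `packed`, placing them in order
//         // into the lanes selected by `k`; unselected lanes become zero.
//         _mm512_maskz_expandloadu_epi8(k, packed)
//     }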

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi16)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub unsafe fn _mm512_mask_compressstoreu_epi16(base_addr: *mut i16, k: __mmask32, a: __m512i) {
    vcompressstorew(base_addr as *mut _, a.as_i16x32(), k)
}

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi16)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub unsafe fn _mm256_mask_compressstoreu_epi16(base_addr: *mut i16, k: __mmask16, a: __m256i) {
    vcompressstorew256(base_addr as *mut _, a.as_i16x16(), k)
}

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi16)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub unsafe fn _mm_mask_compressstoreu_epi16(base_addr: *mut i16, k: __mmask8, a: __m128i) {
    vcompressstorew128(base_addr as *mut _, a.as_i16x8(), k)
}

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi8)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub unsafe fn _mm512_mask_compressstoreu_epi8(base_addr: *mut i8, k: __mmask64, a: __m512i) {
    vcompressstoreb(base_addr, a.as_i8x64(), k)
}

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi8)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub unsafe fn _mm256_mask_compressstoreu_epi8(base_addr: *mut i8, k: __mmask32, a: __m256i) {
    vcompressstoreb256(base_addr, a.as_i8x32(), k)
}

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi8)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub unsafe fn _mm_mask_compressstoreu_epi8(base_addr: *mut i8, k: __mmask16, a: __m128i) {
    vcompressstoreb128(base_addr, a.as_i8x16(), k)
}
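
// Illustrative sketch (hypothetical helper): compress-store is the usual
// building block for SIMD filtering, writing only the selected bytes densely
// to memory and reporting how many were written. Assumes `avx512vbmi2`.
//
//     #[target_feature(enable = "avx512vbmi2")]
//     unsafe fn store_selected(dst: *mut i8, k: __mmask64, a: __m512i) -> usize {
//         // Exactly k.count_ones() bytes are written, tightly packed at `dst`,
//         // so `dst` must be valid for that many byte writes.
//         _mm512_mask_compressstoreu_epi8(dst, k, a);
//         k.count_ones() as usize
//     }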

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi16&expand=1192)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub fn _mm512_mask_compress_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
    unsafe { transmute(vpcompressw(a.as_i16x32(), src.as_i16x32(), k)) }
}

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi16&expand=1193)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub fn _mm512_maskz_compress_epi16(k: __mmask32, a: __m512i) -> __m512i {
    unsafe { transmute(vpcompressw(a.as_i16x32(), i16x32::ZERO, k)) }
}

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi16&expand=1190)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub fn _mm256_mask_compress_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
    unsafe { transmute(vpcompressw256(a.as_i16x16(), src.as_i16x16(), k)) }
}

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi16&expand=1191)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub fn _mm256_maskz_compress_epi16(k: __mmask16, a: __m256i) -> __m256i {
    unsafe { transmute(vpcompressw256(a.as_i16x16(), i16x16::ZERO, k)) }
}

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi16&expand=1188)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub fn _mm_mask_compress_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpcompressw128(a.as_i16x8(), src.as_i16x8(), k)) }
}

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi16&expand=1189)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub fn _mm_maskz_compress_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpcompressw128(a.as_i16x8(), i16x8::ZERO, k)) }
}
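
// Illustrative sketch (hypothetical helper): the in-register compress packs
// the selected lanes into the low end of the vector without touching memory.
//
//     #[target_feature(enable = "avx512vbmi2")]
//     unsafe fn pack_even_words(a: __m512i) -> __m512i {
//         // 0x5555_5555 selects every even-indexed 16-bit lane; those lanes
//         // end up contiguous in the low half of the result, the rest zeroed.
//         _mm512_maskz_compress_epi16(0x5555_5555, a)
//     }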

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi8&expand=1210)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub fn _mm512_mask_compress_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
    unsafe { transmute(vpcompressb(a.as_i8x64(), src.as_i8x64(), k)) }
}

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi8&expand=1211)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub fn _mm512_maskz_compress_epi8(k: __mmask64, a: __m512i) -> __m512i {
    unsafe { transmute(vpcompressb(a.as_i8x64(), i8x64::ZERO, k)) }
}

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi8&expand=1208)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub fn _mm256_mask_compress_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
    unsafe { transmute(vpcompressb256(a.as_i8x32(), src.as_i8x32(), k)) }
}

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi8&expand=1209)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub fn _mm256_maskz_compress_epi8(k: __mmask32, a: __m256i) -> __m256i {
    unsafe { transmute(vpcompressb256(a.as_i8x32(), i8x32::ZERO, k)) }
}

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi8&expand=1206)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub fn _mm_mask_compress_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
    unsafe { transmute(vpcompressb128(a.as_i8x16(), src.as_i8x16(), k)) }
}

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi8&expand=1207)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub fn _mm_maskz_compress_epi8(k: __mmask16, a: __m128i) -> __m128i {
    unsafe { transmute(vpcompressb128(a.as_i8x16(), i8x16::ZERO, k)) }
}
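
// Illustrative sketch (hypothetical helper): with the write-masked byte form,
// the lanes above the packed run are passed through from `src` instead of
// being zeroed. Assumes `avx512vbmi2` and `avx512vl`.
//
//     #[target_feature(enable = "avx512vbmi2,avx512vl")]
//     unsafe fn pack_selected_bytes(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
//         // The k.count_ones() selected bytes of `a` fill the low lanes of the
//         // result; the remaining lanes are copied from `src`.
//         _mm_mask_compress_epi8(src, k, a)
//     }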

/// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi16&expand=2310)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandw))]
pub fn _mm512_mask_expand_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
    unsafe { transmute(vpexpandw(a.as_i16x32(), src.as_i16x32(), k)) }
}

/// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi16&expand=2311)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandw))]
pub fn _mm512_maskz_expand_epi16(k: __mmask32, a: __m512i) -> __m512i {
    unsafe { transmute(vpexpandw(a.as_i16x32(), i16x32::ZERO, k)) }
}

/// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi16&expand=2308)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandw))]
pub fn _mm256_mask_expand_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
    unsafe { transmute(vpexpandw256(a.as_i16x16(), src.as_i16x16(), k)) }
}

/// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi16&expand=2309)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandw))]
pub fn _mm256_maskz_expand_epi16(k: __mmask16, a: __m256i) -> __m256i {
    unsafe { transmute(vpexpandw256(a.as_i16x16(), i16x16::ZERO, k)) }
}

/// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi16&expand=2306)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandw))]
pub fn _mm_mask_expand_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpexpandw128(a.as_i16x8(), src.as_i16x8(), k)) }
}

/// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi16&expand=2307)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandw))]
pub fn _mm_maskz_expand_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpexpandw128(a.as_i16x8(), i16x8::ZERO, k)) }
}
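
// Illustrative sketch (hypothetical helper): expand is the inverse of
// compress, so compressing and then expanding with the same mask keeps the
// selected lanes in place and zeroes everything else.
//
//     #[target_feature(enable = "avx512vbmi2")]
//     unsafe fn keep_selected_words(k: __mmask32, a: __m512i) -> __m512i {
//         // Equivalent to zeroing the unselected lanes of `a`.
//         _mm512_maskz_expand_epi16(k, _mm512_maskz_compress_epi16(k, a))
//     }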

/// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi8&expand=2328)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandb))]
pub fn _mm512_mask_expand_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
    unsafe { transmute(vpexpandb(a.as_i8x64(), src.as_i8x64(), k)) }
}

/// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi8&expand=2329)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandb))]
pub fn _mm512_maskz_expand_epi8(k: __mmask64, a: __m512i) -> __m512i {
    unsafe { transmute(vpexpandb(a.as_i8x64(), i8x64::ZERO, k)) }
}

/// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi8&expand=2326)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandb))]
pub fn _mm256_mask_expand_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
    unsafe { transmute(vpexpandb256(a.as_i8x32(), src.as_i8x32(), k)) }
}

/// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi8&expand=2327)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandb))]
pub fn _mm256_maskz_expand_epi8(k: __mmask32, a: __m256i) -> __m256i {
    unsafe { transmute(vpexpandb256(a.as_i8x32(), i8x32::ZERO, k)) }
}

/// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi8&expand=2324)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandb))]
pub fn _mm_mask_expand_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
    unsafe { transmute(vpexpandb128(a.as_i8x16(), src.as_i8x16(), k)) }
}

/// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi8&expand=2325)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandb))]
pub fn _mm_maskz_expand_epi8(k: __mmask16, a: __m128i) -> __m128i {
    unsafe { transmute(vpexpandb128(a.as_i8x16(), i8x16::ZERO, k)) }
}
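
// Illustrative sketch (hypothetical helper): the write-masked expand pulls the
// low `k.count_ones()` bytes of `a` into the lanes selected by `k` and keeps
// `src` elsewhere. Assumes `avx512vbmi2` and `avx512vl`.
//
//     #[target_feature(enable = "avx512vbmi2,avx512vl")]
//     unsafe fn insert_gaps(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
//         _mm256_mask_expand_epi8(src, k, a)
//     }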

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldv_epi64&expand=5087)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_shldv_epi64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        transmute(simd_funnel_shl(
            a.as_i64x8(),
            b.as_i64x8(),
            simd_and(c.as_i64x8(), i64x8::splat(63)),
        ))
    }
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldv_epi64&expand=5085)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_shldv_epi64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_shldv_epi64(a, b, c).as_i64x8();
        transmute(simd_select_bitmask(k, shf, a.as_i64x8()))
    }
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldv_epi64&expand=5086)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_shldv_epi64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_shldv_epi64(a, b, c).as_i64x8();
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
    }
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldv_epi64&expand=5084)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_shldv_epi64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        transmute(simd_funnel_shl(
            a.as_i64x4(),
            b.as_i64x4(),
            simd_and(c.as_i64x4(), i64x4::splat(63)),
        ))
    }
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldv_epi64&expand=5082)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_shldv_epi64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_shldv_epi64(a, b, c).as_i64x4();
        transmute(simd_select_bitmask(k, shf, a.as_i64x4()))
    }
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldv_epi64&expand=5083)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_shldv_epi64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_shldv_epi64(a, b, c).as_i64x4();
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
    }
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldv_epi64&expand=5081)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_shldv_epi64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe {
        transmute(simd_funnel_shl(
            a.as_i64x2(),
            b.as_i64x2(),
            simd_and(c.as_i64x2(), i64x2::splat(63)),
        ))
    }
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldv_epi64&expand=5079)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_shldv_epi64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_shldv_epi64(a, b, c).as_i64x2();
        transmute(simd_select_bitmask(k, shf, a.as_i64x2()))
    }
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldv_epi64&expand=5080)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_shldv_epi64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_shldv_epi64(a, b, c).as_i64x2();
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
    }
}
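
// Illustrative sketch (hypothetical helper): per lane, shldv yields the high
// 64 bits of the 128-bit value (a:b) shifted left by c & 63, i.e.
// (a << c) | (b >> (64 - c)) for c in 1..=63, and simply a when c is 0.
//
//     #[target_feature(enable = "avx512vbmi2")]
//     unsafe fn shld_by_8(a: __m512i, b: __m512i) -> __m512i {
//         // Every lane becomes (a << 8) | (b >> 56).
//         _mm512_shldv_epi64(a, b, _mm512_set1_epi64(8))
//     }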

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldv_epi32&expand=5078)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_shldv_epi32(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        transmute(simd_funnel_shl(
            a.as_i32x16(),
            b.as_i32x16(),
            simd_and(c.as_i32x16(), i32x16::splat(31)),
        ))
    }
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldv_epi32&expand=5076)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_shldv_epi32(a: __m512i, k: __mmask16, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_shldv_epi32(a, b, c).as_i32x16();
        transmute(simd_select_bitmask(k, shf, a.as_i32x16()))
    }
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldv_epi32&expand=5077)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_shldv_epi32(k: __mmask16, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_shldv_epi32(a, b, c).as_i32x16();
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
    }
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldv_epi32&expand=5075)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_shldv_epi32(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        transmute(simd_funnel_shl(
            a.as_i32x8(),
            b.as_i32x8(),
            simd_and(c.as_i32x8(), i32x8::splat(31)),
        ))
    }
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldv_epi32&expand=5073)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_shldv_epi32(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_shldv_epi32(a, b, c).as_i32x8();
        transmute(simd_select_bitmask(k, shf, a.as_i32x8()))
    }
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldv_epi32&expand=5074)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_shldv_epi32(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_shldv_epi32(a, b, c).as_i32x8();
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
    }
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldv_epi32&expand=5072)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_shldv_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe {
        transmute(simd_funnel_shl(
            a.as_i32x4(),
            b.as_i32x4(),
            simd_and(c.as_i32x4(), i32x4::splat(31)),
        ))
    }
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldv_epi32&expand=5070)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_shldv_epi32(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_shldv_epi32(a, b, c).as_i32x4();
        transmute(simd_select_bitmask(k, shf, a.as_i32x4()))
    }
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldv_epi32&expand=5071)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_shldv_epi32(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_shldv_epi32(a, b, c).as_i32x4();
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
    }
}
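
// Illustrative sketch (hypothetical helper): passing the same vector as both
// `a` and `b` turns the funnel shift into a per-lane variable rotate-left
// (the count is masked to 0..=31). Assumes `avx512vbmi2` and `avx512vl`.
//
//     #[target_feature(enable = "avx512vbmi2,avx512vl")]
//     unsafe fn rotate_left_var(a: __m256i, counts: __m256i) -> __m256i {
//         _mm256_shldv_epi32(a, a, counts)
//     }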

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldv_epi16&expand=5069)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_shldv_epi16(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        transmute(simd_funnel_shl(
            a.as_i16x32(),
            b.as_i16x32(),
            simd_and(c.as_i16x32(), i16x32::splat(15)),
        ))
    }
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldv_epi16&expand=5067)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_shldv_epi16(a: __m512i, k: __mmask32, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_shldv_epi16(a, b, c).as_i16x32();
        transmute(simd_select_bitmask(k, shf, a.as_i16x32()))
    }
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldv_epi16&expand=5068)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_shldv_epi16(k: __mmask32, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_shldv_epi16(a, b, c).as_i16x32();
        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
    }
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldv_epi16&expand=5066)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_shldv_epi16(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        transmute(simd_funnel_shl(
            a.as_i16x16(),
            b.as_i16x16(),
            simd_and(c.as_i16x16(), i16x16::splat(15)),
        ))
    }
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldv_epi16&expand=5064)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_shldv_epi16(a: __m256i, k: __mmask16, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_shldv_epi16(a, b, c).as_i16x16();
        transmute(simd_select_bitmask(k, shf, a.as_i16x16()))
    }
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldv_epi16&expand=5065)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_shldv_epi16(k: __mmask16, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_shldv_epi16(a, b, c).as_i16x16();
        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
    }
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldv_epi16&expand=5063)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_shldv_epi16(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe {
        transmute(simd_funnel_shl(
            a.as_i16x8(),
            b.as_i16x8(),
            simd_and(c.as_i16x8(), i16x8::splat(15)),
        ))
    }
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldv_epi16&expand=5061)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_shldv_epi16(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_shldv_epi16(a, b, c).as_i16x8();
        transmute(simd_select_bitmask(k, shf, a.as_i16x8()))
    }
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldv_epi16&expand=5062)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_shldv_epi16(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_shldv_epi16(a, b, c).as_i16x8();
        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
    }
}
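
// Illustrative sketch (hypothetical helper): the masked forms blend the funnel
// shift result with `a`, so a constant mask can restrict the shift to a subset
// of lanes. Assumes `avx512vbmi2` and `avx512vl`.
//
//     #[target_feature(enable = "avx512vbmi2,avx512vl")]
//     unsafe fn shld_low_half(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
//         // Only the low four 16-bit lanes are shifted; the high four keep `a`.
//         _mm_mask_shldv_epi16(a, 0x0F, b, c)
//     }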

/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdv_epi64&expand=5141)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshrdvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_shrdv_epi64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        transmute(simd_funnel_shr(
            b.as_i64x8(),
            a.as_i64x8(),
            simd_and(c.as_i64x8(), i64x8::splat(63)),
        ))
    }
}

/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdv_epi64&expand=5139)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshrdvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_shrdv_epi64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_shrdv_epi64(a, b, c).as_i64x8();
        transmute(simd_select_bitmask(k, shf, a.as_i64x8()))
    }
}

/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdv_epi64&expand=5140)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshrdvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_shrdv_epi64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_shrdv_epi64(a, b, c).as_i64x8();
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
    }
}

/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdv_epi64&expand=5138)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshrdvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_shrdv_epi64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        transmute(simd_funnel_shr(
            b.as_i64x4(),
            a.as_i64x4(),
            simd_and(c.as_i64x4(), i64x4::splat(63)),
        ))
    }
}

/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdv_epi64&expand=5136)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshrdvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_shrdv_epi64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_shrdv_epi64(a, b, c).as_i64x4();
        transmute(simd_select_bitmask(k, shf, a.as_i64x4()))
    }
}

/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdv_epi64&expand=5137)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshrdvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_shrdv_epi64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_shrdv_epi64(a, b, c).as_i64x4();
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
    }
}
1022
1023/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst.
1024///
1025/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdv_epi64&expand=5135)
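///
/// A minimal usage sketch with illustrative values (not taken from Intel's
/// documentation); it assumes a target supporting `avx512vbmi2` and `avx512vl`:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // Each lane computes the low 64 bits of ((b << 64) | a) >> (c & 63),
///     // so the low bit of `b` lands at bit 60 of the result.
///     let a = _mm_set1_epi64x(0);
///     let b = _mm_set1_epi64x(1);
///     let c = _mm_set1_epi64x(4);
///     let r = _mm_shrdv_epi64(a, b, c);
///     let mut out = [0_i64; 2];
///     _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
///     assert_eq!(out, [1_i64 << 60; 2]);
/// }
/// ```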
1026#[inline]
1027#[target_feature(enable = "avx512vbmi2,avx512vl")]
1028#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1029#[cfg_attr(test, assert_instr(vpshrdvq))]
1030#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1031pub const fn _mm_shrdv_epi64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
1032    unsafe {
1033        transmute(simd_funnel_shr(
1034            b.as_i64x2(),
1035            a.as_i64x2(),
1036            simd_and(c.as_i64x2(), i64x2::splat(63)),
1037        ))
1038    }
1039}
1040
1041/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
1042///
1043/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdv_epi64&expand=5133)
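///
/// A minimal usage sketch with illustrative values (not taken from Intel's
/// documentation); it assumes a target supporting `avx512vbmi2` and `avx512vl`:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm_set1_epi64x(1);
///     let b = _mm_set1_epi64x(1);
///     let c = _mm_set1_epi64x(1);
///     // Only lane 0 is selected by the mask; lane 1 keeps its value from `a`.
///     let r = _mm_mask_shrdv_epi64(a, 0b01, b, c);
///     let mut out = [0_i64; 2];
///     _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
///     // Lane 0: low 64 bits of (((1 << 64) | 1) >> 1) = 1 << 63, i.e. i64::MIN as a bit pattern.
///     assert_eq!(out, [i64::MIN, 1]);
/// }
/// ```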
1044#[inline]
1045#[target_feature(enable = "avx512vbmi2,avx512vl")]
1046#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1047#[cfg_attr(test, assert_instr(vpshrdvq))]
1048#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1049pub const fn _mm_mask_shrdv_epi64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
1050    unsafe {
1051        let shf = _mm_shrdv_epi64(a, b, c).as_i64x2();
1052        transmute(simd_select_bitmask(k, shf, a.as_i64x2()))
1053    }
1054}
1055
1056/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1057///
1058/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdv_epi64&expand=5134)
1059#[inline]
1060#[target_feature(enable = "avx512vbmi2,avx512vl")]
1061#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1062#[cfg_attr(test, assert_instr(vpshrdvq))]
1063#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1064pub const fn _mm_maskz_shrdv_epi64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
1065    unsafe {
1066        let shf = _mm_shrdv_epi64(a, b, c).as_i64x2();
1067        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
1068    }
1069}
1070
1071/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst.
1072///
1073/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdv_epi32&expand=5132)
1074#[inline]
1075#[target_feature(enable = "avx512vbmi2")]
1076#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1077#[cfg_attr(test, assert_instr(vpshrdvd))]
1078#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1079pub const fn _mm512_shrdv_epi32(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
1080    unsafe {
1081        transmute(simd_funnel_shr(
1082            b.as_i32x16(),
1083            a.as_i32x16(),
1084            simd_and(c.as_i32x16(), i32x16::splat(31)),
1085        ))
1086    }
1087}
1088
1089/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
1090///
1091/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdv_epi32&expand=5130)
1092#[inline]
1093#[target_feature(enable = "avx512vbmi2")]
1094#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1095#[cfg_attr(test, assert_instr(vpshrdvd))]
1096#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1097pub const fn _mm512_mask_shrdv_epi32(a: __m512i, k: __mmask16, b: __m512i, c: __m512i) -> __m512i {
1098    unsafe {
1099        let shf = _mm512_shrdv_epi32(a, b, c).as_i32x16();
1100        transmute(simd_select_bitmask(k, shf, a.as_i32x16()))
1101    }
1102}
1103
1104/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1105///
1106/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdv_epi32&expand=5131)
1107#[inline]
1108#[target_feature(enable = "avx512vbmi2")]
1109#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1110#[cfg_attr(test, assert_instr(vpshrdvd))]
1111#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1112pub const fn _mm512_maskz_shrdv_epi32(k: __mmask16, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
1113    unsafe {
1114        let shf = _mm512_shrdv_epi32(a, b, c).as_i32x16();
1115        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
1116    }
1117}
1118
1119/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst.
1120///
1121/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdv_epi32&expand=5129)
1122#[inline]
1123#[target_feature(enable = "avx512vbmi2,avx512vl")]
1124#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1125#[cfg_attr(test, assert_instr(vpshrdvd))]
1126#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1127pub const fn _mm256_shrdv_epi32(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
1128    unsafe {
1129        transmute(simd_funnel_shr(
1130            b.as_i32x8(),
1131            a.as_i32x8(),
1132            simd_and(c.as_i32x8(), i32x8::splat(31)),
1133        ))
1134    }
1135}
1136
1137/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
1138///
1139/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdv_epi32&expand=5127)
1140#[inline]
1141#[target_feature(enable = "avx512vbmi2,avx512vl")]
1142#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1143#[cfg_attr(test, assert_instr(vpshrdvd))]
1144#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1145pub const fn _mm256_mask_shrdv_epi32(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
1146    unsafe {
1147        let shf = _mm256_shrdv_epi32(a, b, c).as_i32x8();
1148        transmute(simd_select_bitmask(k, shf, a.as_i32x8()))
1149    }
1150}
1151
1152/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1153///
1154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdv_epi32&expand=5128)
1155#[inline]
1156#[target_feature(enable = "avx512vbmi2,avx512vl")]
1157#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1158#[cfg_attr(test, assert_instr(vpshrdvd))]
1159#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1160pub const fn _mm256_maskz_shrdv_epi32(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
1161    unsafe {
1162        let shf = _mm256_shrdv_epi32(a, b, c).as_i32x8();
1163        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
1164    }
1165}
1166
1167/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst.
1168///
1169/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdv_epi32&expand=5126)
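///
/// A minimal usage sketch with illustrative values (not taken from Intel's
/// documentation); it assumes a target supporting `avx512vbmi2` and `avx512vl`:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // Each lane computes the low 32 bits of ((b << 32) | a) >> (c & 31).
///     let a = _mm_set1_epi32(0);
///     let b = _mm_set1_epi32(1);
///     let c = _mm_set1_epi32(8);
///     let r = _mm_shrdv_epi32(a, b, c);
///     let mut out = [0_i32; 4];
///     _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
///     assert_eq!(out, [1_i32 << 24; 4]);
/// }
/// ```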
1170#[inline]
1171#[target_feature(enable = "avx512vbmi2,avx512vl")]
1172#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1173#[cfg_attr(test, assert_instr(vpshrdvd))]
1174#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1175pub const fn _mm_shrdv_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
1176    unsafe {
1177        transmute(simd_funnel_shr(
1178            b.as_i32x4(),
1179            a.as_i32x4(),
1180            simd_and(c.as_i32x4(), i32x4::splat(31)),
1181        ))
1182    }
1183}
1184
1185/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
1186///
1187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdv_epi32&expand=5124)
1188#[inline]
1189#[target_feature(enable = "avx512vbmi2,avx512vl")]
1190#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1191#[cfg_attr(test, assert_instr(vpshrdvd))]
1192#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1193pub const fn _mm_mask_shrdv_epi32(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
1194    unsafe {
1195        let shf = _mm_shrdv_epi32(a, b, c).as_i32x4();
1196        transmute(simd_select_bitmask(k, shf, a.as_i32x4()))
1197    }
1198}
1199
1200/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1201///
1202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdv_epi32&expand=5125)
1203#[inline]
1204#[target_feature(enable = "avx512vbmi2,avx512vl")]
1205#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1206#[cfg_attr(test, assert_instr(vpshrdvd))]
1207#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1208pub const fn _mm_maskz_shrdv_epi32(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
1209    unsafe {
1210        let shf = _mm_shrdv_epi32(a, b, c).as_i32x4();
1211        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
1212    }
1213}
1214
1215/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst.
1216///
1217/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdv_epi16&expand=5123)
1218#[inline]
1219#[target_feature(enable = "avx512vbmi2")]
1220#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1221#[cfg_attr(test, assert_instr(vpshrdvw))]
1222#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1223pub const fn _mm512_shrdv_epi16(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
1224    unsafe {
1225        transmute(simd_funnel_shr(
1226            b.as_i16x32(),
1227            a.as_i16x32(),
1228            simd_and(c.as_i16x32(), i16x32::splat(15)),
1229        ))
1230    }
1231}
1232
1233/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
1234///
1235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdv_epi16&expand=5121)
1236#[inline]
1237#[target_feature(enable = "avx512vbmi2")]
1238#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1239#[cfg_attr(test, assert_instr(vpshrdvw))]
1240#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1241pub const fn _mm512_mask_shrdv_epi16(a: __m512i, k: __mmask32, b: __m512i, c: __m512i) -> __m512i {
1242    unsafe {
1243        let shf = _mm512_shrdv_epi16(a, b, c).as_i16x32();
1244        transmute(simd_select_bitmask(k, shf, a.as_i16x32()))
1245    }
1246}
1247
1248/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1249///
1250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdv_epi16&expand=5122)
1251#[inline]
1252#[target_feature(enable = "avx512vbmi2")]
1253#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1254#[cfg_attr(test, assert_instr(vpshrdvw))]
1255#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1256pub const fn _mm512_maskz_shrdv_epi16(k: __mmask32, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
1257    unsafe {
1258        let shf = _mm512_shrdv_epi16(a, b, c).as_i16x32();
1259        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
1260    }
1261}
1262
1263/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst.
1264///
1265/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdv_epi16&expand=5120)
1266#[inline]
1267#[target_feature(enable = "avx512vbmi2,avx512vl")]
1268#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1269#[cfg_attr(test, assert_instr(vpshrdvw))]
1270#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1271pub const fn _mm256_shrdv_epi16(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
1272    unsafe {
1273        transmute(simd_funnel_shr(
1274            b.as_i16x16(),
1275            a.as_i16x16(),
1276            simd_and(c.as_i16x16(), i16x16::splat(15)),
1277        ))
1278    }
1279}
1280
1281/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
1282///
1283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdv_epi16&expand=5118)
1284#[inline]
1285#[target_feature(enable = "avx512vbmi2,avx512vl")]
1286#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1287#[cfg_attr(test, assert_instr(vpshrdvw))]
1288#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1289pub const fn _mm256_mask_shrdv_epi16(a: __m256i, k: __mmask16, b: __m256i, c: __m256i) -> __m256i {
1290    unsafe {
1291        let shf = _mm256_shrdv_epi16(a, b, c).as_i16x16();
1292        transmute(simd_select_bitmask(k, shf, a.as_i16x16()))
1293    }
1294}
1295
1296/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1297///
1298/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdv_epi16&expand=5119)
1299#[inline]
1300#[target_feature(enable = "avx512vbmi2,avx512vl")]
1301#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1302#[cfg_attr(test, assert_instr(vpshrdvw))]
1303#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1304pub const fn _mm256_maskz_shrdv_epi16(k: __mmask16, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
1305    unsafe {
1306        let shf = _mm256_shrdv_epi16(a, b, c).as_i16x16();
1307        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
1308    }
1309}
1310
1311/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst.
1312///
1313/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdv_epi16&expand=5117)
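///
/// A minimal usage sketch with illustrative values (not taken from Intel's
/// documentation); it assumes a target supporting `avx512vbmi2` and `avx512vl`:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // Each lane computes the low 16 bits of (0x000F_00F0 >> 4) = 0xF00F.
///     let a = _mm_set1_epi16(0x00F0);
///     let b = _mm_set1_epi16(0x000F);
///     let c = _mm_set1_epi16(4);
///     let r = _mm_shrdv_epi16(a, b, c);
///     // `_mm_extract_epi16` zero-extends the selected lane to i32.
///     assert_eq!(_mm_extract_epi16::<0>(r), 0xF00F);
/// }
/// ```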
1314#[inline]
1315#[target_feature(enable = "avx512vbmi2,avx512vl")]
1316#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1317#[cfg_attr(test, assert_instr(vpshrdvw))]
1318#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1319pub const fn _mm_shrdv_epi16(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
1320    unsafe {
1321        transmute(simd_funnel_shr(
1322            b.as_i16x8(),
1323            a.as_i16x8(),
1324            simd_and(c.as_i16x8(), i16x8::splat(15)),
1325        ))
1326    }
1327}
1328
1329/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
1330///
1331/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdv_epi16&expand=5115)
1332#[inline]
1333#[target_feature(enable = "avx512vbmi2,avx512vl")]
1334#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1335#[cfg_attr(test, assert_instr(vpshrdvw))]
1336#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1337pub const fn _mm_mask_shrdv_epi16(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
1338    unsafe {
1339        let shf = _mm_shrdv_epi16(a, b, c).as_i16x8();
1340        transmute(simd_select_bitmask(k, shf, a.as_i16x8()))
1341    }
1342}
1343
1344/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1345///
1346/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdv_epi16&expand=5116)
1347#[inline]
1348#[target_feature(enable = "avx512vbmi2,avx512vl")]
1349#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1350#[cfg_attr(test, assert_instr(vpshrdvw))]
1351#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1352pub const fn _mm_maskz_shrdv_epi16(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
1353    unsafe {
1354        let shf = _mm_shrdv_epi16(a, b, c).as_i16x8();
1355        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
1356    }
1357}
1358
1359/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst.
1360///
1361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldi_epi64&expand=5060)
1362#[inline]
1363#[target_feature(enable = "avx512vbmi2")]
1364#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1365#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
1366#[rustc_legacy_const_generics(2)]
1367#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1368pub const fn _mm512_shldi_epi64<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
1369    static_assert_uimm_bits!(IMM8, 8);
1370    _mm512_shldv_epi64(a, b, _mm512_set1_epi64(IMM8 as i64))
1371}
1372
1373/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1374///
1375/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldi_epi64&expand=5058)
1376#[inline]
1377#[target_feature(enable = "avx512vbmi2")]
1378#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1379#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
1380#[rustc_legacy_const_generics(4)]
1381#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1382pub const fn _mm512_mask_shldi_epi64<const IMM8: i32>(
1383    src: __m512i,
1384    k: __mmask8,
1385    a: __m512i,
1386    b: __m512i,
1387) -> __m512i {
1388    unsafe {
1389        static_assert_uimm_bits!(IMM8, 8);
1390        let shf = _mm512_shldi_epi64::<IMM8>(a, b).as_i64x8();
1391        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
1392    }
1393}
1394
1395/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1396///
1397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldi_epi64&expand=5059)
1398#[inline]
1399#[target_feature(enable = "avx512vbmi2")]
1400#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1401#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
1402#[rustc_legacy_const_generics(3)]
1403#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1404pub const fn _mm512_maskz_shldi_epi64<const IMM8: i32>(
1405    k: __mmask8,
1406    a: __m512i,
1407    b: __m512i,
1408) -> __m512i {
1409    unsafe {
1410        static_assert_uimm_bits!(IMM8, 8);
1411        let shf = _mm512_shldi_epi64::<IMM8>(a, b).as_i64x8();
1412        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
1413    }
1414}
1415
1416/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst.
1417///
1418/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldi_epi64&expand=5057)
1419#[inline]
1420#[target_feature(enable = "avx512vbmi2,avx512vl")]
1421#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1422#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
1423#[rustc_legacy_const_generics(2)]
1424#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1425pub const fn _mm256_shldi_epi64<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
1426    static_assert_uimm_bits!(IMM8, 8);
1427    _mm256_shldv_epi64(a, b, _mm256_set1_epi64x(IMM8 as i64))
1428}
1429
1430/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1431///
1432/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldi_epi64&expand=5055)
1433#[inline]
1434#[target_feature(enable = "avx512vbmi2,avx512vl")]
1435#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1436#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
1437#[rustc_legacy_const_generics(4)]
1438#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1439pub const fn _mm256_mask_shldi_epi64<const IMM8: i32>(
1440    src: __m256i,
1441    k: __mmask8,
1442    a: __m256i,
1443    b: __m256i,
1444) -> __m256i {
1445    unsafe {
1446        static_assert_uimm_bits!(IMM8, 8);
1447        let shf = _mm256_shldi_epi64::<IMM8>(a, b).as_i64x4();
1448        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
1449    }
1450}
1451
1452/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1453///
1454/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldi_epi64&expand=5056)
1455#[inline]
1456#[target_feature(enable = "avx512vbmi2,avx512vl")]
1457#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1458#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
1459#[rustc_legacy_const_generics(3)]
1460#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1461pub const fn _mm256_maskz_shldi_epi64<const IMM8: i32>(
1462    k: __mmask8,
1463    a: __m256i,
1464    b: __m256i,
1465) -> __m256i {
1466    unsafe {
1467        static_assert_uimm_bits!(IMM8, 8);
1468        let shf = _mm256_shldi_epi64::<IMM8>(a, b).as_i64x4();
1469        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
1470    }
1471}
1472
1473/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst.
1474///
1475/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldi_epi64&expand=5054)
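///
/// A minimal usage sketch with illustrative values (not taken from Intel's
/// documentation); it assumes a target supporting `avx512vbmi2` and `avx512vl`:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // Each lane keeps the upper 64 bits of ((a << 64) | b) << 8,
///     // i.e. `a` shifted left by 8 with bits of `b` shifted in from below.
///     let a = _mm_set1_epi64x(1);
///     let b = _mm_set1_epi64x(0);
///     let r = _mm_shldi_epi64::<8>(a, b);
///     let mut out = [0_i64; 2];
///     _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
///     assert_eq!(out, [1_i64 << 8; 2]);
/// }
/// ```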
1476#[inline]
1477#[target_feature(enable = "avx512vbmi2,avx512vl")]
1478#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1479#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
1480#[rustc_legacy_const_generics(2)]
1481#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1482pub const fn _mm_shldi_epi64<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
1483    static_assert_uimm_bits!(IMM8, 8);
1484    _mm_shldv_epi64(a, b, _mm_set1_epi64x(IMM8 as i64))
1485}
1486
1487/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1488///
1489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldi_epi64&expand=5052)
1490#[inline]
1491#[target_feature(enable = "avx512vbmi2,avx512vl")]
1492#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1493#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
1494#[rustc_legacy_const_generics(4)]
1495#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1496pub const fn _mm_mask_shldi_epi64<const IMM8: i32>(
1497    src: __m128i,
1498    k: __mmask8,
1499    a: __m128i,
1500    b: __m128i,
1501) -> __m128i {
1502    unsafe {
1503        static_assert_uimm_bits!(IMM8, 8);
1504        let shf = _mm_shldi_epi64::<IMM8>(a, b).as_i64x2();
1505        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
1506    }
1507}
1508
1509/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1510///
1511/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldi_epi64&expand=5053)
1512#[inline]
1513#[target_feature(enable = "avx512vbmi2,avx512vl")]
1514#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1515#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
1516#[rustc_legacy_const_generics(3)]
1517#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1518pub const fn _mm_maskz_shldi_epi64<const IMM8: i32>(
1519    k: __mmask8,
1520    a: __m128i,
1521    b: __m128i,
1522) -> __m128i {
1523    unsafe {
1524        static_assert_uimm_bits!(IMM8, 8);
1525        let shf = _mm_shldi_epi64::<IMM8>(a, b).as_i64x2();
1526        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
1527    }
1528}
1529
1530/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst.
1531///
1532/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldi_epi32&expand=5051)
1533#[inline]
1534#[target_feature(enable = "avx512vbmi2")]
1535#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1536#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
1537#[rustc_legacy_const_generics(2)]
1538#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1539pub const fn _mm512_shldi_epi32<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
1540    static_assert_uimm_bits!(IMM8, 8);
1541    _mm512_shldv_epi32(a, b, _mm512_set1_epi32(IMM8))
1542}
1543
1544/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1545///
1546/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldi_epi32&expand=5049)
1547#[inline]
1548#[target_feature(enable = "avx512vbmi2")]
1549#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1550#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
1551#[rustc_legacy_const_generics(4)]
1552#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1553pub const fn _mm512_mask_shldi_epi32<const IMM8: i32>(
1554    src: __m512i,
1555    k: __mmask16,
1556    a: __m512i,
1557    b: __m512i,
1558) -> __m512i {
1559    unsafe {
1560        static_assert_uimm_bits!(IMM8, 8);
1561        let shf = _mm512_shldi_epi32::<IMM8>(a, b).as_i32x16();
1562        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
1563    }
1564}
1565
1566/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1567///
1568/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldi_epi32&expand=5050)
1569#[inline]
1570#[target_feature(enable = "avx512vbmi2")]
1571#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1572#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
1573#[rustc_legacy_const_generics(3)]
1574#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1575pub const fn _mm512_maskz_shldi_epi32<const IMM8: i32>(
1576    k: __mmask16,
1577    a: __m512i,
1578    b: __m512i,
1579) -> __m512i {
1580    unsafe {
1581        static_assert_uimm_bits!(IMM8, 8);
1582        let shf = _mm512_shldi_epi32::<IMM8>(a, b).as_i32x16();
1583        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
1584    }
1585}
1586
1587/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst.
1588///
1589/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldi_epi32&expand=5048)
1590#[inline]
1591#[target_feature(enable = "avx512vbmi2,avx512vl")]
1592#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1593#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
1594#[rustc_legacy_const_generics(2)]
1595#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1596pub const fn _mm256_shldi_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
1597    static_assert_uimm_bits!(IMM8, 8);
1598    _mm256_shldv_epi32(a, b, _mm256_set1_epi32(IMM8))
1599}
1600
1601/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1602///
1603/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldi_epi32&expand=5046)
1604#[inline]
1605#[target_feature(enable = "avx512vbmi2,avx512vl")]
1606#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1607#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
1608#[rustc_legacy_const_generics(4)]
1609#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1610pub const fn _mm256_mask_shldi_epi32<const IMM8: i32>(
1611    src: __m256i,
1612    k: __mmask8,
1613    a: __m256i,
1614    b: __m256i,
1615) -> __m256i {
1616    unsafe {
1617        static_assert_uimm_bits!(IMM8, 8);
1618        let shf = _mm256_shldi_epi32::<IMM8>(a, b).as_i32x8();
1619        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
1620    }
1621}
1622
1623/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1624///
1625/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldi_epi32&expand=5047)
1626#[inline]
1627#[target_feature(enable = "avx512vbmi2,avx512vl")]
1628#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1629#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
1630#[rustc_legacy_const_generics(3)]
1631#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1632pub const fn _mm256_maskz_shldi_epi32<const IMM8: i32>(
1633    k: __mmask8,
1634    a: __m256i,
1635    b: __m256i,
1636) -> __m256i {
1637    unsafe {
1638        static_assert_uimm_bits!(IMM8, 8);
1639        let shf = _mm256_shldi_epi32::<IMM8>(a, b).as_i32x8();
1640        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
1641    }
1642}
1643
1644/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst.
1645///
1646/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldi_epi32&expand=5045)
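///
/// A minimal usage sketch with illustrative values (not taken from Intel's
/// documentation); it assumes a target supporting `avx512vbmi2` and `avx512vl`:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // With a shift of 1, the top bit of `b` becomes the low bit of the result.
///     let a = _mm_set1_epi32(0);
///     let b = _mm_set1_epi32(i32::MIN); // only the sign bit set
///     let r = _mm_shldi_epi32::<1>(a, b);
///     let mut out = [0_i32; 4];
///     _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
///     assert_eq!(out, [1_i32; 4]);
/// }
/// ```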
1647#[inline]
1648#[target_feature(enable = "avx512vbmi2,avx512vl")]
1649#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1650#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
1651#[rustc_legacy_const_generics(2)]
1652#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1653pub const fn _mm_shldi_epi32<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
1654    static_assert_uimm_bits!(IMM8, 8);
1655    _mm_shldv_epi32(a, b, _mm_set1_epi32(IMM8))
1656}
1657
1658/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1659///
1660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldi_epi32&expand=5043)
1661#[inline]
1662#[target_feature(enable = "avx512vbmi2,avx512vl")]
1663#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1664#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
1665#[rustc_legacy_const_generics(4)]
1666#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1667pub const fn _mm_mask_shldi_epi32<const IMM8: i32>(
1668    src: __m128i,
1669    k: __mmask8,
1670    a: __m128i,
1671    b: __m128i,
1672) -> __m128i {
1673    unsafe {
1674        static_assert_uimm_bits!(IMM8, 8);
1675        let shf = _mm_shldi_epi32::<IMM8>(a, b).as_i32x4();
1676        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
1677    }
1678}
1679
1680/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1681///
1682/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldi_epi32&expand=5044)
1683#[inline]
1684#[target_feature(enable = "avx512vbmi2,avx512vl")]
1685#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1686#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
1687#[rustc_legacy_const_generics(3)]
1688#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1689pub const fn _mm_maskz_shldi_epi32<const IMM8: i32>(
1690    k: __mmask8,
1691    a: __m128i,
1692    b: __m128i,
1693) -> __m128i {
1694    unsafe {
1695        static_assert_uimm_bits!(IMM8, 8);
1696        let shf = _mm_shldi_epi32::<IMM8>(a, b).as_i32x4();
1697        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
1698    }
1699}
1700
1701/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst.
1702///
1703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldi_epi16&expand=5042)
1704#[inline]
1705#[target_feature(enable = "avx512vbmi2")]
1706#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1707#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
1708#[rustc_legacy_const_generics(2)]
1709#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1710pub const fn _mm512_shldi_epi16<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
1711    static_assert_uimm_bits!(IMM8, 8);
1712    _mm512_shldv_epi16(a, b, _mm512_set1_epi16(IMM8 as i16))
1713}
1714
1715/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1716///
1717/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldi_epi16&expand=5040)
1718#[inline]
1719#[target_feature(enable = "avx512vbmi2")]
1720#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1721#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
1722#[rustc_legacy_const_generics(4)]
1723#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1724pub const fn _mm512_mask_shldi_epi16<const IMM8: i32>(
1725    src: __m512i,
1726    k: __mmask32,
1727    a: __m512i,
1728    b: __m512i,
1729) -> __m512i {
1730    unsafe {
1731        static_assert_uimm_bits!(IMM8, 8);
1732        let shf = _mm512_shldi_epi16::<IMM8>(a, b).as_i16x32();
1733        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
1734    }
1735}
1736
1737/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1738///
1739/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldi_epi16&expand=5041)
1740#[inline]
1741#[target_feature(enable = "avx512vbmi2")]
1742#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1743#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
1744#[rustc_legacy_const_generics(3)]
1745#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1746pub const fn _mm512_maskz_shldi_epi16<const IMM8: i32>(
1747    k: __mmask32,
1748    a: __m512i,
1749    b: __m512i,
1750) -> __m512i {
1751    unsafe {
1752        static_assert_uimm_bits!(IMM8, 8);
1753        let shf = _mm512_shldi_epi16::<IMM8>(a, b).as_i16x32();
1754        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
1755    }
1756}
1757
1758/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst.
1759///
1760/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldi_epi16&expand=5039)
1761#[inline]
1762#[target_feature(enable = "avx512vbmi2,avx512vl")]
1763#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1764#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
1765#[rustc_legacy_const_generics(2)]
1766#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1767pub const fn _mm256_shldi_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
1768    static_assert_uimm_bits!(IMM8, 8);
1769    _mm256_shldv_epi16(a, b, _mm256_set1_epi16(IMM8 as i16))
1770}
1771
1772/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1773///
1774/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldi_epi16&expand=5037)
1775#[inline]
1776#[target_feature(enable = "avx512vbmi2,avx512vl")]
1777#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1778#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
1779#[rustc_legacy_const_generics(4)]
1780#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1781pub const fn _mm256_mask_shldi_epi16<const IMM8: i32>(
1782    src: __m256i,
1783    k: __mmask16,
1784    a: __m256i,
1785    b: __m256i,
1786) -> __m256i {
1787    unsafe {
1788        static_assert_uimm_bits!(IMM8, 8);
1789        let shf = _mm256_shldi_epi16::<IMM8>(a, b).as_i16x16();
1790        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
1791    }
1792}
1793
1794/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1795///
1796/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldi_epi16&expand=5038)
1797#[inline]
1798#[target_feature(enable = "avx512vbmi2,avx512vl")]
1799#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1800#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
1801#[rustc_legacy_const_generics(3)]
1802#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1803pub const fn _mm256_maskz_shldi_epi16<const IMM8: i32>(
1804    k: __mmask16,
1805    a: __m256i,
1806    b: __m256i,
1807) -> __m256i {
1808    unsafe {
1809        static_assert_uimm_bits!(IMM8, 8);
1810        let shf = _mm256_shldi_epi16::<IMM8>(a, b).as_i16x16();
1811        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
1812    }
1813}
1814
1815/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst.
1816///
1817/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldi_epi16&expand=5036)
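///
/// A minimal usage sketch with illustrative values (not taken from Intel's
/// documentation); it assumes a target supporting `avx512vbmi2` and `avx512vl`:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // Each lane keeps the upper 16 bits of (0x00F0_F000 << 4) = 0x0F0F.
///     let a = _mm_set1_epi16(0x00F0);
///     let b = _mm_set1_epi16(0xF000_u16 as i16);
///     let r = _mm_shldi_epi16::<4>(a, b);
///     assert_eq!(_mm_extract_epi16::<0>(r), 0x0F0F);
/// }
/// ```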
1818#[inline]
1819#[target_feature(enable = "avx512vbmi2,avx512vl")]
1820#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1821#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
1822#[rustc_legacy_const_generics(2)]
1823#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1824pub const fn _mm_shldi_epi16<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
1825    static_assert_uimm_bits!(IMM8, 8);
1826    _mm_shldv_epi16(a, b, _mm_set1_epi16(IMM8 as i16))
1827}
1828
1829/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1830///
1831/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldi_epi16&expand=5034)
1832#[inline]
1833#[target_feature(enable = "avx512vbmi2,avx512vl")]
1834#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1835#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
1836#[rustc_legacy_const_generics(4)]
1837#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1838pub const fn _mm_mask_shldi_epi16<const IMM8: i32>(
1839    src: __m128i,
1840    k: __mmask8,
1841    a: __m128i,
1842    b: __m128i,
1843) -> __m128i {
1844    unsafe {
1845        static_assert_uimm_bits!(IMM8, 8);
1846        let shf = _mm_shldi_epi16::<IMM8>(a, b).as_i16x8();
1847        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
1848    }
1849}
1850
1851/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1852///
1853/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldi_epi16&expand=5035)
1854#[inline]
1855#[target_feature(enable = "avx512vbmi2,avx512vl")]
1856#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1857#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
1858#[rustc_legacy_const_generics(3)]
1859#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1860pub const fn _mm_maskz_shldi_epi16<const IMM8: i32>(
1861    k: __mmask8,
1862    a: __m128i,
1863    b: __m128i,
1864) -> __m128i {
1865    unsafe {
1866        static_assert_uimm_bits!(IMM8, 8);
1867        let shf = _mm_shldi_epi16::<IMM8>(a, b).as_i16x8();
1868        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
1869    }
1870}
1871
1872/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst.
1873///
1874/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdi_epi64&expand=5114)
1875#[inline]
1876#[target_feature(enable = "avx512vbmi2")]
1877#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1878#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
1879#[rustc_legacy_const_generics(2)]
1880#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1881pub const fn _mm512_shrdi_epi64<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
1882    static_assert_uimm_bits!(IMM8, 8);
1883    _mm512_shrdv_epi64(a, b, _mm512_set1_epi64(IMM8 as i64))
1884}
1885
1886/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1887///
1888/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdi_epi64&expand=5112)
1889#[inline]
1890#[target_feature(enable = "avx512vbmi2")]
1891#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1892#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
1893#[rustc_legacy_const_generics(4)]
1894#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1895pub const fn _mm512_mask_shrdi_epi64<const IMM8: i32>(
1896    src: __m512i,
1897    k: __mmask8,
1898    a: __m512i,
1899    b: __m512i,
1900) -> __m512i {
1901    unsafe {
1902        static_assert_uimm_bits!(IMM8, 8);
1903        let shf = _mm512_shrdi_epi64::<IMM8>(a, b).as_i64x8();
1904        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
1905    }
1906}
1907
1908/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1909///
1910/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdi_epi64&expand=5113)
1911#[inline]
1912#[target_feature(enable = "avx512vbmi2")]
1913#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1914#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 255))] //should be vpshrdq
1915#[rustc_legacy_const_generics(3)]
1916#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1917pub const fn _mm512_maskz_shrdi_epi64<const IMM8: i32>(
1918    k: __mmask8,
1919    a: __m512i,
1920    b: __m512i,
1921) -> __m512i {
1922    unsafe {
1923        static_assert_uimm_bits!(IMM8, 8);
1924        let shf = _mm512_shrdi_epi64::<IMM8>(a, b).as_i64x8();
1925        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
1926    }
1927}
1928
1929/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst.
1930///
1931/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdi_epi64&expand=5111)
1932#[inline]
1933#[target_feature(enable = "avx512vbmi2,avx512vl")]
1934#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1935#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
1936#[rustc_legacy_const_generics(2)]
1937#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1938pub const fn _mm256_shrdi_epi64<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
1939    static_assert_uimm_bits!(IMM8, 8);
1940    _mm256_shrdv_epi64(a, b, _mm256_set1_epi64x(IMM8 as i64))
1941}
1942
1943/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1944///
1945/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdi_epi64&expand=5109)
1946#[inline]
1947#[target_feature(enable = "avx512vbmi2,avx512vl")]
1948#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1949#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
1950#[rustc_legacy_const_generics(4)]
1951#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1952pub const fn _mm256_mask_shrdi_epi64<const IMM8: i32>(
1953    src: __m256i,
1954    k: __mmask8,
1955    a: __m256i,
1956    b: __m256i,
1957) -> __m256i {
1958    unsafe {
1959        static_assert_uimm_bits!(IMM8, 8);
1960        let shf = _mm256_shrdi_epi64::<IMM8>(a, b).as_i64x4();
1961        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
1962    }
1963}
1964
1965/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1966///
1967/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdi_epi64&expand=5110)
1968#[inline]
1969#[target_feature(enable = "avx512vbmi2,avx512vl")]
1970#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1971#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
1972#[rustc_legacy_const_generics(3)]
1973#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1974pub const fn _mm256_maskz_shrdi_epi64<const IMM8: i32>(
1975    k: __mmask8,
1976    a: __m256i,
1977    b: __m256i,
1978) -> __m256i {
1979    unsafe {
1980        static_assert_uimm_bits!(IMM8, 8);
1981        let shf = _mm256_shrdi_epi64::<IMM8>(a, b).as_i64x4();
1982        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
1983    }
1984}
1985
1986/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst.
1987///
1988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdi_epi64&expand=5108)
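///
/// A minimal usage sketch with illustrative values (not taken from Intel's
/// documentation); it assumes a target supporting `avx512vbmi2` and `avx512vl`:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // Each lane keeps the low 64 bits of ((b << 64) | a) >> 1,
///     // so the low bit of `b` becomes the sign bit of the result.
///     let a = _mm_set1_epi64x(0);
///     let b = _mm_set1_epi64x(1);
///     let r = _mm_shrdi_epi64::<1>(a, b);
///     let mut out = [0_i64; 2];
///     _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
///     assert_eq!(out, [i64::MIN; 2]);
/// }
/// ```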
1989#[inline]
1990#[target_feature(enable = "avx512vbmi2,avx512vl")]
1991#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1992#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
1993#[rustc_legacy_const_generics(2)]
1994#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1995pub const fn _mm_shrdi_epi64<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
1996    static_assert_uimm_bits!(IMM8, 8);
1997    _mm_shrdv_epi64(a, b, _mm_set1_epi64x(IMM8 as i64))
1998}
1999
2000/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2001///
2002/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdi_epi64&expand=5106)
2003#[inline]
2004#[target_feature(enable = "avx512vbmi2,avx512vl")]
2005#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2006#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
2007#[rustc_legacy_const_generics(4)]
2008#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2009pub const fn _mm_mask_shrdi_epi64<const IMM8: i32>(
2010    src: __m128i,
2011    k: __mmask8,
2012    a: __m128i,
2013    b: __m128i,
2014) -> __m128i {
2015    unsafe {
2016        static_assert_uimm_bits!(IMM8, 8);
2017        let shf = _mm_shrdi_epi64::<IMM8>(a, b).as_i64x2();
2018        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
2019    }
2020}
2021
2022/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2023///
2024/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdi_epi64&expand=5107)
2025#[inline]
2026#[target_feature(enable = "avx512vbmi2,avx512vl")]
2027#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2028#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
2029#[rustc_legacy_const_generics(3)]
2030#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2031pub const fn _mm_maskz_shrdi_epi64<const IMM8: i32>(
2032    k: __mmask8,
2033    a: __m128i,
2034    b: __m128i,
2035) -> __m128i {
2036    unsafe {
2037        static_assert_uimm_bits!(IMM8, 8);
2038        let shf = _mm_shrdi_epi64::<IMM8>(a, b).as_i64x2();
2039        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
2040    }
2041}
2042
2043/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst.
2044///
2045/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdi_epi32&expand=5105)
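///
/// Illustrative example (not part of Intel's documentation): with `a = 0x10`,
/// `b = 0xF` and `IMM8 = 4`, every 32-bit lane of the result is
/// `0xF000_0001u32 as i32`: `a` is shifted right by four bits and the low four
/// bits of `b` are shifted in at the top.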
2046#[inline]
2047#[target_feature(enable = "avx512vbmi2")]
2048#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2049#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
2050#[rustc_legacy_const_generics(2)]
2051#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2052pub const fn _mm512_shrdi_epi32<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
2053    static_assert_uimm_bits!(IMM8, 8);
2054    _mm512_shrdv_epi32(a, b, _mm512_set1_epi32(IMM8))
2055}
2056
2057/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2058///
2059/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdi_epi32&expand=5103)
2060#[inline]
2061#[target_feature(enable = "avx512vbmi2")]
2062#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2063#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
2064#[rustc_legacy_const_generics(4)]
2065#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2066pub const fn _mm512_mask_shrdi_epi32<const IMM8: i32>(
2067    src: __m512i,
2068    k: __mmask16,
2069    a: __m512i,
2070    b: __m512i,
2071) -> __m512i {
2072    unsafe {
2073        static_assert_uimm_bits!(IMM8, 8);
2074        let shf = _mm512_shrdi_epi32::<IMM8>(a, b).as_i32x16();
2075        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
2076    }
2077}
2078
2079/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2080///
2081/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdi_epi32&expand=5104)
2082#[inline]
2083#[target_feature(enable = "avx512vbmi2")]
2084#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2085#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
2086#[rustc_legacy_const_generics(3)]
2087#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2088pub const fn _mm512_maskz_shrdi_epi32<const IMM8: i32>(
2089    k: __mmask16,
2090    a: __m512i,
2091    b: __m512i,
2092) -> __m512i {
2093    unsafe {
2094        static_assert_uimm_bits!(IMM8, 8);
2095        let shf = _mm512_shrdi_epi32::<IMM8>(a, b).as_i32x16();
2096        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
2097    }
2098}
2099
2100/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst.
2101///
2102/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdi_epi32&expand=5102)
2103#[inline]
2104#[target_feature(enable = "avx512vbmi2,avx512vl")]
2105#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2106#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
2107#[rustc_legacy_const_generics(2)]
2108#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2109pub const fn _mm256_shrdi_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
2110    static_assert_uimm_bits!(IMM8, 8);
2111    _mm256_shrdv_epi32(a, b, _mm256_set1_epi32(IMM8))
2112}
2113
2114/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2115///
2116/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdi_epi32&expand=5100)
2117#[inline]
2118#[target_feature(enable = "avx512vbmi2,avx512vl")]
2119#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2120#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
2121#[rustc_legacy_const_generics(4)]
2122#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2123pub const fn _mm256_mask_shrdi_epi32<const IMM8: i32>(
2124    src: __m256i,
2125    k: __mmask8,
2126    a: __m256i,
2127    b: __m256i,
2128) -> __m256i {
2129    unsafe {
2130        static_assert_uimm_bits!(IMM8, 8);
2131        let shf = _mm256_shrdi_epi32::<IMM8>(a, b).as_i32x8();
2132        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
2133    }
2134}
2135
2136/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2137///
2138/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdi_epi32&expand=5101)
2139#[inline]
2140#[target_feature(enable = "avx512vbmi2,avx512vl")]
2141#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2142#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
2143#[rustc_legacy_const_generics(3)]
2144#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2145pub const fn _mm256_maskz_shrdi_epi32<const IMM8: i32>(
2146    k: __mmask8,
2147    a: __m256i,
2148    b: __m256i,
2149) -> __m256i {
2150    unsafe {
2151        static_assert_uimm_bits!(IMM8, 8);
2152        let shf = _mm256_shrdi_epi32::<IMM8>(a, b).as_i32x8();
2153        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
2154    }
2155}
2156
2157/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst.
2158///
2159/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdi_epi32&expand=5099)
2160#[inline]
2161#[target_feature(enable = "avx512vbmi2,avx512vl")]
2162#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2163#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
2164#[rustc_legacy_const_generics(2)]
2165#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2166pub const fn _mm_shrdi_epi32<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
2167    static_assert_uimm_bits!(IMM8, 8);
2168    _mm_shrdv_epi32(a, b, _mm_set1_epi32(IMM8))
2169}
2170
2171/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2172///
2173/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdi_epi32&expand=5097)
2174#[inline]
2175#[target_feature(enable = "avx512vbmi2,avx512vl")]
2176#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2177#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
2178#[rustc_legacy_const_generics(4)]
2179#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2180pub const fn _mm_mask_shrdi_epi32<const IMM8: i32>(
2181    src: __m128i,
2182    k: __mmask8,
2183    a: __m128i,
2184    b: __m128i,
2185) -> __m128i {
2186    unsafe {
2187        static_assert_uimm_bits!(IMM8, 8);
2188        let shf = _mm_shrdi_epi32::<IMM8>(a, b).as_i32x4();
2189        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
2190    }
2191}
2192
2193/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2194///
2195/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdi_epi32&expand=5098)
2196#[inline]
2197#[target_feature(enable = "avx512vbmi2,avx512vl")]
2198#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2199#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
2200#[rustc_legacy_const_generics(3)]
2201#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2202pub const fn _mm_maskz_shrdi_epi32<const IMM8: i32>(
2203    k: __mmask8,
2204    a: __m128i,
2205    b: __m128i,
2206) -> __m128i {
2207    unsafe {
2208        static_assert_uimm_bits!(IMM8, 8);
2209        let shf = _mm_shrdi_epi32::<IMM8>(a, b).as_i32x4();
2210        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
2211    }
2212}
2213
2214/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst.
2215///
2216/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdi_epi16&expand=5096)
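///
/// Illustrative example (not part of Intel's documentation): with `a = 0x10`,
/// `b = 0xF` and `IMM8 = 4`, every 16-bit lane of the result is
/// `0xF001u16 as i16`: `a` is shifted right by four bits and the low four bits
/// of `b` are shifted in at the top.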
2217#[inline]
2218#[target_feature(enable = "avx512vbmi2")]
2219#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2220#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
2221#[rustc_legacy_const_generics(2)]
2222#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2223pub const fn _mm512_shrdi_epi16<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
2224    static_assert_uimm_bits!(IMM8, 8);
2225    _mm512_shrdv_epi16(a, b, _mm512_set1_epi16(IMM8 as i16))
2226}
2227
2228/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2229///
2230/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdi_epi16&expand=5094)
2231#[inline]
2232#[target_feature(enable = "avx512vbmi2")]
2233#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2234#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
2235#[rustc_legacy_const_generics(4)]
2236#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2237pub const fn _mm512_mask_shrdi_epi16<const IMM8: i32>(
2238    src: __m512i,
2239    k: __mmask32,
2240    a: __m512i,
2241    b: __m512i,
2242) -> __m512i {
2243    unsafe {
2244        static_assert_uimm_bits!(IMM8, 8);
2245        let shf = _mm512_shrdi_epi16::<IMM8>(a, b).as_i16x32();
2246        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
2247    }
2248}
2249
2250/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2251///
2252/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdi_epi16&expand=5095)
2253#[inline]
2254#[target_feature(enable = "avx512vbmi2")]
2255#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2256#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
2257#[rustc_legacy_const_generics(3)]
2258#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2259pub const fn _mm512_maskz_shrdi_epi16<const IMM8: i32>(
2260    k: __mmask32,
2261    a: __m512i,
2262    b: __m512i,
2263) -> __m512i {
2264    unsafe {
2265        static_assert_uimm_bits!(IMM8, 8);
2266        let shf = _mm512_shrdi_epi16::<IMM8>(a, b).as_i16x32();
2267        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
2268    }
2269}
2270
2271/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst.
2272///
2273/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdi_epi16&expand=5093)
2274#[inline]
2275#[target_feature(enable = "avx512vbmi2,avx512vl")]
2276#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2277#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
2278#[rustc_legacy_const_generics(2)]
2279#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2280pub const fn _mm256_shrdi_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
2281    static_assert_uimm_bits!(IMM8, 8);
2282    _mm256_shrdv_epi16(a, b, _mm256_set1_epi16(IMM8 as i16))
2283}
2284
2285/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2286///
2287/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdi_epi16&expand=5091)
2288#[inline]
2289#[target_feature(enable = "avx512vbmi2,avx512vl")]
2290#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2291#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
2292#[rustc_legacy_const_generics(4)]
2293#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2294pub const fn _mm256_mask_shrdi_epi16<const IMM8: i32>(
2295    src: __m256i,
2296    k: __mmask16,
2297    a: __m256i,
2298    b: __m256i,
2299) -> __m256i {
2300    unsafe {
2301        static_assert_uimm_bits!(IMM8, 8);
2302        let shf = _mm256_shrdi_epi16::<IMM8>(a, b).as_i16x16();
2303        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
2304    }
2305}
2306
2307/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2308///
2309/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdi_epi16&expand=5092)
2310#[inline]
2311#[target_feature(enable = "avx512vbmi2,avx512vl")]
2312#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2313#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
2314#[rustc_legacy_const_generics(3)]
2315#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2316pub const fn _mm256_maskz_shrdi_epi16<const IMM8: i32>(
2317    k: __mmask16,
2318    a: __m256i,
2319    b: __m256i,
2320) -> __m256i {
2321    unsafe {
2322        static_assert_uimm_bits!(IMM8, 8);
2323        let shf = _mm256_shrdi_epi16::<IMM8>(a, b).as_i16x16();
2324        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
2325    }
2326}
2327
2328/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst.
2329///
2330/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdi_epi16&expand=5090)
2331#[inline]
2332#[target_feature(enable = "avx512vbmi2,avx512vl")]
2333#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2334#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
2335#[rustc_legacy_const_generics(2)]
2336#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2337pub const fn _mm_shrdi_epi16<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
2338    static_assert_uimm_bits!(IMM8, 8);
2339    _mm_shrdv_epi16(a, b, _mm_set1_epi16(IMM8 as i16))
2340}
2341
2342/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2343///
2344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdi_epi16&expand=5088)
2345#[inline]
2346#[target_feature(enable = "avx512vbmi2,avx512vl")]
2347#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2348#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
2349#[rustc_legacy_const_generics(4)]
2350#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2351pub const fn _mm_mask_shrdi_epi16<const IMM8: i32>(
2352    src: __m128i,
2353    k: __mmask8,
2354    a: __m128i,
2355    b: __m128i,
2356) -> __m128i {
2357    unsafe {
2358        static_assert_uimm_bits!(IMM8, 8);
2359        let shf = _mm_shrdi_epi16::<IMM8>(a, b).as_i16x8();
2360        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
2361    }
2362}
2363
2364/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2365///
2366/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdi_epi16&expand=5089)
2367#[inline]
2368#[target_feature(enable = "avx512vbmi2,avx512vl")]
2369#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2370#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
2371#[rustc_legacy_const_generics(3)]
2372#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2373pub const fn _mm_maskz_shrdi_epi16<const IMM8: i32>(
2374    k: __mmask8,
2375    a: __m128i,
2376    b: __m128i,
2377) -> __m128i {
2378    unsafe {
2379        static_assert_uimm_bits!(IMM8, 8);
2380        let shf = _mm_shrdi_epi16::<IMM8>(a, b).as_i16x8();
2381        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
2382    }
2383}
2384
2385#[allow(improper_ctypes)]
2386unsafe extern "C" {
2387    #[link_name = "llvm.x86.avx512.mask.compress.store.w.512"]
2388    fn vcompressstorew(mem: *mut i8, data: i16x32, mask: u32);
2389    #[link_name = "llvm.x86.avx512.mask.compress.store.w.256"]
2390    fn vcompressstorew256(mem: *mut i8, data: i16x16, mask: u16);
2391    #[link_name = "llvm.x86.avx512.mask.compress.store.w.128"]
2392    fn vcompressstorew128(mem: *mut i8, data: i16x8, mask: u8);
2393
2394    #[link_name = "llvm.x86.avx512.mask.compress.store.b.512"]
2395    fn vcompressstoreb(mem: *mut i8, data: i8x64, mask: u64);
2396    #[link_name = "llvm.x86.avx512.mask.compress.store.b.256"]
2397    fn vcompressstoreb256(mem: *mut i8, data: i8x32, mask: u32);
2398    #[link_name = "llvm.x86.avx512.mask.compress.store.b.128"]
2399    fn vcompressstoreb128(mem: *mut i8, data: i8x16, mask: u16);
2400
2401    #[link_name = "llvm.x86.avx512.mask.compress.w.512"]
2402    fn vpcompressw(a: i16x32, src: i16x32, mask: u32) -> i16x32;
2403    #[link_name = "llvm.x86.avx512.mask.compress.w.256"]
2404    fn vpcompressw256(a: i16x16, src: i16x16, mask: u16) -> i16x16;
2405    #[link_name = "llvm.x86.avx512.mask.compress.w.128"]
2406    fn vpcompressw128(a: i16x8, src: i16x8, mask: u8) -> i16x8;
2407
2408    #[link_name = "llvm.x86.avx512.mask.compress.b.512"]
2409    fn vpcompressb(a: i8x64, src: i8x64, mask: u64) -> i8x64;
2410    #[link_name = "llvm.x86.avx512.mask.compress.b.256"]
2411    fn vpcompressb256(a: i8x32, src: i8x32, mask: u32) -> i8x32;
2412    #[link_name = "llvm.x86.avx512.mask.compress.b.128"]
2413    fn vpcompressb128(a: i8x16, src: i8x16, mask: u16) -> i8x16;
2414
2415    #[link_name = "llvm.x86.avx512.mask.expand.w.512"]
2416    fn vpexpandw(a: i16x32, src: i16x32, mask: u32) -> i16x32;
2417    #[link_name = "llvm.x86.avx512.mask.expand.w.256"]
2418    fn vpexpandw256(a: i16x16, src: i16x16, mask: u16) -> i16x16;
2419    #[link_name = "llvm.x86.avx512.mask.expand.w.128"]
2420    fn vpexpandw128(a: i16x8, src: i16x8, mask: u8) -> i16x8;
2421
2422    #[link_name = "llvm.x86.avx512.mask.expand.b.512"]
2423    fn vpexpandb(a: i8x64, src: i8x64, mask: u64) -> i8x64;
2424    #[link_name = "llvm.x86.avx512.mask.expand.b.256"]
2425    fn vpexpandb256(a: i8x32, src: i8x32, mask: u32) -> i8x32;
2426    #[link_name = "llvm.x86.avx512.mask.expand.b.128"]
2427    fn vpexpandb128(a: i8x16, src: i8x16, mask: u16) -> i8x16;
2428
2429    #[link_name = "llvm.x86.avx512.mask.expand.load.b.128"]
2430    fn expandloadb_128(mem_addr: *const i8, a: i8x16, mask: u16) -> i8x16;
2431    #[link_name = "llvm.x86.avx512.mask.expand.load.w.128"]
2432    fn expandloadw_128(mem_addr: *const i16, a: i16x8, mask: u8) -> i16x8;
2433    #[link_name = "llvm.x86.avx512.mask.expand.load.b.256"]
2434    fn expandloadb_256(mem_addr: *const i8, a: i8x32, mask: u32) -> i8x32;
2435    #[link_name = "llvm.x86.avx512.mask.expand.load.w.256"]
2436    fn expandloadw_256(mem_addr: *const i16, a: i16x16, mask: u16) -> i16x16;
2437    #[link_name = "llvm.x86.avx512.mask.expand.load.b.512"]
2438    fn expandloadb_512(mem_addr: *const i8, a: i8x64, mask: u64) -> i8x64;
2439    #[link_name = "llvm.x86.avx512.mask.expand.load.w.512"]
2440    fn expandloadw_512(mem_addr: *const i16, a: i16x32, mask: u32) -> i16x32;
2441}
2442
2443#[cfg(test)]
2444mod tests {
2445    use crate::core_arch::assert_eq_const as assert_eq;
2446
2447    use stdarch_test::simd_test;
2448
2449    use crate::core_arch::x86::*;
2450    use crate::hint::black_box;
2451
2452    #[simd_test(enable = "avx512vbmi2")]
2453    fn test_mm512_mask_compress_epi16() {
2454        let src = _mm512_set1_epi16(200);
2455        #[rustfmt::skip]
2456        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
2457                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2458        let r = _mm512_mask_compress_epi16(src, 0b01010101_01010101_01010101_01010101, a);
2459        #[rustfmt::skip]
2460        let e = _mm512_set_epi16(
2461            200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200,
2462            1,   3,   5,   7,   9,   11,  13,  15,  17,  19,  21,  23,  25,  27,  29,  31,
2463        );
2464        assert_eq_m512i(r, e);
2465    }
2466
2467    #[simd_test(enable = "avx512vbmi2")]
2468    fn test_mm512_maskz_compress_epi16() {
2469        #[rustfmt::skip]
2470        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
2471                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2472        let r = _mm512_maskz_compress_epi16(0b01010101_01010101_01010101_01010101, a);
2473        #[rustfmt::skip]
2474        let e = _mm512_set_epi16(
2475            0, 0, 0, 0, 0, 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
2476            1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
2477        );
2478        assert_eq_m512i(r, e);
2479    }
2480
2481    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2482    fn test_mm256_mask_compress_epi16() {
2483        let src = _mm256_set1_epi16(200);
2484        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2485        let r = _mm256_mask_compress_epi16(src, 0b01010101_01010101, a);
2486        let e = _mm256_set_epi16(
2487            200, 200, 200, 200, 200, 200, 200, 200, 1, 3, 5, 7, 9, 11, 13, 15,
2488        );
2489        assert_eq_m256i(r, e);
2490    }
2491
2492    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2493    fn test_mm256_maskz_compress_epi16() {
2494        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2495        let r = _mm256_maskz_compress_epi16(0b01010101_01010101, a);
2496        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 5, 7, 9, 11, 13, 15);
2497        assert_eq_m256i(r, e);
2498    }
2499
2500    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2501    fn test_mm_mask_compress_epi16() {
2502        let src = _mm_set1_epi16(200);
2503        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
2504        let r = _mm_mask_compress_epi16(src, 0b01010101, a);
2505        let e = _mm_set_epi16(200, 200, 200, 200, 1, 3, 5, 7);
2506        assert_eq_m128i(r, e);
2507    }
2508
2509    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2510    fn test_mm_maskz_compress_epi16() {
2511        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
2512        let r = _mm_maskz_compress_epi16(0b01010101, a);
2513        let e = _mm_set_epi16(0, 0, 0, 0, 1, 3, 5, 7);
2514        assert_eq_m128i(r, e);
2515    }
2516
2517    #[simd_test(enable = "avx512vbmi2")]
2518    fn test_mm512_mask_compress_epi8() {
2519        let src = _mm512_set1_epi8(100);
2520        #[rustfmt::skip]
2521        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
2522                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
2523                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
2524                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
2525        let r = _mm512_mask_compress_epi8(
2526            src,
2527            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101,
2528            a,
2529        );
2530        #[rustfmt::skip]
2531        let e = _mm512_set_epi8(
2532            100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
2533            100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
2534            1,   3,   5,   7,   9,   11,  13,  15,  17,  19,  21,  23,  25,  27,  29,  31,
2535            33,  35,  37,  39,  41,  43,  45,  47,  49,  51,  53,  55,  57,  59,  61,  63,
2536        );
2537        assert_eq_m512i(r, e);
2538    }
2539
2540    #[simd_test(enable = "avx512vbmi2")]
2541    fn test_mm512_maskz_compress_epi8() {
2542        #[rustfmt::skip]
2543        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
2544                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
2545                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
2546                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
2547        let r = _mm512_maskz_compress_epi8(
2548            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101,
2549            a,
2550        );
2551        #[rustfmt::skip]
2552        let e = _mm512_set_epi8(
2553            0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
2554            0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
2555            1,  3,  5,  7,  9,  11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
2556            33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63,
2557        );
2558        assert_eq_m512i(r, e);
2559    }
2560
2561    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2562    fn test_mm256_mask_compress_epi8() {
2563        let src = _mm256_set1_epi8(100);
2564        #[rustfmt::skip]
2565        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
2566                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2567        let r = _mm256_mask_compress_epi8(src, 0b01010101_01010101_01010101_01010101, a);
2568        #[rustfmt::skip]
2569        let e = _mm256_set_epi8(
2570            100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
2571            1,   3,   5,   7,   9,   11,  13,  15,  17,  19,  21,  23,  25,  27,  29,  31,
2572        );
2573        assert_eq_m256i(r, e);
2574    }
2575
2576    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2577    fn test_mm256_maskz_compress_epi8() {
2578        #[rustfmt::skip]
2579        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
2580                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2581        let r = _mm256_maskz_compress_epi8(0b01010101_01010101_01010101_01010101, a);
2582        #[rustfmt::skip]
2583        let e = _mm256_set_epi8(
2584            0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
2585            1,  3,  5,  7,  9,  11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
2586        );
2587        assert_eq_m256i(r, e);
2588    }
2589
2590    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2591    fn test_mm_mask_compress_epi8() {
2592        let src = _mm_set1_epi8(100);
2593        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2594        let r = _mm_mask_compress_epi8(src, 0b01010101_01010101, a);
2595        let e = _mm_set_epi8(
2596            100, 100, 100, 100, 100, 100, 100, 100, 1, 3, 5, 7, 9, 11, 13, 15,
2597        );
2598        assert_eq_m128i(r, e);
2599    }
2600
2601    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2602    fn test_mm_maskz_compress_epi8() {
2603        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2604        let r = _mm_maskz_compress_epi8(0b01010101_01010101, a);
2605        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 5, 7, 9, 11, 13, 15);
2606        assert_eq_m128i(r, e);
2607    }
2608
2609    #[simd_test(enable = "avx512vbmi2")]
2610    fn test_mm512_mask_expand_epi16() {
2611        let src = _mm512_set1_epi16(200);
2612        #[rustfmt::skip]
2613        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
2614                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2615        let r = _mm512_mask_expand_epi16(src, 0b01010101_01010101_01010101_01010101, a);
2616        #[rustfmt::skip]
2617        let e = _mm512_set_epi16(
2618            200, 16, 200, 17, 200, 18, 200, 19, 200, 20, 200, 21, 200, 22, 200, 23,
2619            200, 24, 200, 25, 200, 26, 200, 27, 200, 28, 200, 29, 200, 30, 200, 31,
2620        );
2621        assert_eq_m512i(r, e);
2622    }
2623
2624    #[simd_test(enable = "avx512vbmi2")]
2625    fn test_mm512_maskz_expand_epi16() {
2626        #[rustfmt::skip]
2627        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
2628                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2629        let r = _mm512_maskz_expand_epi16(0b01010101_01010101_01010101_01010101, a);
2630        #[rustfmt::skip]
2631        let e = _mm512_set_epi16(0, 16, 0, 17, 0, 18, 0, 19, 0, 20, 0, 21, 0, 22, 0, 23,
2632                                 0, 24, 0, 25, 0, 26, 0, 27, 0, 28, 0, 29, 0, 30, 0, 31);
2633        assert_eq_m512i(r, e);
2634    }
2635
2636    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2637    fn test_mm256_mask_expand_epi16() {
2638        let src = _mm256_set1_epi16(200);
2639        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2640        let r = _mm256_mask_expand_epi16(src, 0b01010101_01010101, a);
2641        let e = _mm256_set_epi16(
2642            200, 8, 200, 9, 200, 10, 200, 11, 200, 12, 200, 13, 200, 14, 200, 15,
2643        );
2644        assert_eq_m256i(r, e);
2645    }
2646
2647    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2648    fn test_mm256_maskz_expand_epi16() {
2649        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2650        let r = _mm256_maskz_expand_epi16(0b01010101_01010101, a);
2651        let e = _mm256_set_epi16(0, 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15);
2652        assert_eq_m256i(r, e);
2653    }
2654
2655    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2656    fn test_mm_mask_expand_epi16() {
2657        let src = _mm_set1_epi16(200);
2658        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
2659        let r = _mm_mask_expand_epi16(src, 0b01010101, a);
2660        let e = _mm_set_epi16(200, 4, 200, 5, 200, 6, 200, 7);
2661        assert_eq_m128i(r, e);
2662    }
2663
2664    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2665    fn test_mm_maskz_expand_epi16() {
2666        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
2667        let r = _mm_maskz_expand_epi16(0b01010101, a);
2668        let e = _mm_set_epi16(0, 4, 0, 5, 0, 6, 0, 7);
2669        assert_eq_m128i(r, e);
2670    }
2671
2672    #[simd_test(enable = "avx512vbmi2")]
2673    fn test_mm512_mask_expand_epi8() {
2674        let src = _mm512_set1_epi8(100);
2675        #[rustfmt::skip]
2676        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
2677                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
2678                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
2679                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
2680        let r = _mm512_mask_expand_epi8(
2681            src,
2682            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101,
2683            a,
2684        );
2685        #[rustfmt::skip]
2686        let e = _mm512_set_epi8(
2687            100, 32, 100, 33, 100, 34, 100, 35, 100, 36, 100, 37, 100, 38, 100, 39,
2688            100, 40, 100, 41, 100, 42, 100, 43, 100, 44, 100, 45, 100, 46, 100, 47,
2689            100, 48, 100, 49, 100, 50, 100, 51, 100, 52, 100, 53, 100, 54, 100, 55,
2690            100, 56, 100, 57, 100, 58, 100, 59, 100, 60, 100, 61, 100, 62, 100, 63,
2691        );
2692        assert_eq_m512i(r, e);
2693    }
2694
2695    #[simd_test(enable = "avx512vbmi2")]
2696    fn test_mm512_maskz_expand_epi8() {
2697        #[rustfmt::skip]
2698        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
2699                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
2700                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
2701                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
2702        let r = _mm512_maskz_expand_epi8(
2703            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101,
2704            a,
2705        );
2706        #[rustfmt::skip]
2707        let e = _mm512_set_epi8(
2708            0, 32, 0, 33, 0, 34, 0, 35, 0, 36, 0, 37, 0, 38, 0, 39,
2709            0, 40, 0, 41, 0, 42, 0, 43, 0, 44, 0, 45, 0, 46, 0, 47,
2710            0, 48, 0, 49, 0, 50, 0, 51, 0, 52, 0, 53, 0, 54, 0, 55,
2711            0, 56, 0, 57, 0, 58, 0, 59, 0, 60, 0, 61, 0, 62, 0, 63,
2712        );
2713        assert_eq_m512i(r, e);
2714    }
2715
2716    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2717    fn test_mm256_mask_expand_epi8() {
2718        let src = _mm256_set1_epi8(100);
2719        #[rustfmt::skip]
2720        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
2721                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2722        let r = _mm256_mask_expand_epi8(src, 0b01010101_01010101_01010101_01010101, a);
2723        #[rustfmt::skip]
2724        let e = _mm256_set_epi8(
2725            100, 16, 100, 17, 100, 18, 100, 19, 100, 20, 100, 21, 100, 22, 100, 23,
2726            100, 24, 100, 25, 100, 26, 100, 27, 100, 28, 100, 29, 100, 30, 100, 31,
2727        );
2728        assert_eq_m256i(r, e);
2729    }
2730
2731    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2732    fn test_mm256_maskz_expand_epi8() {
2733        #[rustfmt::skip]
2734        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
2735                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2736        let r = _mm256_maskz_expand_epi8(0b01010101_01010101_01010101_01010101, a);
2737        #[rustfmt::skip]
2738        let e = _mm256_set_epi8(
2739            0, 16, 0, 17, 0, 18, 0, 19, 0, 20, 0, 21, 0, 22, 0, 23,
2740            0, 24, 0, 25, 0, 26, 0, 27, 0, 28, 0, 29, 0, 30, 0, 31,
2741        );
2742        assert_eq_m256i(r, e);
2743    }
2744
2745    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2746    fn test_mm_mask_expand_epi8() {
2747        let src = _mm_set1_epi8(100);
2748        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2749        let r = _mm_mask_expand_epi8(src, 0b01010101_01010101, a);
2750        let e = _mm_set_epi8(
2751            100, 8, 100, 9, 100, 10, 100, 11, 100, 12, 100, 13, 100, 14, 100, 15,
2752        );
2753        assert_eq_m128i(r, e);
2754    }
2755
2756    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2757    fn test_mm_maskz_expand_epi8() {
2758        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2759        let r = _mm_maskz_expand_epi8(0b01010101_01010101, a);
2760        let e = _mm_set_epi8(0, 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15);
2761        assert_eq_m128i(r, e);
2762    }
2763
2764    #[simd_test(enable = "avx512vbmi2")]
2765    const fn test_mm512_shldv_epi64() {
2766        let a = _mm512_set1_epi64(1);
2767        let b = _mm512_set1_epi64(1 << 63);
2768        let c = _mm512_set1_epi64(2);
2769        let r = _mm512_shldv_epi64(a, b, c);
2770        let e = _mm512_set1_epi64(6);
2771        assert_eq_m512i(r, e);
2772    }
2773
2774    #[simd_test(enable = "avx512vbmi2")]
2775    const fn test_mm512_mask_shldv_epi64() {
2776        let a = _mm512_set1_epi64(1);
2777        let b = _mm512_set1_epi64(1 << 63);
2778        let c = _mm512_set1_epi64(2);
2779        let r = _mm512_mask_shldv_epi64(a, 0, b, c);
2780        assert_eq_m512i(r, a);
2781        let r = _mm512_mask_shldv_epi64(a, 0b11111111, b, c);
2782        let e = _mm512_set1_epi64(6);
2783        assert_eq_m512i(r, e);
2784    }
2785
2786    #[simd_test(enable = "avx512vbmi2")]
2787    const fn test_mm512_maskz_shldv_epi64() {
2788        let a = _mm512_set1_epi64(1);
2789        let b = _mm512_set1_epi64(1 << 63);
2790        let c = _mm512_set1_epi64(2);
2791        let r = _mm512_maskz_shldv_epi64(0, a, b, c);
2792        assert_eq_m512i(r, _mm512_setzero_si512());
2793        let r = _mm512_maskz_shldv_epi64(0b11111111, a, b, c);
2794        let e = _mm512_set1_epi64(6);
2795        assert_eq_m512i(r, e);
2796    }
2797
2798    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2799    const fn test_mm256_shldv_epi64() {
2800        let a = _mm256_set1_epi64x(1);
2801        let b = _mm256_set1_epi64x(1 << 63);
2802        let c = _mm256_set1_epi64x(2);
2803        let r = _mm256_shldv_epi64(a, b, c);
2804        let e = _mm256_set1_epi64x(6);
2805        assert_eq_m256i(r, e);
2806    }
2807
2808    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2809    const fn test_mm256_mask_shldv_epi64() {
2810        let a = _mm256_set1_epi64x(1);
2811        let b = _mm256_set1_epi64x(1 << 63);
2812        let c = _mm256_set1_epi64x(2);
2813        let r = _mm256_mask_shldv_epi64(a, 0, b, c);
2814        assert_eq_m256i(r, a);
2815        let r = _mm256_mask_shldv_epi64(a, 0b00001111, b, c);
2816        let e = _mm256_set1_epi64x(6);
2817        assert_eq_m256i(r, e);
2818    }
2819
2820    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2821    const fn test_mm256_maskz_shldv_epi64() {
2822        let a = _mm256_set1_epi64x(1);
2823        let b = _mm256_set1_epi64x(1 << 63);
2824        let c = _mm256_set1_epi64x(2);
2825        let r = _mm256_maskz_shldv_epi64(0, a, b, c);
2826        assert_eq_m256i(r, _mm256_setzero_si256());
2827        let r = _mm256_maskz_shldv_epi64(0b00001111, a, b, c);
2828        let e = _mm256_set1_epi64x(6);
2829        assert_eq_m256i(r, e);
2830    }
2831
2832    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2833    const fn test_mm_shldv_epi64() {
2834        let a = _mm_set1_epi64x(1);
2835        let b = _mm_set1_epi64x(1 << 63);
2836        let c = _mm_set1_epi64x(2);
2837        let r = _mm_shldv_epi64(a, b, c);
2838        let e = _mm_set1_epi64x(6);
2839        assert_eq_m128i(r, e);
2840    }
2841
2842    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2843    const fn test_mm_mask_shldv_epi64() {
2844        let a = _mm_set1_epi64x(1);
2845        let b = _mm_set1_epi64x(1 << 63);
2846        let c = _mm_set1_epi64x(2);
2847        let r = _mm_mask_shldv_epi64(a, 0, b, c);
2848        assert_eq_m128i(r, a);
2849        let r = _mm_mask_shldv_epi64(a, 0b00000011, b, c);
2850        let e = _mm_set1_epi64x(6);
2851        assert_eq_m128i(r, e);
2852    }
2853
2854    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2855    const fn test_mm_maskz_shldv_epi64() {
2856        let a = _mm_set1_epi64x(1);
2857        let b = _mm_set1_epi64x(1 << 63);
2858        let c = _mm_set1_epi64x(2);
2859        let r = _mm_maskz_shldv_epi64(0, a, b, c);
2860        assert_eq_m128i(r, _mm_setzero_si128());
2861        let r = _mm_maskz_shldv_epi64(0b00000011, a, b, c);
2862        let e = _mm_set1_epi64x(6);
2863        assert_eq_m128i(r, e);
2864    }
2865
2866    #[simd_test(enable = "avx512vbmi2")]
2867    const fn test_mm512_shldv_epi32() {
2868        let a = _mm512_set1_epi32(1);
2869        let b = _mm512_set1_epi32(1 << 31);
2870        let c = _mm512_set1_epi32(2);
2871        let r = _mm512_shldv_epi32(a, b, c);
2872        let e = _mm512_set1_epi32(6);
2873        assert_eq_m512i(r, e);
2874    }
2875
2876    #[simd_test(enable = "avx512vbmi2")]
2877    const fn test_mm512_mask_shldv_epi32() {
2878        let a = _mm512_set1_epi32(1);
2879        let b = _mm512_set1_epi32(1 << 31);
2880        let c = _mm512_set1_epi32(2);
2881        let r = _mm512_mask_shldv_epi32(a, 0, b, c);
2882        assert_eq_m512i(r, a);
2883        let r = _mm512_mask_shldv_epi32(a, 0b11111111_11111111, b, c);
2884        let e = _mm512_set1_epi32(6);
2885        assert_eq_m512i(r, e);
2886    }
2887
2888    #[simd_test(enable = "avx512vbmi2")]
2889    const fn test_mm512_maskz_shldv_epi32() {
2890        let a = _mm512_set1_epi32(1);
2891        let b = _mm512_set1_epi32(1 << 31);
2892        let c = _mm512_set1_epi32(2);
2893        let r = _mm512_maskz_shldv_epi32(0, a, b, c);
2894        assert_eq_m512i(r, _mm512_setzero_si512());
2895        let r = _mm512_maskz_shldv_epi32(0b11111111_11111111, a, b, c);
2896        let e = _mm512_set1_epi32(6);
2897        assert_eq_m512i(r, e);
2898    }
2899
2900    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2901    const fn test_mm256_shldv_epi32() {
2902        let a = _mm256_set1_epi32(1);
2903        let b = _mm256_set1_epi32(1 << 31);
2904        let c = _mm256_set1_epi32(2);
2905        let r = _mm256_shldv_epi32(a, b, c);
2906        let e = _mm256_set1_epi32(6);
2907        assert_eq_m256i(r, e);
2908    }
2909
2910    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2911    const fn test_mm256_mask_shldv_epi32() {
2912        let a = _mm256_set1_epi32(1);
2913        let b = _mm256_set1_epi32(1 << 31);
2914        let c = _mm256_set1_epi32(2);
2915        let r = _mm256_mask_shldv_epi32(a, 0, b, c);
2916        assert_eq_m256i(r, a);
2917        let r = _mm256_mask_shldv_epi32(a, 0b11111111, b, c);
2918        let e = _mm256_set1_epi32(6);
2919        assert_eq_m256i(r, e);
2920    }
2921
2922    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2923    const fn test_mm256_maskz_shldv_epi32() {
2924        let a = _mm256_set1_epi32(1);
2925        let b = _mm256_set1_epi32(1 << 31);
2926        let c = _mm256_set1_epi32(2);
2927        let r = _mm256_maskz_shldv_epi32(0, a, b, c);
2928        assert_eq_m256i(r, _mm256_setzero_si256());
2929        let r = _mm256_maskz_shldv_epi32(0b11111111, a, b, c);
2930        let e = _mm256_set1_epi32(6);
2931        assert_eq_m256i(r, e);
2932    }
2933
2934    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2935    const fn test_mm_shldv_epi32() {
2936        let a = _mm_set1_epi32(1);
2937        let b = _mm_set1_epi32(1 << 31);
2938        let c = _mm_set1_epi32(2);
2939        let r = _mm_shldv_epi32(a, b, c);
2940        let e = _mm_set1_epi32(6);
2941        assert_eq_m128i(r, e);
2942    }
2943
2944    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2945    const fn test_mm_mask_shldv_epi32() {
2946        let a = _mm_set1_epi32(1);
2947        let b = _mm_set1_epi32(1 << 31);
2948        let c = _mm_set1_epi32(2);
2949        let r = _mm_mask_shldv_epi32(a, 0, b, c);
2950        assert_eq_m128i(r, a);
2951        let r = _mm_mask_shldv_epi32(a, 0b00001111, b, c);
2952        let e = _mm_set1_epi32(6);
2953        assert_eq_m128i(r, e);
2954    }
2955
2956    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2957    const fn test_mm_maskz_shldv_epi32() {
2958        let a = _mm_set1_epi32(1);
2959        let b = _mm_set1_epi32(1 << 31);
2960        let c = _mm_set1_epi32(2);
2961        let r = _mm_maskz_shldv_epi32(0, a, b, c);
2962        assert_eq_m128i(r, _mm_setzero_si128());
2963        let r = _mm_maskz_shldv_epi32(0b00001111, a, b, c);
2964        let e = _mm_set1_epi32(6);
2965        assert_eq_m128i(r, e);
2966    }
2967
2968    #[simd_test(enable = "avx512vbmi2")]
2969    const fn test_mm512_shldv_epi16() {
2970        let a = _mm512_set1_epi16(1);
2971        let b = _mm512_set1_epi16(1 << 15);
2972        let c = _mm512_set1_epi16(2);
2973        let r = _mm512_shldv_epi16(a, b, c);
2974        let e = _mm512_set1_epi16(6);
2975        assert_eq_m512i(r, e);
2976    }
2977
2978    #[simd_test(enable = "avx512vbmi2")]
2979    const fn test_mm512_mask_shldv_epi16() {
2980        let a = _mm512_set1_epi16(1);
2981        let b = _mm512_set1_epi16(1 << 15);
2982        let c = _mm512_set1_epi16(2);
2983        let r = _mm512_mask_shldv_epi16(a, 0, b, c);
2984        assert_eq_m512i(r, a);
2985        let r = _mm512_mask_shldv_epi16(a, 0b11111111_11111111_11111111_11111111, b, c);
2986        let e = _mm512_set1_epi16(6);
2987        assert_eq_m512i(r, e);
2988    }
2989
2990    #[simd_test(enable = "avx512vbmi2")]
2991    const fn test_mm512_maskz_shldv_epi16() {
2992        let a = _mm512_set1_epi16(1);
2993        let b = _mm512_set1_epi16(1 << 15);
2994        let c = _mm512_set1_epi16(2);
2995        let r = _mm512_maskz_shldv_epi16(0, a, b, c);
2996        assert_eq_m512i(r, _mm512_setzero_si512());
2997        let r = _mm512_maskz_shldv_epi16(0b11111111_11111111_11111111_11111111, a, b, c);
2998        let e = _mm512_set1_epi16(6);
2999        assert_eq_m512i(r, e);
3000    }
3001
3002    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3003    const fn test_mm256_shldv_epi16() {
3004        let a = _mm256_set1_epi16(1);
3005        let b = _mm256_set1_epi16(1 << 15);
3006        let c = _mm256_set1_epi16(2);
3007        let r = _mm256_shldv_epi16(a, b, c);
3008        let e = _mm256_set1_epi16(6);
3009        assert_eq_m256i(r, e);
3010    }
3011
3012    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3013    const fn test_mm256_mask_shldv_epi16() {
3014        let a = _mm256_set1_epi16(1);
3015        let b = _mm256_set1_epi16(1 << 15);
3016        let c = _mm256_set1_epi16(2);
3017        let r = _mm256_mask_shldv_epi16(a, 0, b, c);
3018        assert_eq_m256i(r, a);
3019        let r = _mm256_mask_shldv_epi16(a, 0b11111111_11111111, b, c);
3020        let e = _mm256_set1_epi16(6);
3021        assert_eq_m256i(r, e);
3022    }
3023
3024    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3025    const fn test_mm256_maskz_shldv_epi16() {
3026        let a = _mm256_set1_epi16(1);
3027        let b = _mm256_set1_epi16(1 << 15);
3028        let c = _mm256_set1_epi16(2);
3029        let r = _mm256_maskz_shldv_epi16(0, a, b, c);
3030        assert_eq_m256i(r, _mm256_setzero_si256());
3031        let r = _mm256_maskz_shldv_epi16(0b11111111_11111111, a, b, c);
3032        let e = _mm256_set1_epi16(6);
3033        assert_eq_m256i(r, e);
3034    }
3035
3036    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3037    const fn test_mm_shldv_epi16() {
3038        let a = _mm_set1_epi16(1);
3039        let b = _mm_set1_epi16(1 << 15);
3040        let c = _mm_set1_epi16(2);
3041        let r = _mm_shldv_epi16(a, b, c);
3042        let e = _mm_set1_epi16(6);
3043        assert_eq_m128i(r, e);
3044    }
3045
3046    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3047    const fn test_mm_mask_shldv_epi16() {
3048        let a = _mm_set1_epi16(1);
3049        let b = _mm_set1_epi16(1 << 15);
3050        let c = _mm_set1_epi16(2);
3051        let r = _mm_mask_shldv_epi16(a, 0, b, c);
3052        assert_eq_m128i(r, a);
3053        let r = _mm_mask_shldv_epi16(a, 0b11111111, b, c);
3054        let e = _mm_set1_epi16(6);
3055        assert_eq_m128i(r, e);
3056    }
3057
3058    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3059    const fn test_mm_maskz_shldv_epi16() {
3060        let a = _mm_set1_epi16(1);
3061        let b = _mm_set1_epi16(1 << 15);
3062        let c = _mm_set1_epi16(2);
3063        let r = _mm_maskz_shldv_epi16(0, a, b, c);
3064        assert_eq_m128i(r, _mm_setzero_si128());
3065        let r = _mm_maskz_shldv_epi16(0b11111111, a, b, c);
3066        let e = _mm_set1_epi16(6);
3067        assert_eq_m128i(r, e);
3068    }
3069
3070    #[simd_test(enable = "avx512vbmi2")]
3071    const fn test_mm512_shrdv_epi64() {
3072        let a = _mm512_set1_epi64(2);
3073        let b = _mm512_set1_epi64(8);
3074        let c = _mm512_set1_epi64(1);
3075        let r = _mm512_shrdv_epi64(a, b, c);
3076        let e = _mm512_set1_epi64(1);
3077        assert_eq_m512i(r, e);
3078    }
3079
3080    #[simd_test(enable = "avx512vbmi2")]
3081    const fn test_mm512_mask_shrdv_epi64() {
3082        let a = _mm512_set1_epi64(2);
3083        let b = _mm512_set1_epi64(8);
3084        let c = _mm512_set1_epi64(1);
3085        let r = _mm512_mask_shrdv_epi64(a, 0, b, c);
3086        assert_eq_m512i(r, a);
3087        let r = _mm512_mask_shrdv_epi64(a, 0b11111111, b, c);
3088        let e = _mm512_set1_epi64(1);
3089        assert_eq_m512i(r, e);
3090    }
3091
3092    #[simd_test(enable = "avx512vbmi2")]
3093    const fn test_mm512_maskz_shrdv_epi64() {
3094        let a = _mm512_set1_epi64(2);
3095        let b = _mm512_set1_epi64(8);
3096        let c = _mm512_set1_epi64(1);
3097        let r = _mm512_maskz_shrdv_epi64(0, a, b, c);
3098        assert_eq_m512i(r, _mm512_setzero_si512());
3099        let r = _mm512_maskz_shrdv_epi64(0b11111111, a, b, c);
3100        let e = _mm512_set1_epi64(1);
3101        assert_eq_m512i(r, e);
3102    }
3103
3104    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3105    const fn test_mm256_shrdv_epi64() {
3106        let a = _mm256_set1_epi64x(2);
3107        let b = _mm256_set1_epi64x(8);
3108        let c = _mm256_set1_epi64x(1);
3109        let r = _mm256_shrdv_epi64(a, b, c);
3110        let e = _mm256_set1_epi64x(1);
3111        assert_eq_m256i(r, e);
3112    }
3113
3114    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3115    const fn test_mm256_mask_shrdv_epi64() {
3116        let a = _mm256_set1_epi64x(2);
3117        let b = _mm256_set1_epi64x(8);
3118        let c = _mm256_set1_epi64x(1);
3119        let r = _mm256_mask_shrdv_epi64(a, 0, b, c);
3120        assert_eq_m256i(r, a);
3121        let r = _mm256_mask_shrdv_epi64(a, 0b00001111, b, c);
3122        let e = _mm256_set1_epi64x(1);
3123        assert_eq_m256i(r, e);
3124    }
3125
3126    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3127    const fn test_mm256_maskz_shrdv_epi64() {
3128        let a = _mm256_set1_epi64x(2);
3129        let b = _mm256_set1_epi64x(8);
3130        let c = _mm256_set1_epi64x(1);
3131        let r = _mm256_maskz_shrdv_epi64(0, a, b, c);
3132        assert_eq_m256i(r, _mm256_setzero_si256());
3133        let r = _mm256_maskz_shrdv_epi64(0b00001111, a, b, c);
3134        let e = _mm256_set1_epi64x(1);
3135        assert_eq_m256i(r, e);
3136    }
3137
3138    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3139    const fn test_mm_shrdv_epi64() {
3140        let a = _mm_set1_epi64x(2);
3141        let b = _mm_set1_epi64x(8);
3142        let c = _mm_set1_epi64x(1);
3143        let r = _mm_shrdv_epi64(a, b, c);
3144        let e = _mm_set1_epi64x(1);
3145        assert_eq_m128i(r, e);
3146    }
3147
3148    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3149    const fn test_mm_mask_shrdv_epi64() {
3150        let a = _mm_set1_epi64x(2);
3151        let b = _mm_set1_epi64x(8);
3152        let c = _mm_set1_epi64x(1);
3153        let r = _mm_mask_shrdv_epi64(a, 0, b, c);
3154        assert_eq_m128i(r, a);
3155        let r = _mm_mask_shrdv_epi64(a, 0b00000011, b, c);
3156        let e = _mm_set1_epi64x(1);
3157        assert_eq_m128i(r, e);
3158    }
3159
3160    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3161    const fn test_mm_maskz_shrdv_epi64() {
3162        let a = _mm_set1_epi64x(2);
3163        let b = _mm_set1_epi64x(8);
3164        let c = _mm_set1_epi64x(1);
3165        let r = _mm_maskz_shrdv_epi64(0, a, b, c);
3166        assert_eq_m128i(r, _mm_setzero_si128());
3167        let r = _mm_maskz_shrdv_epi64(0b00000011, a, b, c);
3168        let e = _mm_set1_epi64x(1);
3169        assert_eq_m128i(r, e);
3170    }
3171
3172    #[simd_test(enable = "avx512vbmi2")]
3173    const fn test_mm512_shrdv_epi32() {
3174        let a = _mm512_set1_epi32(2);
3175        let b = _mm512_set1_epi32(8);
3176        let c = _mm512_set1_epi32(1);
3177        let r = _mm512_shrdv_epi32(a, b, c);
3178        let e = _mm512_set1_epi32(1);
3179        assert_eq_m512i(r, e);
3180    }
3181
3182    #[simd_test(enable = "avx512vbmi2")]
3183    const fn test_mm512_mask_shrdv_epi32() {
3184        let a = _mm512_set1_epi32(2);
3185        let b = _mm512_set1_epi32(8);
3186        let c = _mm512_set1_epi32(1);
3187        let r = _mm512_mask_shrdv_epi32(a, 0, b, c);
3188        assert_eq_m512i(r, a);
3189        let r = _mm512_mask_shrdv_epi32(a, 0b11111111_11111111, b, c);
3190        let e = _mm512_set1_epi32(1);
3191        assert_eq_m512i(r, e);
3192    }
3193
3194    #[simd_test(enable = "avx512vbmi2")]
3195    const fn test_mm512_maskz_shrdv_epi32() {
3196        let a = _mm512_set1_epi32(2);
3197        let b = _mm512_set1_epi32(8);
3198        let c = _mm512_set1_epi32(1);
3199        let r = _mm512_maskz_shrdv_epi32(0, a, b, c);
3200        assert_eq_m512i(r, _mm512_setzero_si512());
3201        let r = _mm512_maskz_shrdv_epi32(0b11111111_11111111, a, b, c);
3202        let e = _mm512_set1_epi32(1);
3203        assert_eq_m512i(r, e);
3204    }
3205
3206    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3207    const fn test_mm256_shrdv_epi32() {
3208        let a = _mm256_set1_epi32(2);
3209        let b = _mm256_set1_epi32(8);
3210        let c = _mm256_set1_epi32(1);
3211        let r = _mm256_shrdv_epi32(a, b, c);
3212        let e = _mm256_set1_epi32(1);
3213        assert_eq_m256i(r, e);
3214    }
3215
3216    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3217    const fn test_mm256_mask_shrdv_epi32() {
3218        let a = _mm256_set1_epi32(2);
3219        let b = _mm256_set1_epi32(8);
3220        let c = _mm256_set1_epi32(1);
3221        let r = _mm256_mask_shrdv_epi32(a, 0, b, c);
3222        assert_eq_m256i(r, a);
3223        let r = _mm256_mask_shrdv_epi32(a, 0b11111111, b, c);
3224        let e = _mm256_set1_epi32(1);
3225        assert_eq_m256i(r, e);
3226    }
3227
3228    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3229    const fn test_mm256_maskz_shrdv_epi32() {
3230        let a = _mm256_set1_epi32(2);
3231        let b = _mm256_set1_epi32(8);
3232        let c = _mm256_set1_epi32(1);
3233        let r = _mm256_maskz_shrdv_epi32(0, a, b, c);
3234        assert_eq_m256i(r, _mm256_setzero_si256());
3235        let r = _mm256_maskz_shrdv_epi32(0b11111111, a, b, c);
3236        let e = _mm256_set1_epi32(1);
3237        assert_eq_m256i(r, e);
3238    }
3239
3240    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3241    const fn test_mm_shrdv_epi32() {
3242        let a = _mm_set1_epi32(2);
3243        let b = _mm_set1_epi32(8);
3244        let c = _mm_set1_epi32(1);
3245        let r = _mm_shrdv_epi32(a, b, c);
3246        let e = _mm_set1_epi32(1);
3247        assert_eq_m128i(r, e);
3248    }
3249
3250    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3251    const fn test_mm_mask_shrdv_epi32() {
3252        let a = _mm_set1_epi32(2);
3253        let b = _mm_set1_epi32(8);
3254        let c = _mm_set1_epi32(1);
3255        let r = _mm_mask_shrdv_epi32(a, 0, b, c);
3256        assert_eq_m128i(r, a);
3257        let r = _mm_mask_shrdv_epi32(a, 0b00001111, b, c);
3258        let e = _mm_set1_epi32(1);
3259        assert_eq_m128i(r, e);
3260    }
3261
3262    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3263    const fn test_mm_maskz_shrdv_epi32() {
3264        let a = _mm_set1_epi32(2);
3265        let b = _mm_set1_epi32(8);
3266        let c = _mm_set1_epi32(1);
3267        let r = _mm_maskz_shrdv_epi32(0, a, b, c);
3268        assert_eq_m128i(r, _mm_setzero_si128());
3269        let r = _mm_maskz_shrdv_epi32(0b00001111, a, b, c);
3270        let e = _mm_set1_epi32(1);
3271        assert_eq_m128i(r, e);
3272    }
3273
3274    #[simd_test(enable = "avx512vbmi2")]
3275    const fn test_mm512_shrdv_epi16() {
3276        let a = _mm512_set1_epi16(2);
3277        let b = _mm512_set1_epi16(8);
3278        let c = _mm512_set1_epi16(1);
3279        let r = _mm512_shrdv_epi16(a, b, c);
3280        let e = _mm512_set1_epi16(1);
3281        assert_eq_m512i(r, e);
3282    }
3283
3284    #[simd_test(enable = "avx512vbmi2")]
3285    const fn test_mm512_mask_shrdv_epi16() {
3286        let a = _mm512_set1_epi16(2);
3287        let b = _mm512_set1_epi16(8);
3288        let c = _mm512_set1_epi16(1);
3289        let r = _mm512_mask_shrdv_epi16(a, 0, b, c);
3290        assert_eq_m512i(r, a);
3291        let r = _mm512_mask_shrdv_epi16(a, 0b11111111_11111111_11111111_11111111, b, c);
3292        let e = _mm512_set1_epi16(1);
3293        assert_eq_m512i(r, e);
3294    }
3295
3296    #[simd_test(enable = "avx512vbmi2")]
3297    const fn test_mm512_maskz_shrdv_epi16() {
3298        let a = _mm512_set1_epi16(2);
3299        let b = _mm512_set1_epi16(8);
3300        let c = _mm512_set1_epi16(1);
3301        let r = _mm512_maskz_shrdv_epi16(0, a, b, c);
3302        assert_eq_m512i(r, _mm512_setzero_si512());
3303        let r = _mm512_maskz_shrdv_epi16(0b11111111_11111111_11111111_11111111, a, b, c);
3304        let e = _mm512_set1_epi16(1);
3305        assert_eq_m512i(r, e);
3306    }
3307
3308    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3309    const fn test_mm256_shrdv_epi16() {
3310        let a = _mm256_set1_epi16(2);
3311        let b = _mm256_set1_epi16(8);
3312        let c = _mm256_set1_epi16(1);
3313        let r = _mm256_shrdv_epi16(a, b, c);
3314        let e = _mm256_set1_epi16(1);
3315        assert_eq_m256i(r, e);
3316    }
3317
3318    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3319    const fn test_mm256_mask_shrdv_epi16() {
3320        let a = _mm256_set1_epi16(2);
3321        let b = _mm256_set1_epi16(8);
3322        let c = _mm256_set1_epi16(1);
3323        let r = _mm256_mask_shrdv_epi16(a, 0, b, c);
3324        assert_eq_m256i(r, a);
3325        let r = _mm256_mask_shrdv_epi16(a, 0b11111111_11111111, b, c);
3326        let e = _mm256_set1_epi16(1);
3327        assert_eq_m256i(r, e);
3328    }
3329
3330    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3331    const fn test_mm256_maskz_shrdv_epi16() {
3332        let a = _mm256_set1_epi16(2);
3333        let b = _mm256_set1_epi16(8);
3334        let c = _mm256_set1_epi16(1);
3335        let r = _mm256_maskz_shrdv_epi16(0, a, b, c);
3336        assert_eq_m256i(r, _mm256_setzero_si256());
3337        let r = _mm256_maskz_shrdv_epi16(0b11111111_11111111, a, b, c);
3338        let e = _mm256_set1_epi16(1);
3339        assert_eq_m256i(r, e);
3340    }
3341
3342    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3343    const fn test_mm_shrdv_epi16() {
3344        let a = _mm_set1_epi16(2);
3345        let b = _mm_set1_epi16(8);
3346        let c = _mm_set1_epi16(1);
3347        let r = _mm_shrdv_epi16(a, b, c);
3348        let e = _mm_set1_epi16(1);
3349        assert_eq_m128i(r, e);
3350    }
3351
3352    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3353    const fn test_mm_mask_shrdv_epi16() {
3354        let a = _mm_set1_epi16(2);
3355        let b = _mm_set1_epi16(8);
3356        let c = _mm_set1_epi16(1);
3357        let r = _mm_mask_shrdv_epi16(a, 0, b, c);
3358        assert_eq_m128i(r, a);
3359        let r = _mm_mask_shrdv_epi16(a, 0b11111111, b, c);
3360        let e = _mm_set1_epi16(1);
3361        assert_eq_m128i(r, e);
3362    }
3363
3364    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3365    const fn test_mm_maskz_shrdv_epi16() {
3366        let a = _mm_set1_epi16(2);
3367        let b = _mm_set1_epi16(8);
3368        let c = _mm_set1_epi16(1);
3369        let r = _mm_maskz_shrdv_epi16(0, a, b, c);
3370        assert_eq_m128i(r, _mm_setzero_si128());
3371        let r = _mm_maskz_shrdv_epi16(0b11111111, a, b, c);
3372        let e = _mm_set1_epi16(1);
3373        assert_eq_m128i(r, e);
3374    }
3375
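    // The shldi tests use a = 1 and b = 1 << (lane_width - 1): shifting the concatenation
    // (a:b) left by the immediate 2 and keeping the upper half gives
    // (1 << 2) | (b >> (lane_width - 2)) = 4 | 2 = 6 in every active lane, which is the
    // expected value below; inactive lanes keep src (mask) or are zeroed (maskz).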
3376    #[simd_test(enable = "avx512vbmi2")]
3377    const fn test_mm512_shldi_epi64() {
3378        let a = _mm512_set1_epi64(1);
3379        let b = _mm512_set1_epi64(1 << 63);
3380        let r = _mm512_shldi_epi64::<2>(a, b);
3381        let e = _mm512_set1_epi64(6);
3382        assert_eq_m512i(r, e);
3383    }
3384
3385    #[simd_test(enable = "avx512vbmi2")]
3386    const fn test_mm512_mask_shldi_epi64() {
3387        let a = _mm512_set1_epi64(1);
3388        let b = _mm512_set1_epi64(1 << 63);
3389        let r = _mm512_mask_shldi_epi64::<2>(a, 0, a, b);
3390        assert_eq_m512i(r, a);
3391        let r = _mm512_mask_shldi_epi64::<2>(a, 0b11111111, a, b);
3392        let e = _mm512_set1_epi64(6);
3393        assert_eq_m512i(r, e);
3394    }
3395
3396    #[simd_test(enable = "avx512vbmi2")]
3397    const fn test_mm512_maskz_shldi_epi64() {
3398        let a = _mm512_set1_epi64(1);
3399        let b = _mm512_set1_epi64(1 << 63);
3400        let r = _mm512_maskz_shldi_epi64::<2>(0, a, b);
3401        assert_eq_m512i(r, _mm512_setzero_si512());
3402        let r = _mm512_maskz_shldi_epi64::<2>(0b11111111, a, b);
3403        let e = _mm512_set1_epi64(6);
3404        assert_eq_m512i(r, e);
3405    }
3406
3407    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3408    const fn test_mm256_shldi_epi64() {
3409        let a = _mm256_set1_epi64x(1);
3410        let b = _mm256_set1_epi64x(1 << 63);
3411        let r = _mm256_shldi_epi64::<2>(a, b);
3412        let e = _mm256_set1_epi64x(6);
3413        assert_eq_m256i(r, e);
3414    }
3415
3416    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3417    const fn test_mm256_mask_shldi_epi64() {
3418        let a = _mm256_set1_epi64x(1);
3419        let b = _mm256_set1_epi64x(1 << 63);
3420        let r = _mm256_mask_shldi_epi64::<2>(a, 0, a, b);
3421        assert_eq_m256i(r, a);
3422        let r = _mm256_mask_shldi_epi64::<2>(a, 0b00001111, a, b);
3423        let e = _mm256_set1_epi64x(6);
3424        assert_eq_m256i(r, e);
3425    }
3426
3427    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3428    const fn test_mm256_maskz_shldi_epi64() {
3429        let a = _mm256_set1_epi64x(1);
3430        let b = _mm256_set1_epi64x(1 << 63);
3431        let r = _mm256_maskz_shldi_epi64::<2>(0, a, b);
3432        assert_eq_m256i(r, _mm256_setzero_si256());
3433        let r = _mm256_maskz_shldi_epi64::<2>(0b00001111, a, b);
3434        let e = _mm256_set1_epi64x(6);
3435        assert_eq_m256i(r, e);
3436    }
3437
3438    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3439    const fn test_mm_shldi_epi64() {
3440        let a = _mm_set1_epi64x(1);
3441        let b = _mm_set1_epi64x(1 << 63);
3442        let r = _mm_shldi_epi64::<2>(a, b);
3443        let e = _mm_set1_epi64x(6);
3444        assert_eq_m128i(r, e);
3445    }
3446
3447    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3448    const fn test_mm_mask_shldi_epi64() {
3449        let a = _mm_set1_epi64x(1);
3450        let b = _mm_set1_epi64x(1 << 63);
3451        let r = _mm_mask_shldi_epi64::<2>(a, 0, a, b);
3452        assert_eq_m128i(r, a);
3453        let r = _mm_mask_shldi_epi64::<2>(a, 0b00000011, a, b);
3454        let e = _mm_set1_epi64x(6);
3455        assert_eq_m128i(r, e);
3456    }
3457
3458    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3459    const fn test_mm_maskz_shldi_epi64() {
3460        let a = _mm_set1_epi64x(1);
3461        let b = _mm_set1_epi64x(1 << 63);
3462        let r = _mm_maskz_shldi_epi64::<2>(0, a, b);
3463        assert_eq_m128i(r, _mm_setzero_si128());
3464        let r = _mm_maskz_shldi_epi64::<2>(0b00000011, a, b);
3465        let e = _mm_set1_epi64x(6);
3466        assert_eq_m128i(r, e);
3467    }
3468
3469    #[simd_test(enable = "avx512vbmi2")]
3470    const fn test_mm512_shldi_epi32() {
3471        let a = _mm512_set1_epi32(1);
3472        let b = _mm512_set1_epi32(1 << 31);
3473        let r = _mm512_shldi_epi32::<2>(a, b);
3474        let e = _mm512_set1_epi32(6);
3475        assert_eq_m512i(r, e);
3476    }
3477
3478    #[simd_test(enable = "avx512vbmi2")]
3479    const fn test_mm512_mask_shldi_epi32() {
3480        let a = _mm512_set1_epi32(1);
3481        let b = _mm512_set1_epi32(1 << 31);
3482        let r = _mm512_mask_shldi_epi32::<2>(a, 0, a, b);
3483        assert_eq_m512i(r, a);
3484        let r = _mm512_mask_shldi_epi32::<2>(a, 0b11111111_11111111, a, b);
3485        let e = _mm512_set1_epi32(6);
3486        assert_eq_m512i(r, e);
3487    }
3488
3489    #[simd_test(enable = "avx512vbmi2")]
3490    const fn test_mm512_maskz_shldi_epi32() {
3491        let a = _mm512_set1_epi32(1);
3492        let b = _mm512_set1_epi32(1 << 31);
3493        let r = _mm512_maskz_shldi_epi32::<2>(0, a, b);
3494        assert_eq_m512i(r, _mm512_setzero_si512());
3495        let r = _mm512_maskz_shldi_epi32::<2>(0b11111111_11111111, a, b);
3496        let e = _mm512_set1_epi32(6);
3497        assert_eq_m512i(r, e);
3498    }
3499
3500    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3501    const fn test_mm256_shldi_epi32() {
3502        let a = _mm256_set1_epi32(1);
3503        let b = _mm256_set1_epi32(1 << 31);
3504        let r = _mm256_shldi_epi32::<2>(a, b);
3505        let e = _mm256_set1_epi32(6);
3506        assert_eq_m256i(r, e);
3507    }
3508
3509    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3510    const fn test_mm256_mask_shldi_epi32() {
3511        let a = _mm256_set1_epi32(1);
3512        let b = _mm256_set1_epi32(1 << 31);
3513        let r = _mm256_mask_shldi_epi32::<2>(a, 0, a, b);
3514        assert_eq_m256i(r, a);
3515        let r = _mm256_mask_shldi_epi32::<2>(a, 0b11111111, a, b);
3516        let e = _mm256_set1_epi32(6);
3517        assert_eq_m256i(r, e);
3518    }
3519
3520    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3521    const fn test_mm256_maskz_shldi_epi32() {
3522        let a = _mm256_set1_epi32(1);
3523        let b = _mm256_set1_epi32(1 << 31);
3524        let r = _mm256_maskz_shldi_epi32::<2>(0, a, b);
3525        assert_eq_m256i(r, _mm256_setzero_si256());
3526        let r = _mm256_maskz_shldi_epi32::<2>(0b11111111, a, b);
3527        let e = _mm256_set1_epi32(6);
3528        assert_eq_m256i(r, e);
3529    }
3530
3531    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3532    const fn test_mm_shldi_epi32() {
3533        let a = _mm_set1_epi32(1);
3534        let b = _mm_set1_epi32(1 << 31);
3535        let r = _mm_shldi_epi32::<2>(a, b);
3536        let e = _mm_set1_epi32(6);
3537        assert_eq_m128i(r, e);
3538    }
3539
3540    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3541    const fn test_mm_mask_shldi_epi32() {
3542        let a = _mm_set1_epi32(1);
3543        let b = _mm_set1_epi32(1 << 31);
3544        let r = _mm_mask_shldi_epi32::<2>(a, 0, a, b);
3545        assert_eq_m128i(r, a);
3546        let r = _mm_mask_shldi_epi32::<2>(a, 0b00001111, a, b);
3547        let e = _mm_set1_epi32(6);
3548        assert_eq_m128i(r, e);
3549    }
3550
3551    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3552    const fn test_mm_maskz_shldi_epi32() {
3553        let a = _mm_set1_epi32(1);
3554        let b = _mm_set1_epi32(1 << 31);
3555        let r = _mm_maskz_shldi_epi32::<2>(0, a, b);
3556        assert_eq_m128i(r, _mm_setzero_si128());
3557        let r = _mm_maskz_shldi_epi32::<2>(0b00001111, a, b);
3558        let e = _mm_set1_epi32(6);
3559        assert_eq_m128i(r, e);
3560    }
3561
3562    #[simd_test(enable = "avx512vbmi2")]
3563    const fn test_mm512_shldi_epi16() {
3564        let a = _mm512_set1_epi16(1);
3565        let b = _mm512_set1_epi16(1 << 15);
3566        let r = _mm512_shldi_epi16::<2>(a, b);
3567        let e = _mm512_set1_epi16(6);
3568        assert_eq_m512i(r, e);
3569    }
3570
3571    #[simd_test(enable = "avx512vbmi2")]
3572    const fn test_mm512_mask_shldi_epi16() {
3573        let a = _mm512_set1_epi16(1);
3574        let b = _mm512_set1_epi16(1 << 15);
3575        let r = _mm512_mask_shldi_epi16::<2>(a, 0, a, b);
3576        assert_eq_m512i(r, a);
3577        let r = _mm512_mask_shldi_epi16::<2>(a, 0b11111111_11111111_11111111_11111111, a, b);
3578        let e = _mm512_set1_epi16(6);
3579        assert_eq_m512i(r, e);
3580    }
3581
3582    #[simd_test(enable = "avx512vbmi2")]
3583    const fn test_mm512_maskz_shldi_epi16() {
3584        let a = _mm512_set1_epi16(1);
3585        let b = _mm512_set1_epi16(1 << 15);
3586        let r = _mm512_maskz_shldi_epi16::<2>(0, a, b);
3587        assert_eq_m512i(r, _mm512_setzero_si512());
3588        let r = _mm512_maskz_shldi_epi16::<2>(0b11111111_11111111_11111111_11111111, a, b);
3589        let e = _mm512_set1_epi16(6);
3590        assert_eq_m512i(r, e);
3591    }
3592
3593    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3594    const fn test_mm256_shldi_epi16() {
3595        let a = _mm256_set1_epi16(1);
3596        let b = _mm256_set1_epi16(1 << 15);
3597        let r = _mm256_shldi_epi16::<2>(a, b);
3598        let e = _mm256_set1_epi16(6);
3599        assert_eq_m256i(r, e);
3600    }
3601
3602    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3603    const fn test_mm256_mask_shldi_epi16() {
3604        let a = _mm256_set1_epi16(1);
3605        let b = _mm256_set1_epi16(1 << 15);
3606        let r = _mm256_mask_shldi_epi16::<2>(a, 0, a, b);
3607        assert_eq_m256i(r, a);
3608        let r = _mm256_mask_shldi_epi16::<2>(a, 0b11111111_11111111, a, b);
3609        let e = _mm256_set1_epi16(6);
3610        assert_eq_m256i(r, e);
3611    }
3612
3613    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3614    const fn test_mm256_maskz_shldi_epi16() {
3615        let a = _mm256_set1_epi16(1);
3616        let b = _mm256_set1_epi16(1 << 15);
3617        let r = _mm256_maskz_shldi_epi16::<2>(0, a, b);
3618        assert_eq_m256i(r, _mm256_setzero_si256());
3619        let r = _mm256_maskz_shldi_epi16::<2>(0b11111111_11111111, a, b);
3620        let e = _mm256_set1_epi16(6);
3621        assert_eq_m256i(r, e);
3622    }
3623
3624    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3625    const fn test_mm_shldi_epi16() {
3626        let a = _mm_set1_epi16(1);
3627        let b = _mm_set1_epi16(1 << 15);
3628        let r = _mm_shldi_epi16::<2>(a, b);
3629        let e = _mm_set1_epi16(6);
3630        assert_eq_m128i(r, e);
3631    }
3632
3633    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3634    const fn test_mm_mask_shldi_epi16() {
3635        let a = _mm_set1_epi16(1);
3636        let b = _mm_set1_epi16(1 << 15);
3637        let r = _mm_mask_shldi_epi16::<2>(a, 0, a, b);
3638        assert_eq_m128i(r, a);
3639        let r = _mm_mask_shldi_epi16::<2>(a, 0b11111111, a, b);
3640        let e = _mm_set1_epi16(6);
3641        assert_eq_m128i(r, e);
3642    }
3643
3644    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3645    const fn test_mm_maskz_shldi_epi16() {
3646        let a = _mm_set1_epi16(1);
3647        let b = _mm_set1_epi16(1 << 15);
3648        let r = _mm_maskz_shldi_epi16::<2>(0, a, b);
3649        assert_eq_m128i(r, _mm_setzero_si128());
3650        let r = _mm_maskz_shldi_epi16::<2>(0b11111111, a, b);
3651        let e = _mm_set1_epi16(6);
3652        assert_eq_m128i(r, e);
3653    }
3654
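    // The shrdi tests mirror the shrdv ones with an immediate count: each lane keeps the
    // low half of ((b:a) >> 1), so with a = 2 and b = 8 every active lane becomes 1, while
    // masked-off lanes keep the src operand (mask variants) or are zeroed (maskz variants).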
3655    #[simd_test(enable = "avx512vbmi2")]
3656    const fn test_mm512_shrdi_epi64() {
3657        let a = _mm512_set1_epi64(2);
3658        let b = _mm512_set1_epi64(8);
3659        let r = _mm512_shrdi_epi64::<1>(a, b);
3660        let e = _mm512_set1_epi64(1);
3661        assert_eq_m512i(r, e);
3662    }
3663
3664    #[simd_test(enable = "avx512vbmi2")]
3665    const fn test_mm512_mask_shrdi_epi64() {
3666        let a = _mm512_set1_epi64(2);
3667        let b = _mm512_set1_epi64(8);
3668        let r = _mm512_mask_shrdi_epi64::<1>(a, 0, a, b);
3669        assert_eq_m512i(r, a);
3670        let r = _mm512_mask_shrdi_epi64::<1>(a, 0b11111111, a, b);
3671        let e = _mm512_set1_epi64(1);
3672        assert_eq_m512i(r, e);
3673    }
3674
3675    #[simd_test(enable = "avx512vbmi2")]
3676    const fn test_mm512_maskz_shrdi_epi64() {
3677        let a = _mm512_set1_epi64(2);
3678        let b = _mm512_set1_epi64(8);
3679        let r = _mm512_maskz_shrdi_epi64::<1>(0, a, b);
3680        assert_eq_m512i(r, _mm512_setzero_si512());
3681        let r = _mm512_maskz_shrdi_epi64::<1>(0b11111111, a, b);
3682        let e = _mm512_set1_epi64(1);
3683        assert_eq_m512i(r, e);
3684    }
3685
3686    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3687    const fn test_mm256_shrdi_epi64() {
3688        let a = _mm256_set1_epi64x(2);
3689        let b = _mm256_set1_epi64x(8);
3690        let r = _mm256_shrdi_epi64::<1>(a, b);
3691        let e = _mm256_set1_epi64x(1);
3692        assert_eq_m256i(r, e);
3693    }
3694
3695    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3696    const fn test_mm256_mask_shrdi_epi64() {
3697        let a = _mm256_set1_epi64x(2);
3698        let b = _mm256_set1_epi64x(8);
3699        let r = _mm256_mask_shrdi_epi64::<1>(a, 0, a, b);
3700        assert_eq_m256i(r, a);
3701        let r = _mm256_mask_shrdi_epi64::<1>(a, 0b00001111, a, b);
3702        let e = _mm256_set1_epi64x(1);
3703        assert_eq_m256i(r, e);
3704    }
3705
3706    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3707    const fn test_mm256_maskz_shrdi_epi64() {
3708        let a = _mm256_set1_epi64x(2);
3709        let b = _mm256_set1_epi64x(8);
3710        let r = _mm256_maskz_shrdi_epi64::<1>(0, a, b);
3711        assert_eq_m256i(r, _mm256_setzero_si256());
3712        let r = _mm256_maskz_shrdi_epi64::<1>(0b00001111, a, b);
3713        let e = _mm256_set1_epi64x(1);
3714        assert_eq_m256i(r, e);
3715    }
3716
3717    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3718    const fn test_mm_shrdi_epi64() {
3719        let a = _mm_set1_epi64x(2);
3720        let b = _mm_set1_epi64x(8);
3721        let r = _mm_shrdi_epi64::<1>(a, b);
3722        let e = _mm_set1_epi64x(1);
3723        assert_eq_m128i(r, e);
3724    }
3725
3726    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3727    const fn test_mm_mask_shrdi_epi64() {
3728        let a = _mm_set1_epi64x(2);
3729        let b = _mm_set1_epi64x(8);
3730        let r = _mm_mask_shrdi_epi64::<1>(a, 0, a, b);
3731        assert_eq_m128i(r, a);
3732        let r = _mm_mask_shrdi_epi64::<1>(a, 0b00000011, a, b);
3733        let e = _mm_set1_epi64x(1);
3734        assert_eq_m128i(r, e);
3735    }
3736
3737    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3738    const fn test_mm_maskz_shrdi_epi64() {
3739        let a = _mm_set1_epi64x(2);
3740        let b = _mm_set1_epi64x(8);
3741        let r = _mm_maskz_shrdi_epi64::<1>(0, a, b);
3742        assert_eq_m128i(r, _mm_setzero_si128());
3743        let r = _mm_maskz_shrdi_epi64::<1>(0b00000011, a, b);
3744        let e = _mm_set1_epi64x(1);
3745        assert_eq_m128i(r, e);
3746    }
3747
3748    #[simd_test(enable = "avx512vbmi2")]
3749    const fn test_mm512_shrdi_epi32() {
3750        let a = _mm512_set1_epi32(2);
3751        let b = _mm512_set1_epi32(8);
3752        let r = _mm512_shrdi_epi32::<1>(a, b);
3753        let e = _mm512_set1_epi32(1);
3754        assert_eq_m512i(r, e);
3755    }
3756
3757    #[simd_test(enable = "avx512vbmi2")]
3758    const fn test_mm512_mask_shrdi_epi32() {
3759        let a = _mm512_set1_epi32(2);
3760        let b = _mm512_set1_epi32(8);
3761        let r = _mm512_mask_shrdi_epi32::<1>(a, 0, a, b);
3762        assert_eq_m512i(r, a);
3763        let r = _mm512_mask_shrdi_epi32::<1>(a, 0b11111111_11111111, a, b);
3764        let e = _mm512_set1_epi32(1);
3765        assert_eq_m512i(r, e);
3766    }
3767
3768    #[simd_test(enable = "avx512vbmi2")]
3769    const fn test_mm512_maskz_shrdi_epi32() {
3770        let a = _mm512_set1_epi32(2);
3771        let b = _mm512_set1_epi32(8);
3772        let r = _mm512_maskz_shrdi_epi32::<1>(0, a, b);
3773        assert_eq_m512i(r, _mm512_setzero_si512());
3774        let r = _mm512_maskz_shrdi_epi32::<1>(0b11111111_11111111, a, b);
3775        let e = _mm512_set1_epi32(1);
3776        assert_eq_m512i(r, e);
3777    }
3778
3779    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3780    const fn test_mm256_shrdi_epi32() {
3781        let a = _mm256_set1_epi32(2);
3782        let b = _mm256_set1_epi32(8);
3783        let r = _mm256_shrdi_epi32::<1>(a, b);
3784        let e = _mm256_set1_epi32(1);
3785        assert_eq_m256i(r, e);
3786    }
3787
3788    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3789    const fn test_mm256_mask_shrdi_epi32() {
3790        let a = _mm256_set1_epi32(2);
3791        let b = _mm256_set1_epi32(8);
3792        let r = _mm256_mask_shrdi_epi32::<1>(a, 0, a, b);
3793        assert_eq_m256i(r, a);
3794        let r = _mm256_mask_shrdi_epi32::<1>(a, 0b11111111, a, b);
3795        let e = _mm256_set1_epi32(1);
3796        assert_eq_m256i(r, e);
3797    }
3798
3799    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3800    const fn test_mm256_maskz_shrdi_epi32() {
3801        let a = _mm256_set1_epi32(2);
3802        let b = _mm256_set1_epi32(8);
3803        let r = _mm256_maskz_shrdi_epi32::<1>(0, a, b);
3804        assert_eq_m256i(r, _mm256_setzero_si256());
3805        let r = _mm256_maskz_shrdi_epi32::<1>(0b11111111, a, b);
3806        let e = _mm256_set1_epi32(1);
3807        assert_eq_m256i(r, e);
3808    }
3809
3810    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3811    const fn test_mm_shrdi_epi32() {
3812        let a = _mm_set1_epi32(2);
3813        let b = _mm_set1_epi32(8);
3814        let r = _mm_shrdi_epi32::<1>(a, b);
3815        let e = _mm_set1_epi32(1);
3816        assert_eq_m128i(r, e);
3817    }
3818
3819    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3820    const fn test_mm_mask_shrdi_epi32() {
3821        let a = _mm_set1_epi32(2);
3822        let b = _mm_set1_epi32(8);
3823        let r = _mm_mask_shrdi_epi32::<1>(a, 0, a, b);
3824        assert_eq_m128i(r, a);
3825        let r = _mm_mask_shrdi_epi32::<1>(a, 0b00001111, a, b);
3826        let e = _mm_set1_epi32(1);
3827        assert_eq_m128i(r, e);
3828    }
3829
3830    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3831    const fn test_mm_maskz_shrdi_epi32() {
3832        let a = _mm_set1_epi32(2);
3833        let b = _mm_set1_epi32(8);
3834        let r = _mm_maskz_shrdi_epi32::<1>(0, a, b);
3835        assert_eq_m128i(r, _mm_setzero_si128());
3836        let r = _mm_maskz_shrdi_epi32::<1>(0b00001111, a, b);
3837        let e = _mm_set1_epi32(1);
3838        assert_eq_m128i(r, e);
3839    }
3840
3841    #[simd_test(enable = "avx512vbmi2")]
3842    const fn test_mm512_shrdi_epi16() {
3843        let a = _mm512_set1_epi16(2);
3844        let b = _mm512_set1_epi16(8);
3845        let r = _mm512_shrdi_epi16::<1>(a, b);
3846        let e = _mm512_set1_epi16(1);
3847        assert_eq_m512i(r, e);
3848    }
3849
3850    #[simd_test(enable = "avx512vbmi2")]
3851    const fn test_mm512_mask_shrdi_epi16() {
3852        let a = _mm512_set1_epi16(2);
3853        let b = _mm512_set1_epi16(8);
3854        let r = _mm512_mask_shrdi_epi16::<1>(a, 0, a, b);
3855        assert_eq_m512i(r, a);
3856        let r = _mm512_mask_shrdi_epi16::<1>(a, 0b11111111_11111111_11111111_11111111, a, b);
3857        let e = _mm512_set1_epi16(1);
3858        assert_eq_m512i(r, e);
3859    }
3860
3861    #[simd_test(enable = "avx512vbmi2")]
3862    const fn test_mm512_maskz_shrdi_epi16() {
3863        let a = _mm512_set1_epi16(2);
3864        let b = _mm512_set1_epi16(8);
3865        let r = _mm512_maskz_shrdi_epi16::<1>(0, a, b);
3866        assert_eq_m512i(r, _mm512_setzero_si512());
3867        let r = _mm512_maskz_shrdi_epi16::<1>(0b11111111_11111111_11111111_11111111, a, b);
3868        let e = _mm512_set1_epi16(1);
3869        assert_eq_m512i(r, e);
3870    }
3871
3872    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3873    const fn test_mm256_shrdi_epi16() {
3874        let a = _mm256_set1_epi16(2);
3875        let b = _mm256_set1_epi16(8);
3876        let r = _mm256_shrdi_epi16::<1>(a, b);
3877        let e = _mm256_set1_epi16(1);
3878        assert_eq_m256i(r, e);
3879    }
3880
3881    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3882    const fn test_mm256_mask_shrdi_epi16() {
3883        let a = _mm256_set1_epi16(2);
3884        let b = _mm256_set1_epi16(8);
3885        let r = _mm256_mask_shrdi_epi16::<1>(a, 0, a, b);
3886        assert_eq_m256i(r, a);
3887        let r = _mm256_mask_shrdi_epi16::<1>(a, 0b11111111_11111111, a, b);
3888        let e = _mm256_set1_epi16(1);
3889        assert_eq_m256i(r, e);
3890    }
3891
3892    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3893    const fn test_mm256_maskz_shrdi_epi16() {
3894        let a = _mm256_set1_epi16(2);
3895        let b = _mm256_set1_epi16(8);
3896        let r = _mm256_maskz_shrdi_epi16::<1>(0, a, b);
3897        assert_eq_m256i(r, _mm256_setzero_si256());
3898        let r = _mm256_maskz_shrdi_epi16::<1>(0b11111111_11111111, a, b);
3899        let e = _mm256_set1_epi16(1);
3900        assert_eq_m256i(r, e);
3901    }
3902
3903    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3904    const fn test_mm_shrdi_epi16() {
3905        let a = _mm_set1_epi16(2);
3906        let b = _mm_set1_epi16(8);
3907        let r = _mm_shrdi_epi16::<1>(a, b);
3908        let e = _mm_set1_epi16(1);
3909        assert_eq_m128i(r, e);
3910    }
3911
3912    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3913    const fn test_mm_mask_shrdi_epi16() {
3914        let a = _mm_set1_epi16(2);
3915        let b = _mm_set1_epi16(8);
3916        let r = _mm_mask_shrdi_epi16::<1>(a, 0, a, b);
3917        assert_eq_m128i(r, a);
3918        let r = _mm_mask_shrdi_epi16::<1>(a, 0b11111111, a, b);
3919        let e = _mm_set1_epi16(1);
3920        assert_eq_m128i(r, e);
3921    }
3922
3923    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3924    const fn test_mm_maskz_shrdi_epi16() {
3925        let a = _mm_set1_epi16(2);
3926        let b = _mm_set1_epi16(8);
3927        let r = _mm_maskz_shrdi_epi16::<1>(0, a, b);
3928        assert_eq_m128i(r, _mm_setzero_si128());
3929        let r = _mm_maskz_shrdi_epi16::<1>(0b11111111, a, b);
3930        let e = _mm_set1_epi16(1);
3931        assert_eq_m128i(r, e);
3932    }
3933
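    // Expand-load tests: mask bits are consumed from the least significant bit upwards, and
    // each set bit pulls the next sequential element from memory into that lane; cleared
    // lanes keep src (mask variants) or become 0 (maskz variants). Since _mm512_set_epi16
    // lists lanes from highest to lowest, the expected vectors read back to front. black_box
    // keeps the pointer opaque so the masked load is exercised rather than folded away.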
3934    #[simd_test(enable = "avx512vbmi2")]
3935    unsafe fn test_mm512_mask_expandloadu_epi16() {
3936        let src = _mm512_set1_epi16(42);
3937        let a = &[
3938            1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
3939            24, 25, 26, 27, 28, 29, 30, 31, 32,
3940        ];
3941        let p = a.as_ptr();
3942        let m = 0b11101000_11001010_11110000_00001111;
3943        let r = _mm512_mask_expandloadu_epi16(src, m, black_box(p));
3944        let e = _mm512_set_epi16(
3945            16, 15, 14, 42, 13, 42, 42, 42, 12, 11, 42, 42, 10, 42, 9, 42, 8, 7, 6, 5, 42, 42, 42,
3946            42, 42, 42, 42, 42, 4, 3, 2, 1,
3947        );
3948        assert_eq_m512i(r, e);
3949    }
3950
3951    #[simd_test(enable = "avx512vbmi2")]
3952    unsafe fn test_mm512_maskz_expandloadu_epi16() {
3953        let a = &[
3954            1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
3955            24, 25, 26, 27, 28, 29, 30, 31, 32,
3956        ];
3957        let p = a.as_ptr();
3958        let m = 0b11101000_11001010_11110000_00001111;
3959        let r = _mm512_maskz_expandloadu_epi16(m, black_box(p));
3960        let e = _mm512_set_epi16(
3961            16, 15, 14, 0, 13, 0, 0, 0, 12, 11, 0, 0, 10, 0, 9, 0, 8, 7, 6, 5, 0, 0, 0, 0, 0, 0, 0,
3962            0, 4, 3, 2, 1,
3963        );
3964        assert_eq_m512i(r, e);
3965    }
3966
3967    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3968    unsafe fn test_mm256_mask_expandloadu_epi16() {
3969        let src = _mm256_set1_epi16(42);
3970        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
3971        let p = a.as_ptr();
3972        let m = 0b11101000_11001010;
3973        let r = _mm256_mask_expandloadu_epi16(src, m, black_box(p));
3974        let e = _mm256_set_epi16(8, 7, 6, 42, 5, 42, 42, 42, 4, 3, 42, 42, 2, 42, 1, 42);
3975        assert_eq_m256i(r, e);
3976    }
3977
3978    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3979    unsafe fn test_mm256_maskz_expandloadu_epi16() {
3980        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
3981        let p = a.as_ptr();
3982        let m = 0b11101000_11001010;
3983        let r = _mm256_maskz_expandloadu_epi16(m, black_box(p));
3984        let e = _mm256_set_epi16(8, 7, 6, 0, 5, 0, 0, 0, 4, 3, 0, 0, 2, 0, 1, 0);
3985        assert_eq_m256i(r, e);
3986    }
3987
3988    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3989    unsafe fn test_mm_mask_expandloadu_epi16() {
3990        let src = _mm_set1_epi16(42);
3991        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
3992        let p = a.as_ptr();
3993        let m = 0b11101000;
3994        let r = _mm_mask_expandloadu_epi16(src, m, black_box(p));
3995        let e = _mm_set_epi16(4, 3, 2, 42, 1, 42, 42, 42);
3996        assert_eq_m128i(r, e);
3997    }
3998
3999    #[simd_test(enable = "avx512vbmi2,avx512vl")]
4000    unsafe fn test_mm_maskz_expandloadu_epi16() {
4001        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
4002        let p = a.as_ptr();
4003        let m = 0b11101000;
4004        let r = _mm_maskz_expandloadu_epi16(m, black_box(p));
4005        let e = _mm_set_epi16(4, 3, 2, 0, 1, 0, 0, 0);
4006        assert_eq_m128i(r, e);
4007    }
4008
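    // The epi8 expand-load tests follow the same pattern at byte granularity, driven by a
    // 64-bit mask in the 512-bit case (one mask bit per byte lane).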
4009    #[simd_test(enable = "avx512vbmi2")]
4010    unsafe fn test_mm512_mask_expandloadu_epi8() {
4011        let src = _mm512_set1_epi8(42);
4012        let a = &[
4013            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
4014            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
4015            46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
4016        ];
4017        let p = a.as_ptr();
4018        let m = 0b11101000_11001010_11110000_00001111_11111111_00000000_10101010_01010101;
4019        let r = _mm512_mask_expandloadu_epi8(src, m, black_box(p));
4020        let e = _mm512_set_epi8(
4021            32, 31, 30, 42, 29, 42, 42, 42, 28, 27, 42, 42, 26, 42, 25, 42, 24, 23, 22, 21, 42, 42,
4022            42, 42, 42, 42, 42, 42, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 42, 42, 42, 42,
4023            42, 42, 42, 42, 8, 42, 7, 42, 6, 42, 5, 42, 42, 4, 42, 3, 42, 2, 42, 1,
4024        );
4025        assert_eq_m512i(r, e);
4026    }
4027
4028    #[simd_test(enable = "avx512vbmi2")]
4029    unsafe fn test_mm512_maskz_expandloadu_epi8() {
4030        let a = &[
4031            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
4032            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
4033            46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
4034        ];
4035        let p = a.as_ptr();
4036        let m = 0b11101000_11001010_11110000_00001111_11111111_00000000_10101010_01010101;
4037        let r = _mm512_maskz_expandloadu_epi8(m, black_box(p));
4038        let e = _mm512_set_epi8(
4039            32, 31, 30, 0, 29, 0, 0, 0, 28, 27, 0, 0, 26, 0, 25, 0, 24, 23, 22, 21, 0, 0, 0, 0, 0,
4040            0, 0, 0, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0,
4041            7, 0, 6, 0, 5, 0, 0, 4, 0, 3, 0, 2, 0, 1,
4042        );
4043        assert_eq_m512i(r, e);
4044    }
4045
4046    #[simd_test(enable = "avx512vbmi2,avx512vl")]
4047    unsafe fn test_mm256_mask_expandloadu_epi8() {
4048        let src = _mm256_set1_epi8(42);
4049        let a = &[
4050            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
4051            24, 25, 26, 27, 28, 29, 30, 31, 32,
4052        ];
4053        let p = a.as_ptr();
4054        let m = 0b11101000_11001010_11110000_00001111;
4055        let r = _mm256_mask_expandloadu_epi8(src, m, black_box(p));
4056        let e = _mm256_set_epi8(
4057            16, 15, 14, 42, 13, 42, 42, 42, 12, 11, 42, 42, 10, 42, 9, 42, 8, 7, 6, 5, 42, 42, 42,
4058            42, 42, 42, 42, 42, 4, 3, 2, 1,
4059        );
4060        assert_eq_m256i(r, e);
4061    }
4062
4063    #[simd_test(enable = "avx512vbmi2,avx512vl")]
4064    unsafe fn test_mm256_maskz_expandloadu_epi8() {
4065        let a = &[
4066            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
4067            24, 25, 26, 27, 28, 29, 30, 31, 32,
4068        ];
4069        let p = a.as_ptr();
4070        let m = 0b11101000_11001010_11110000_00001111;
4071        let r = _mm256_maskz_expandloadu_epi8(m, black_box(p));
4072        let e = _mm256_set_epi8(
4073            16, 15, 14, 0, 13, 0, 0, 0, 12, 11, 0, 0, 10, 0, 9, 0, 8, 7, 6, 5, 0, 0, 0, 0, 0, 0, 0,
4074            0, 4, 3, 2, 1,
4075        );
4076        assert_eq_m256i(r, e);
4077    }
4078
4079    #[simd_test(enable = "avx512vbmi2,avx512vl")]
4080    unsafe fn test_mm_mask_expandloadu_epi8() {
4081        let src = _mm_set1_epi8(42);
4082        let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
4083        let p = a.as_ptr();
4084        let m = 0b11101000_11001010;
4085        let r = _mm_mask_expandloadu_epi8(src, m, black_box(p));
4086        let e = _mm_set_epi8(8, 7, 6, 42, 5, 42, 42, 42, 4, 3, 42, 42, 2, 42, 1, 42);
4087        assert_eq_m128i(r, e);
4088    }
4089
4090    #[simd_test(enable = "avx512vbmi2,avx512vl")]
4091    unsafe fn test_mm_maskz_expandloadu_epi8() {
4092        let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
4093        let p = a.as_ptr();
4094        let m = 0b11101000_11001010;
4095        let r = _mm_maskz_expandloadu_epi8(m, black_box(p));
4096        let e = _mm_set_epi8(8, 7, 6, 0, 5, 0, 0, 0, 4, 3, 0, 0, 2, 0, 1, 0);
4097        assert_eq_m128i(r, e);
4098    }
4099
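    // Compress-store tests: lanes whose mask bit is set (scanned from the least significant
    // bit) are written contiguously from the start of the destination buffer, so the number
    // of stored elements equals the popcount of the mask; the remaining entries of r are
    // never written and keep their initial 0. A mask of 0 stores nothing, which the first
    // assertion in each test checks.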
4100    #[simd_test(enable = "avx512vbmi2")]
4101    unsafe fn test_mm512_mask_compressstoreu_epi16() {
4102        let a = _mm512_set_epi16(
4103            32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11,
4104            10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
4105        );
4106        let mut r = [0_i16; 32];
4107        _mm512_mask_compressstoreu_epi16(r.as_mut_ptr(), 0, a);
4108        assert_eq!(&r, &[0_i16; 32]);
4109        _mm512_mask_compressstoreu_epi16(r.as_mut_ptr(), 0b11110000_11001010_11111111_00000000, a);
4110        assert_eq!(
4111            &r,
4112            &[
4113                9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 23, 24, 29, 30, 31, 32, 0, 0, 0, 0, 0, 0, 0,
4114                0, 0, 0, 0, 0, 0, 0, 0, 0
4115            ]
4116        );
4117    }
4118
4119    #[simd_test(enable = "avx512vbmi2,avx512vl")]
4120    unsafe fn test_mm256_mask_compressstoreu_epi16() {
4121        let a = _mm256_set_epi16(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
4122        let mut r = [0_i16; 16];
4123        _mm256_mask_compressstoreu_epi16(r.as_mut_ptr(), 0, a);
4124        assert_eq!(&r, &[0_i16; 16]);
4125        _mm256_mask_compressstoreu_epi16(r.as_mut_ptr(), 0b11110000_11001010, a);
4126        assert_eq!(&r, &[2, 4, 7, 8, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]);
4127    }
4128
4129    #[simd_test(enable = "avx512vbmi2,avx512vl")]
4130    unsafe fn test_mm_mask_compressstoreu_epi16() {
4131        let a = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1);
4132        let mut r = [0_i16; 8];
4133        _mm_mask_compressstoreu_epi16(r.as_mut_ptr(), 0, a);
4134        assert_eq!(&r, &[0_i16; 8]);
4135        _mm_mask_compressstoreu_epi16(r.as_mut_ptr(), 0b11110000, a);
4136        assert_eq!(&r, &[5, 6, 7, 8, 0, 0, 0, 0]);
4137    }
4138
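    // The epi8 compress-store tests repeat the same check at byte granularity; the 64-bit
    // mask in the 512-bit case has 32 bits set, so exactly 32 bytes are written.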
4139    #[simd_test(enable = "avx512vbmi2")]
4140    unsafe fn test_mm512_mask_compressstoreu_epi8() {
4141        let a = _mm512_set_epi8(
4142            64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43,
4143            42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21,
4144            20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
4145        );
4146        let mut r = [0_i8; 64];
4147        _mm512_mask_compressstoreu_epi8(r.as_mut_ptr(), 0, a);
4148        assert_eq!(&r, &[0_i8; 64]);
4149        _mm512_mask_compressstoreu_epi8(
4150            r.as_mut_ptr(),
4151            0b11110000_11001010_11111111_00000000_10101010_01010101_11110000_00001111,
4152            a,
4153        );
4154        assert_eq!(
4155            &r,
4156            &[
4157                1, 2, 3, 4, 13, 14, 15, 16, 17, 19, 21, 23, 26, 28, 30, 32, 41, 42, 43, 44, 45, 46,
4158                47, 48, 50, 52, 55, 56, 61, 62, 63, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4159                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4160            ]
4161        );
4162    }
4163
4164    #[simd_test(enable = "avx512vbmi2,avx512vl")]
4165    unsafe fn test_mm256_mask_compressstoreu_epi8() {
4166        let a = _mm256_set_epi8(
4167            32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11,
4168            10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
4169        );
4170        let mut r = [0_i8; 32];
4171        _mm256_mask_compressstoreu_epi8(r.as_mut_ptr(), 0, a);
4172        assert_eq!(&r, &[0_i8; 32]);
4173        _mm256_mask_compressstoreu_epi8(r.as_mut_ptr(), 0b11110000_11001010_11111111_00000000, a);
4174        assert_eq!(
4175            &r,
4176            &[
4177                9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 23, 24, 29, 30, 31, 32, 0, 0, 0, 0, 0, 0, 0,
4178                0, 0, 0, 0, 0, 0, 0, 0, 0
4179            ]
4180        );
4181    }
4182
4183    #[simd_test(enable = "avx512vbmi2,avx512vl")]
4184    unsafe fn test_mm_mask_compressstoreu_epi8() {
4185        let a = _mm_set_epi8(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
4186        let mut r = [0_i8; 16];
4187        _mm_mask_compressstoreu_epi8(r.as_mut_ptr(), 0, a);
4188        assert_eq!(&r, &[0_i8; 16]);
4189        _mm_mask_compressstoreu_epi8(r.as_mut_ptr(), 0b11110000_11001010, a);
4190        assert_eq!(&r, &[2, 4, 7, 8, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]);
4191    }
4192}