1use crate::{
4 core_arch::{simd::*, x86::*},
5 intrinsics::simd::*,
6 intrinsics::sqrtf32,
7 mem, ptr,
8};
9
10#[cfg(test)]
11use stdarch_test::assert_instr;
12
13#[inline]
18#[target_feature(enable = "sse")]
19#[cfg_attr(test, assert_instr(addss))]
20#[stable(feature = "simd_x86", since = "1.27.0")]
21pub unsafe fn _mm_add_ss(a: __m128, b: __m128) -> __m128 {
22 simd_insert!(a, 0, _mm_cvtss_f32(a) + _mm_cvtss_f32(b))
23}
24
/// Adds the packed `f32` lanes of `a` and `b` lane by lane.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(addps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_add_ps(a: __m128, b: __m128) -> __m128 {
    simd_add(a, b)
}
36
37#[inline]
42#[target_feature(enable = "sse")]
43#[cfg_attr(test, assert_instr(subss))]
44#[stable(feature = "simd_x86", since = "1.27.0")]
45pub unsafe fn _mm_sub_ss(a: __m128, b: __m128) -> __m128 {
46 simd_insert!(a, 0, _mm_cvtss_f32(a) - _mm_cvtss_f32(b))
47}
48
/// Subtracts the packed `f32` lanes of `b` from those of `a` lane by lane.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(subps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_ps(a: __m128, b: __m128) -> __m128 {
    simd_sub(a, b)
}
60
61#[inline]
66#[target_feature(enable = "sse")]
67#[cfg_attr(test, assert_instr(mulss))]
68#[stable(feature = "simd_x86", since = "1.27.0")]
69pub unsafe fn _mm_mul_ss(a: __m128, b: __m128) -> __m128 {
70 simd_insert!(a, 0, _mm_cvtss_f32(a) * _mm_cvtss_f32(b))
71}
72
/// Multiplies the packed `f32` lanes of `a` and `b` lane by lane.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(mulps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mul_ps(a: __m128, b: __m128) -> __m128 {
    simd_mul(a, b)
}
84
85#[inline]
90#[target_feature(enable = "sse")]
91#[cfg_attr(test, assert_instr(divss))]
92#[stable(feature = "simd_x86", since = "1.27.0")]
93pub unsafe fn _mm_div_ss(a: __m128, b: __m128) -> __m128 {
94 simd_insert!(a, 0, _mm_cvtss_f32(a) / _mm_cvtss_f32(b))
95}
96
/// Divides the packed `f32` lanes of `a` by those of `b` lane by lane.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(divps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_div_ps(a: __m128, b: __m128) -> __m128 {
    simd_div(a, b)
}
108
109#[inline]
114#[target_feature(enable = "sse")]
115#[cfg_attr(test, assert_instr(sqrtss))]
116#[stable(feature = "simd_x86", since = "1.27.0")]
117pub unsafe fn _mm_sqrt_ss(a: __m128) -> __m128 {
118 simd_insert!(a, 0, sqrtf32(_mm_cvtss_f32(a)))
119}
120
/// Computes the square root of every packed `f32` lane of `a`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(sqrtps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sqrt_ps(a: __m128) -> __m128 {
    simd_fsqrt(a)
}
132
/// Scalar reciprocal: forwards `a` to the `llvm.x86.sse.rcp.ss` intrinsic
/// (tests assert it lowers to `rcpss`).
///
/// Per Intel's definition of `rcpss` the result is an approximation of
/// `1 / a[0]` in lane 0, with the upper lanes taken from `a`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(rcpss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_rcp_ss(a: __m128) -> __m128 {
    rcpss(a)
}
144
/// Packed reciprocal: forwards `a` to the `llvm.x86.sse.rcp.ps` intrinsic
/// (tests assert it lowers to `rcpps`).
///
/// Per Intel's definition of `rcpps` each result lane is an approximation of
/// the reciprocal of the matching lane of `a`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(rcpps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_rcp_ps(a: __m128) -> __m128 {
    rcpps(a)
}
156
/// Scalar reciprocal square root: forwards `a` to the
/// `llvm.x86.sse.rsqrt.ss` intrinsic (tests assert it lowers to `rsqrtss`).
///
/// Per Intel's definition of `rsqrtss` the result is an approximation of
/// `1 / sqrt(a[0])` in lane 0, with the upper lanes taken from `a`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(rsqrtss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_rsqrt_ss(a: __m128) -> __m128 {
    rsqrtss(a)
}
168
/// Packed reciprocal square root: forwards `a` to the
/// `llvm.x86.sse.rsqrt.ps` intrinsic (tests assert it lowers to `rsqrtps`).
///
/// Per Intel's definition of `rsqrtps` each result lane is an approximation
/// of `1 / sqrt(lane)` for the matching lane of `a`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(rsqrtps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_rsqrt_ps(a: __m128) -> __m128 {
    rsqrtps(a)
}
180
/// Scalar minimum: forwards `a` and `b` to the `llvm.x86.sse.min.ss`
/// intrinsic (tests assert it lowers to `minss`).
///
/// Per Intel's definition of `minss`, lane 0 holds the minimum of the lowest
/// lanes of `a` and `b`, and the upper lanes come from `a`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(minss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_ss(a: __m128, b: __m128) -> __m128 {
    minss(a, b)
}
193
/// Packed minimum: forwards `a` and `b` to the `llvm.x86.sse.min.ps`
/// intrinsic (tests assert it lowers to `minps`).
///
/// The LLVM intrinsic is used rather than a generic SIMD min so the exact
/// operand order `(a, b)` reaches the instruction.
/// NOTE(review): `minps` is operand-order sensitive for NaN/signed-zero
/// inputs per Intel's documentation — confirm before swapping operands.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(minps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_ps(a: __m128, b: __m128) -> __m128 {
    minps(a, b)
}
206
/// Scalar maximum: forwards `a` and `b` to the `llvm.x86.sse.max.ss`
/// intrinsic (tests assert it lowers to `maxss`).
///
/// Per Intel's definition of `maxss`, lane 0 holds the maximum of the lowest
/// lanes of `a` and `b`, and the upper lanes come from `a`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(maxss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_ss(a: __m128, b: __m128) -> __m128 {
    maxss(a, b)
}
219
/// Packed maximum: forwards `a` and `b` to the `llvm.x86.sse.max.ps`
/// intrinsic (tests assert it lowers to `maxps`).
///
/// The LLVM intrinsic is used rather than a generic SIMD max so the exact
/// operand order `(a, b)` reaches the instruction.
/// NOTE(review): `maxps` is operand-order sensitive for NaN/signed-zero
/// inputs per Intel's documentation — confirm before swapping operands.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(maxps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_ps(a: __m128, b: __m128) -> __m128 {
    maxps(a, b)
}
232
233#[inline]
237#[target_feature(enable = "sse")]
238#[cfg_attr(
240 all(test, any(target_arch = "x86_64", target_feature = "sse2")),
241 assert_instr(andps)
242)]
243#[stable(feature = "simd_x86", since = "1.27.0")]
244pub unsafe fn _mm_and_ps(a: __m128, b: __m128) -> __m128 {
245 let a: __m128i = mem::transmute(a);
246 let b: __m128i = mem::transmute(b);
247 mem::transmute(simd_and(a, b))
248}
249
250#[inline]
257#[target_feature(enable = "sse")]
258#[cfg_attr(
261 all(test, any(target_arch = "x86_64", target_feature = "sse2")),
262 assert_instr(andnps)
263)]
264#[stable(feature = "simd_x86", since = "1.27.0")]
265pub unsafe fn _mm_andnot_ps(a: __m128, b: __m128) -> __m128 {
266 let a: __m128i = mem::transmute(a);
267 let b: __m128i = mem::transmute(b);
268 let mask: __m128i = mem::transmute(i32x4::splat(-1));
269 mem::transmute(simd_and(simd_xor(mask, a), b))
270}
271
272#[inline]
276#[target_feature(enable = "sse")]
277#[cfg_attr(
279 all(test, any(target_arch = "x86_64", target_feature = "sse2")),
280 assert_instr(orps)
281)]
282#[stable(feature = "simd_x86", since = "1.27.0")]
283pub unsafe fn _mm_or_ps(a: __m128, b: __m128) -> __m128 {
284 let a: __m128i = mem::transmute(a);
285 let b: __m128i = mem::transmute(b);
286 mem::transmute(simd_or(a, b))
287}
288
289#[inline]
294#[target_feature(enable = "sse")]
295#[cfg_attr(
297 all(test, any(target_arch = "x86_64", target_feature = "sse2")),
298 assert_instr(xorps)
299)]
300#[stable(feature = "simd_x86", since = "1.27.0")]
301pub unsafe fn _mm_xor_ps(a: __m128, b: __m128) -> __m128 {
302 let a: __m128i = mem::transmute(a);
303 let b: __m128i = mem::transmute(b);
304 mem::transmute(simd_xor(a, b))
305}
306
/// Scalar equality comparison of the lowest lanes of `a` and `b`.
///
/// Calls `cmpss(a, b, 0)`; predicate immediate 0 is the encoding the tests
/// assert as `cmpeqss`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpeqss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpeq_ss(a: __m128, b: __m128) -> __m128 {
    cmpss(a, b, 0)
}
319
/// Scalar less-than comparison of the lowest lanes (`a < b`).
///
/// Calls `cmpss(a, b, 1)`; predicate immediate 1 is the encoding the tests
/// assert as `cmpltss`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpltss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmplt_ss(a: __m128, b: __m128) -> __m128 {
    cmpss(a, b, 1)
}
333
/// Scalar less-than-or-equal comparison of the lowest lanes (`a <= b`).
///
/// Calls `cmpss(a, b, 2)`; predicate immediate 2 is the encoding the tests
/// assert as `cmpless`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpless))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmple_ss(a: __m128, b: __m128) -> __m128 {
    cmpss(a, b, 2)
}
347
/// Scalar greater-than comparison of the lowest lanes (`a > b`).
///
/// Implemented as the less-than predicate with swapped operands,
/// `cmpss(b, a, 1)`. Because the swap would also take the upper lanes from
/// `b`, the shuffle keeps only lane 0 of the comparison (index 4) and takes
/// lanes 1..=3 from `a`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpltss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpgt_ss(a: __m128, b: __m128) -> __m128 {
    simd_shuffle!(a, cmpss(b, a, 1), [4, 1, 2, 3])
}
361
/// Scalar greater-than-or-equal comparison of the lowest lanes (`a >= b`).
///
/// Implemented as the less-than-or-equal predicate with swapped operands,
/// `cmpss(b, a, 2)`. The shuffle keeps only lane 0 of the comparison
/// (index 4) and takes lanes 1..=3 from `a`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpless))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpge_ss(a: __m128, b: __m128) -> __m128 {
    simd_shuffle!(a, cmpss(b, a, 2), [4, 1, 2, 3])
}
375
/// Scalar inequality comparison of the lowest lanes of `a` and `b`.
///
/// Calls `cmpss(a, b, 4)`; predicate immediate 4 is the encoding the tests
/// assert as `cmpneqss`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpneqss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpneq_ss(a: __m128, b: __m128) -> __m128 {
    cmpss(a, b, 4)
}
389
/// Scalar not-less-than comparison of the lowest lanes (`!(a < b)`).
///
/// Calls `cmpss(a, b, 5)`; predicate immediate 5 is the encoding the tests
/// assert as `cmpnltss`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpnltss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpnlt_ss(a: __m128, b: __m128) -> __m128 {
    cmpss(a, b, 5)
}
403
/// Scalar not-less-than-or-equal comparison of the lowest lanes
/// (`!(a <= b)`).
///
/// Calls `cmpss(a, b, 6)`; predicate immediate 6 is the encoding the tests
/// assert as `cmpnless`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpnless))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpnle_ss(a: __m128, b: __m128) -> __m128 {
    cmpss(a, b, 6)
}
417
/// Scalar not-greater-than comparison of the lowest lanes (`!(a > b)`).
///
/// Implemented as the not-less-than predicate with swapped operands,
/// `cmpss(b, a, 5)`. The shuffle keeps only lane 0 of the comparison
/// (index 4) and takes lanes 1..=3 from `a`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpnltss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpngt_ss(a: __m128, b: __m128) -> __m128 {
    simd_shuffle!(a, cmpss(b, a, 5), [4, 1, 2, 3])
}
431
/// Scalar not-greater-than-or-equal comparison of the lowest lanes
/// (`!(a >= b)`).
///
/// Implemented as the not-less-than-or-equal predicate with swapped
/// operands, `cmpss(b, a, 6)`. The shuffle keeps only lane 0 of the
/// comparison (index 4) and takes lanes 1..=3 from `a`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpnless))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpnge_ss(a: __m128, b: __m128) -> __m128 {
    simd_shuffle!(a, cmpss(b, a, 6), [4, 1, 2, 3])
}
445
/// Scalar "ordered" check of the lowest lanes of `a` and `b`.
///
/// Calls `cmpss(a, b, 7)`; predicate immediate 7 is the encoding the tests
/// assert as `cmpordss` (per Intel, true when neither input is NaN).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpordss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpord_ss(a: __m128, b: __m128) -> __m128 {
    cmpss(a, b, 7)
}
459
/// Scalar "unordered" check of the lowest lanes of `a` and `b`.
///
/// Calls `cmpss(a, b, 3)`; predicate immediate 3 is the encoding the tests
/// assert as `cmpunordss` (per Intel, true when either input is NaN).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpunordss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpunord_ss(a: __m128, b: __m128) -> __m128 {
    cmpss(a, b, 3)
}
473
/// Packed equality comparison of `a` and `b`, lane by lane.
///
/// Calls `cmpps(a, b, 0)` (the `llvm.x86.sse.cmp.ps` intrinsic); predicate
/// immediate 0 is the encoding the tests assert as `cmpeqps`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpeqps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpeq_ps(a: __m128, b: __m128) -> __m128 {
    cmpps(a, b, 0)
}
486
/// Packed less-than comparison (`a < b`), lane by lane.
///
/// Calls `cmpps(a, b, 1)`; predicate immediate 1 is the encoding the tests
/// assert as `cmpltps`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpltps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmplt_ps(a: __m128, b: __m128) -> __m128 {
    cmpps(a, b, 1)
}
499
/// Packed less-than-or-equal comparison (`a <= b`), lane by lane.
///
/// Calls `cmpps(a, b, 2)`; predicate immediate 2 is the encoding the tests
/// assert as `cmpleps`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpleps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmple_ps(a: __m128, b: __m128) -> __m128 {
    cmpps(a, b, 2)
}
513
/// Packed greater-than comparison (`a > b`), lane by lane.
///
/// Implemented as the less-than predicate with swapped operands,
/// `cmpps(b, a, 1)`, which is why the tests assert `cmpltps`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpltps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpgt_ps(a: __m128, b: __m128) -> __m128 {
    cmpps(b, a, 1)
}
526
/// Packed greater-than-or-equal comparison (`a >= b`), lane by lane.
///
/// Implemented as the less-than-or-equal predicate with swapped operands,
/// `cmpps(b, a, 2)`, which is why the tests assert `cmpleps`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpleps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpge_ps(a: __m128, b: __m128) -> __m128 {
    cmpps(b, a, 2)
}
540
/// Packed inequality comparison of `a` and `b`, lane by lane.
///
/// Calls `cmpps(a, b, 4)`; predicate immediate 4 is the encoding the tests
/// assert as `cmpneqps`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpneqps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpneq_ps(a: __m128, b: __m128) -> __m128 {
    cmpps(a, b, 4)
}
553
/// Packed not-less-than comparison (`!(a < b)`), lane by lane.
///
/// Calls `cmpps(a, b, 5)`; predicate immediate 5 is the encoding the tests
/// assert as `cmpnltps`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpnltps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpnlt_ps(a: __m128, b: __m128) -> __m128 {
    cmpps(a, b, 5)
}
567
/// Packed not-less-than-or-equal comparison (`!(a <= b)`), lane by lane.
///
/// Calls `cmpps(a, b, 6)`; predicate immediate 6 is the encoding the tests
/// assert as `cmpnleps`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpnleps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpnle_ps(a: __m128, b: __m128) -> __m128 {
    cmpps(a, b, 6)
}
581
/// Packed not-greater-than comparison (`!(a > b)`), lane by lane.
///
/// Implemented as the not-less-than predicate with swapped operands,
/// `cmpps(b, a, 5)`, which is why the tests assert `cmpnltps`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpnltps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpngt_ps(a: __m128, b: __m128) -> __m128 {
    cmpps(b, a, 5)
}
595
/// Packed not-greater-than-or-equal comparison (`!(a >= b)`), lane by lane.
///
/// Implemented as the not-less-than-or-equal predicate with swapped
/// operands, `cmpps(b, a, 6)`, which is why the tests assert `cmpnleps`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpnleps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpnge_ps(a: __m128, b: __m128) -> __m128 {
    cmpps(b, a, 6)
}
609
/// Packed "ordered" check of `a` and `b`, lane by lane.
///
/// Calls `cmpps(b, a, 7)`; predicate immediate 7 is the encoding the tests
/// assert as `cmpordps` (per Intel, true when neither lane is NaN). The
/// operands are passed as `(b, a)`; the predicate is symmetric, so the order
/// does not affect the result.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpordps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpord_ps(a: __m128, b: __m128) -> __m128 {
    cmpps(b, a, 7)
}
623
/// Packed "unordered" check of `a` and `b`, lane by lane.
///
/// Calls `cmpps(b, a, 3)`; predicate immediate 3 is the encoding the tests
/// assert as `cmpunordps` (per Intel, true when either lane is NaN). The
/// operands are passed as `(b, a)`; the predicate is symmetric, so the order
/// does not affect the result.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpunordps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpunord_ps(a: __m128, b: __m128) -> __m128 {
    cmpps(b, a, 3)
}
637
/// Compares the lowest lanes of `a` and `b` for equality and returns the
/// outcome as an `i32`.
///
/// Forwards to the `llvm.x86.sse.comieq.ss` intrinsic (tests assert a
/// `comiss` instruction). Per Intel's definition the result is `1` when the
/// comparison holds and `0` otherwise.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(comiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_comieq_ss(a: __m128, b: __m128) -> i32 {
    comieq_ss(a, b)
}
649
/// Compares the lowest lanes of `a` and `b` for `a < b` and returns the
/// outcome as an `i32`.
///
/// Forwards to the `llvm.x86.sse.comilt.ss` intrinsic (tests assert a
/// `comiss` instruction). Per Intel's definition the result is `1` when the
/// comparison holds and `0` otherwise.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(comiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_comilt_ss(a: __m128, b: __m128) -> i32 {
    comilt_ss(a, b)
}
661
/// Compares the lowest lanes of `a` and `b` for `a <= b` and returns the
/// outcome as an `i32`.
///
/// Forwards to the `llvm.x86.sse.comile.ss` intrinsic (tests assert a
/// `comiss` instruction). Per Intel's definition the result is `1` when the
/// comparison holds and `0` otherwise.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(comiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_comile_ss(a: __m128, b: __m128) -> i32 {
    comile_ss(a, b)
}
674
/// Compares the lowest lanes of `a` and `b` for `a > b` and returns the
/// outcome as an `i32`.
///
/// Forwards to the `llvm.x86.sse.comigt.ss` intrinsic (tests assert a
/// `comiss` instruction). Per Intel's definition the result is `1` when the
/// comparison holds and `0` otherwise.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(comiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_comigt_ss(a: __m128, b: __m128) -> i32 {
    comigt_ss(a, b)
}
687
/// Compares the lowest lanes of `a` and `b` for `a >= b` and returns the
/// outcome as an `i32`.
///
/// Forwards to the `llvm.x86.sse.comige.ss` intrinsic (tests assert a
/// `comiss` instruction). Per Intel's definition the result is `1` when the
/// comparison holds and `0` otherwise.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(comiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_comige_ss(a: __m128, b: __m128) -> i32 {
    comige_ss(a, b)
}
700
/// Compares the lowest lanes of `a` and `b` for inequality and returns the
/// outcome as an `i32`.
///
/// Forwards to the `llvm.x86.sse.comineq.ss` intrinsic (tests assert a
/// `comiss` instruction). Per Intel's definition the result is `1` when the
/// comparison holds and `0` otherwise.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(comiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_comineq_ss(a: __m128, b: __m128) -> i32 {
    comineq_ss(a, b)
}
712
/// Compares the lowest lanes of `a` and `b` for equality and returns the
/// outcome as an `i32`, using the `ucomiss` form of the comparison.
///
/// Forwards to the `llvm.x86.sse.ucomieq.ss` intrinsic.
/// NOTE(review): per Intel's documentation `ucomiss` differs from `comiss`
/// only in that quiet NaNs do not signal an invalid-operation exception —
/// confirm against the Intel SDM.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(ucomiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ucomieq_ss(a: __m128, b: __m128) -> i32 {
    ucomieq_ss(a, b)
}
725
/// Compares the lowest lanes of `a` and `b` for `a < b` and returns the
/// outcome as an `i32`, using the `ucomiss` form of the comparison.
///
/// Forwards to the `llvm.x86.sse.ucomilt.ss` intrinsic.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(ucomiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ucomilt_ss(a: __m128, b: __m128) -> i32 {
    ucomilt_ss(a, b)
}
739
/// Compares the lowest lanes of `a` and `b` for `a <= b` and returns the
/// outcome as an `i32`, using the `ucomiss` form of the comparison.
///
/// Forwards to the `ucomile_ss` foreign intrinsic declared elsewhere in this
/// file.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(ucomiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ucomile_ss(a: __m128, b: __m128) -> i32 {
    ucomile_ss(a, b)
}
753
/// Compares the lowest lanes of `a` and `b` for `a > b` and returns the
/// outcome as an `i32`, using the `ucomiss` form of the comparison.
///
/// Forwards to the `ucomigt_ss` foreign intrinsic declared elsewhere in this
/// file.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(ucomiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ucomigt_ss(a: __m128, b: __m128) -> i32 {
    ucomigt_ss(a, b)
}
767
/// Compares the lowest lanes of `a` and `b` for `a >= b` and returns the
/// outcome as an `i32`, using the `ucomiss` form of the comparison.
///
/// Forwards to the `ucomige_ss` foreign intrinsic declared elsewhere in this
/// file.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(ucomiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ucomige_ss(a: __m128, b: __m128) -> i32 {
    ucomige_ss(a, b)
}
781
/// Compares the lowest lanes of `a` and `b` for inequality and returns the
/// outcome as an `i32`, using the `ucomiss` form of the comparison.
///
/// Forwards to the `ucomineq_ss` foreign intrinsic declared elsewhere in
/// this file.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(ucomiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ucomineq_ss(a: __m128, b: __m128) -> i32 {
    ucomineq_ss(a, b)
}
794
/// Converts the lowest `f32` lane of `a` to an `i32`.
///
/// Forwards to the `cvtss2si` foreign intrinsic (tests assert the `cvtss2si`
/// instruction).
/// NOTE(review): per Intel's documentation the conversion rounds according
/// to the current MXCSR rounding mode and yields `i32::MIN` for
/// unrepresentable inputs — confirm against the Intel SDM.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cvtss2si))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtss_si32(a: __m128) -> i32 {
    cvtss2si(a)
}
811
/// Alias for [`_mm_cvtss_si32`]; simply forwards `a` to it.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cvtss2si))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvt_ss2si(a: __m128) -> i32 {
    _mm_cvtss_si32(a)
}
822
/// Converts the lowest `f32` lane of `a` to an `i32` using the truncating
/// `cvttss2si` form.
///
/// Forwards to the `cvttss2si` foreign intrinsic (tests assert the
/// `cvttss2si` instruction).
/// NOTE(review): per Intel's documentation this always rounds toward zero
/// regardless of MXCSR and yields `i32::MIN` for unrepresentable inputs —
/// confirm against the Intel SDM.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cvttss2si))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvttss_si32(a: __m128) -> i32 {
    cvttss2si(a)
}
841
/// Alias for [`_mm_cvttss_si32`]; simply forwards `a` to it.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cvttss2si))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtt_ss2si(a: __m128) -> i32 {
    _mm_cvttss_si32(a)
}
852
/// Extracts the lowest `f32` lane (lane 0) of `a`.
///
/// No `assert_instr` is attached to this function, so no particular
/// instruction is asserted by the tests.
#[inline]
#[target_feature(enable = "sse")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtss_f32(a: __m128) -> f32 {
    simd_extract!(a, 0)
}
864
/// Converts the 32-bit integer `b` to `f32` and combines it with `a`.
///
/// Forwards to the `cvtsi2ss` foreign intrinsic (tests assert the `cvtsi2ss`
/// instruction). Per Intel's definition the converted value is placed in
/// lane 0 and lanes 1..=3 are copied from `a`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cvtsi2ss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsi32_ss(a: __m128, b: i32) -> __m128 {
    cvtsi2ss(a, b)
}
879
/// Alias for [`_mm_cvtsi32_ss`]; simply forwards `a` and `b` to it.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cvtsi2ss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvt_si2ss(a: __m128, b: i32) -> __m128 {
    _mm_cvtsi32_ss(a, b)
}
890
/// Builds a vector with `a` in lane 0 and `0.0` in lanes 1..=3.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_ss(a: f32) -> __m128 {
    __m128([a, 0.0, 0.0, 0.0])
}
902
903#[inline]
907#[target_feature(enable = "sse")]
908#[cfg_attr(test, assert_instr(shufps))]
909#[stable(feature = "simd_x86", since = "1.27.0")]
910pub unsafe fn _mm_set1_ps(a: f32) -> __m128 {
911 __m128([a, a, a, a])
912}
913
/// Alias for [`_mm_set1_ps`]; simply forwards `a` to it.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(shufps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_ps1(a: f32) -> __m128 {
    _mm_set1_ps(a)
}
924
/// Builds a vector from four values given from the highest lane to the
/// lowest.
///
/// The arguments are stored in reverse order: `d` becomes lane 0, `c` lane
/// 1, `b` lane 2 and `a` lane 3. Use [`_mm_setr_ps`] for the opposite
/// ordering.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(unpcklps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_ps(a: f32, b: f32, c: f32, d: f32) -> __m128 {
    __m128([d, c, b, a])
}
951
/// Builds a vector from four values given from the lowest lane to the
/// highest.
///
/// `a` becomes lane 0, `b` lane 1, `c` lane 2 and `d` lane 3. The expected
/// instruction in tests depends on the target: `unpcklps` on MSVC or x86_64,
/// `movaps` on non-MSVC 32-bit x86.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(
    all(test, any(target_env = "msvc", target_arch = "x86_64")),
    assert_instr(unpcklps)
)]
#[cfg_attr(
    all(test, all(not(target_env = "msvc"), target_arch = "x86")),
    assert_instr(movaps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setr_ps(a: f32, b: f32, c: f32, d: f32) -> __m128 {
    __m128([a, b, c, d])
}
977
/// Builds a vector whose 128 bits are all zero.
///
/// The value is produced by a `const` block, so it is computed at compile
/// time.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(xorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setzero_ps() -> __m128 {
    const { mem::zeroed() }
}
988
989#[inline]
992#[allow(non_snake_case)]
993#[unstable(feature = "stdarch_x86_mm_shuffle", issue = "111147")]
994pub const fn _MM_SHUFFLE(z: u32, y: u32, x: u32, w: u32) -> i32 {
995 ((z << 6) | (y << 4) | (x << 2) | w) as i32
996}
997
/// Shuffles lanes of `a` and `b` according to the 8-bit constant `MASK`.
///
/// The two low result lanes are picked from `a` and the two high result
/// lanes from `b`:
///
/// * lane 0 = `a[MASK bits 1:0]`
/// * lane 1 = `a[MASK bits 3:2]`
/// * lane 2 = `b[MASK bits 5:4]`
/// * lane 3 = `b[MASK bits 7:6]`
///
/// `MASK` must fit in 8 unsigned bits; this is enforced at compile time.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(shufps, MASK = 3))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_shuffle_ps<const MASK: i32>(a: __m128, b: __m128) -> __m128 {
    static_assert_uimm_bits!(MASK, 8);
    simd_shuffle!(
        a,
        b,
        [
            MASK as u32 & 0b11,
            (MASK as u32 >> 2) & 0b11,
            // Indices 4..=7 address the lanes of the second operand `b`.
            ((MASK as u32 >> 4) & 0b11) + 4,
            ((MASK as u32 >> 6) & 0b11) + 4,
        ],
    )
}
1029
/// Interleaves the upper halves of `a` and `b`.
///
/// The result is `[a[2], b[2], a[3], b[3]]` (shuffle indices 4..=7 address
/// `b`).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(unpckhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpackhi_ps(a: __m128, b: __m128) -> __m128 {
    simd_shuffle!(a, b, [2, 6, 3, 7])
}
1041
/// Interleaves the lower halves of `a` and `b`.
///
/// The result is `[a[0], b[0], a[1], b[1]]` (shuffle indices 4..=7 address
/// `b`).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(unpcklps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpacklo_ps(a: __m128, b: __m128) -> __m128 {
    simd_shuffle!(a, b, [0, 4, 1, 5])
}
1053
/// Combines the upper half of `b` with the upper half of `a`.
///
/// The result is `[b[2], b[3], a[2], a[3]]`: the high half of `b` moves into
/// the low half of the result, and the high half of `a` stays in place. The
/// `movhlps` instruction is only asserted on non-MSVC targets.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(movhlps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_movehl_ps(a: __m128, b: __m128) -> __m128 {
    simd_shuffle!(a, b, [6, 7, 2, 3])
}
1066
/// Combines the lower half of `a` with the lower half of `b`.
///
/// The result is `[a[0], a[1], b[0], b[1]]`. The `movlhps` instruction is
/// only asserted on non-MSVC targets.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(movlhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_movelh_ps(a: __m128, b: __m128) -> __m128 {
    simd_shuffle!(a, b, [0, 1, 4, 5])
}
1078
/// Collects the sign bit of each `f32` lane of `a` into the low four bits of
/// an `i32`.
///
/// The lanes are reinterpreted as `i32` and compared against zero, so a lane
/// contributes a set bit exactly when its most significant bit is set. Only
/// the lowest four bits of the result can be non-zero.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movmskps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_movemask_ps(a: __m128) -> i32 {
    // `simd_lt` yields an all-ones or all-zeros lane, which is the form the
    // bitmask step consumes.
    let mask: i32x4 = simd_lt(transmute(a), i32x4::ZERO);
    simd_bitmask::<i32x4, u8>(mask).into()
}
1095
1096#[inline]
1103#[target_feature(enable = "sse")]
1104#[cfg_attr(test, assert_instr(movss))]
1105#[stable(feature = "simd_x86", since = "1.27.0")]
1106pub unsafe fn _mm_load_ss(p: *const f32) -> __m128 {
1107 __m128([*p, 0.0, 0.0, 0.0])
1108}
1109
1110#[inline]
1118#[target_feature(enable = "sse")]
1119#[cfg_attr(test, assert_instr(movss))]
1120#[stable(feature = "simd_x86", since = "1.27.0")]
1121pub unsafe fn _mm_load1_ps(p: *const f32) -> __m128 {
1122 let a = *p;
1123 __m128([a, a, a, a])
1124}
1125
/// Alias for [`_mm_load1_ps`]; simply forwards `p` to it.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_load_ps1(p: *const f32) -> __m128 {
    _mm_load1_ps(p)
}
1136
1137#[inline]
1148#[target_feature(enable = "sse")]
1149#[cfg_attr(test, assert_instr(movaps))]
1150#[stable(feature = "simd_x86", since = "1.27.0")]
1151#[allow(clippy::cast_ptr_alignment)]
1152pub unsafe fn _mm_load_ps(p: *const f32) -> __m128 {
1153 *(p as *const __m128)
1154}
1155
1156#[inline]
1166#[target_feature(enable = "sse")]
1167#[cfg_attr(test, assert_instr(movups))]
1168#[stable(feature = "simd_x86", since = "1.27.0")]
1169pub unsafe fn _mm_loadu_ps(p: *const f32) -> __m128 {
1170 let mut dst = _mm_undefined_ps();
1173 ptr::copy_nonoverlapping(
1174 p as *const u8,
1175 ptr::addr_of_mut!(dst) as *mut u8,
1176 mem::size_of::<__m128>(),
1177 );
1178 dst
1179}
1180
1181#[inline]
1203#[target_feature(enable = "sse")]
1204#[cfg_attr(test, assert_instr(movaps))]
1205#[stable(feature = "simd_x86", since = "1.27.0")]
1206pub unsafe fn _mm_loadr_ps(p: *const f32) -> __m128 {
1207 let a = _mm_load_ps(p);
1208 simd_shuffle!(a, a, [3, 2, 1, 0])
1209}
1210
1211#[inline]
1217#[target_feature(enable = "sse")]
1218#[cfg_attr(test, assert_instr(movss))]
1219#[stable(feature = "simd_x86", since = "1.27.0")]
1220pub unsafe fn _mm_store_ss(p: *mut f32, a: __m128) {
1221 *p = simd_extract!(a, 0);
1222}
1223
1224#[inline]
1243#[target_feature(enable = "sse")]
1244#[cfg_attr(test, assert_instr(movaps))]
1245#[stable(feature = "simd_x86", since = "1.27.0")]
1246#[allow(clippy::cast_ptr_alignment)]
1247pub unsafe fn _mm_store1_ps(p: *mut f32, a: __m128) {
1248 let b: __m128 = simd_shuffle!(a, a, [0, 0, 0, 0]);
1249 *(p as *mut __m128) = b;
1250}
1251
/// Alias for [`_mm_store1_ps`]; simply forwards `p` and `a` to it.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_store_ps1(p: *mut f32, a: __m128) {
    _mm_store1_ps(p, a);
}
1262
1263#[inline]
1275#[target_feature(enable = "sse")]
1276#[cfg_attr(test, assert_instr(movaps))]
1277#[stable(feature = "simd_x86", since = "1.27.0")]
1278#[allow(clippy::cast_ptr_alignment)]
1279pub unsafe fn _mm_store_ps(p: *mut f32, a: __m128) {
1280 *(p as *mut __m128) = a;
1281}
1282
1283#[inline]
1291#[target_feature(enable = "sse")]
1292#[cfg_attr(test, assert_instr(movups))]
1293#[stable(feature = "simd_x86", since = "1.27.0")]
1294pub unsafe fn _mm_storeu_ps(p: *mut f32, a: __m128) {
1295 ptr::copy_nonoverlapping(
1296 ptr::addr_of!(a) as *const u8,
1297 p as *mut u8,
1298 mem::size_of::<__m128>(),
1299 );
1300}
1301
1302#[inline]
1319#[target_feature(enable = "sse")]
1320#[cfg_attr(test, assert_instr(movaps))]
1321#[stable(feature = "simd_x86", since = "1.27.0")]
1322#[allow(clippy::cast_ptr_alignment)]
1323pub unsafe fn _mm_storer_ps(p: *mut f32, a: __m128) {
1324 let b: __m128 = simd_shuffle!(a, a, [3, 2, 1, 0]);
1325 *(p as *mut __m128) = b;
1326}
1327
/// Replaces lane 0 of `a` with lane 0 of `b`.
///
/// The result is `[b[0], a[1], a[2], a[3]]` (shuffle index 4 addresses lane
/// 0 of `b`).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_move_ss(a: __m128, b: __m128) -> __m128 {
    simd_shuffle!(a, b, [4, 1, 2, 3])
}
1344
/// Issues a store fence by calling the `sfence` foreign intrinsic (tests
/// assert the `sfence` instruction).
///
/// NOTE(review): per Intel's documentation `sfence` orders all earlier
/// stores before any later store becomes globally visible — confirm against
/// the Intel SDM for the exact guarantees.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(sfence))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sfence() {
    sfence()
}
1419
1420#[inline]
1435#[target_feature(enable = "sse")]
1436#[cfg_attr(test, assert_instr(stmxcsr))]
1437#[stable(feature = "simd_x86", since = "1.27.0")]
1438#[deprecated(
1439 since = "1.75.0",
1440 note = "see `_mm_getcsr` documentation - use inline assembly instead"
1441)]
1442pub unsafe fn _mm_getcsr() -> u32 {
1443 let mut result = 0_i32;
1444 stmxcsr(ptr::addr_of_mut!(result) as *mut i8);
1445 result as u32
1446}
1447
/// Writes `val` to the MXCSR control/status register.
///
/// The address of the by-value parameter is handed to the `ldmxcsr`
/// intrinsic, which loads the register from that 32-bit slot.
///
/// Deprecated: see the deprecation note; inline assembly is the recommended
/// replacement.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(ldmxcsr))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[deprecated(
    since = "1.75.0",
    note = "see `_mm_setcsr` documentation - use inline assembly instead"
)]
pub unsafe fn _mm_setcsr(val: u32) {
    ldmxcsr(ptr::addr_of!(val) as *const i8);
}
1592
/// MXCSR exception-state flag, bit 0 (invalid operation).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_EXCEPT_INVALID: u32 = 0x0001;
/// MXCSR exception-state flag, bit 1 (denormal operand).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_EXCEPT_DENORM: u32 = 0x0002;
/// MXCSR exception-state flag, bit 2 (division by zero).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_EXCEPT_DIV_ZERO: u32 = 0x0004;
/// MXCSR exception-state flag, bit 3 (overflow).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_EXCEPT_OVERFLOW: u32 = 0x0008;
/// MXCSR exception-state flag, bit 4 (underflow).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_EXCEPT_UNDERFLOW: u32 = 0x0010;
/// MXCSR exception-state flag, bit 5 (inexact result).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_EXCEPT_INEXACT: u32 = 0x0020;
/// Mask covering all six exception-state flags (bits 0..=5); used by
/// `_MM_GET_EXCEPTION_STATE` and `_MM_SET_EXCEPTION_STATE`.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_EXCEPT_MASK: u32 = 0x003f;
1614
/// MXCSR exception-mask bit 7 (invalid operation).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_MASK_INVALID: u32 = 0x0080;
/// MXCSR exception-mask bit 8 (denormal operand).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_MASK_DENORM: u32 = 0x0100;
/// MXCSR exception-mask bit 9 (division by zero).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_MASK_DIV_ZERO: u32 = 0x0200;
/// MXCSR exception-mask bit 10 (overflow).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_MASK_OVERFLOW: u32 = 0x0400;
/// MXCSR exception-mask bit 11 (underflow).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_MASK_UNDERFLOW: u32 = 0x0800;
/// MXCSR exception-mask bit 12 (inexact result).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_MASK_INEXACT: u32 = 0x1000;
/// Mask covering all six exception-mask bits (bits 7..=12); used by
/// `_MM_GET_EXCEPTION_MASK` and `_MM_SET_EXCEPTION_MASK`.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_MASK_MASK: u32 = 0x1f80;
1636
/// MXCSR rounding-control value `0b00` in bits 13..=14 (round to nearest).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_ROUND_NEAREST: u32 = 0x0000;
/// MXCSR rounding-control value `0b01` in bits 13..=14 (round down).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_ROUND_DOWN: u32 = 0x2000;
/// MXCSR rounding-control value `0b10` in bits 13..=14 (round up).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_ROUND_UP: u32 = 0x4000;
/// MXCSR rounding-control value `0b11` in bits 13..=14 (round toward zero).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_ROUND_TOWARD_ZERO: u32 = 0x6000;

/// Mask covering the rounding-control field (bits 13..=14); used by
/// `_MM_GET_ROUNDING_MODE` and `_MM_SET_ROUNDING_MODE`. Numerically equal to
/// `_MM_ROUND_TOWARD_ZERO`.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_ROUND_MASK: u32 = 0x6000;
1653
/// Mask covering the flush-to-zero bit (bit 15) of MXCSR; used by
/// `_MM_GET_FLUSH_ZERO_MODE` and `_MM_SET_FLUSH_ZERO_MODE`.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FLUSH_ZERO_MASK: u32 = 0x8000;
/// Flush-to-zero bit set.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FLUSH_ZERO_ON: u32 = 0x8000;
/// Flush-to-zero bit clear.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FLUSH_ZERO_OFF: u32 = 0x0000;
1663
1664#[inline]
1668#[allow(deprecated)] #[allow(non_snake_case)]
1670#[target_feature(enable = "sse")]
1671#[stable(feature = "simd_x86", since = "1.27.0")]
1672#[deprecated(
1673 since = "1.75.0",
1674 note = "see `_mm_getcsr` documentation - use inline assembly instead"
1675)]
1676pub unsafe fn _MM_GET_EXCEPTION_MASK() -> u32 {
1677 _mm_getcsr() & _MM_MASK_MASK
1678}
1679
1680#[inline]
1684#[allow(deprecated)] #[allow(non_snake_case)]
1686#[target_feature(enable = "sse")]
1687#[stable(feature = "simd_x86", since = "1.27.0")]
1688#[deprecated(
1689 since = "1.75.0",
1690 note = "see `_mm_getcsr` documentation - use inline assembly instead"
1691)]
1692pub unsafe fn _MM_GET_EXCEPTION_STATE() -> u32 {
1693 _mm_getcsr() & _MM_EXCEPT_MASK
1694}
1695
1696#[inline]
1700#[allow(deprecated)] #[allow(non_snake_case)]
1702#[target_feature(enable = "sse")]
1703#[stable(feature = "simd_x86", since = "1.27.0")]
1704#[deprecated(
1705 since = "1.75.0",
1706 note = "see `_mm_getcsr` documentation - use inline assembly instead"
1707)]
1708pub unsafe fn _MM_GET_FLUSH_ZERO_MODE() -> u32 {
1709 _mm_getcsr() & _MM_FLUSH_ZERO_MASK
1710}
1711
1712#[inline]
1716#[allow(deprecated)] #[allow(non_snake_case)]
1718#[target_feature(enable = "sse")]
1719#[stable(feature = "simd_x86", since = "1.27.0")]
1720#[deprecated(
1721 since = "1.75.0",
1722 note = "see `_mm_getcsr` documentation - use inline assembly instead"
1723)]
1724pub unsafe fn _MM_GET_ROUNDING_MODE() -> u32 {
1725 _mm_getcsr() & _MM_ROUND_MASK
1726}
1727
1728#[inline]
1732#[allow(deprecated)] #[allow(non_snake_case)]
1734#[target_feature(enable = "sse")]
1735#[stable(feature = "simd_x86", since = "1.27.0")]
1736#[deprecated(
1737 since = "1.75.0",
1738 note = "see `_mm_setcsr` documentation - use inline assembly instead"
1739)]
1740pub unsafe fn _MM_SET_EXCEPTION_MASK(x: u32) {
1741 _mm_setcsr((_mm_getcsr() & !_MM_MASK_MASK) | x)
1742}
1743
1744#[inline]
1748#[allow(deprecated)] #[allow(non_snake_case)]
1750#[target_feature(enable = "sse")]
1751#[stable(feature = "simd_x86", since = "1.27.0")]
1752#[deprecated(
1753 since = "1.75.0",
1754 note = "see `_mm_setcsr` documentation - use inline assembly instead"
1755)]
1756pub unsafe fn _MM_SET_EXCEPTION_STATE(x: u32) {
1757 _mm_setcsr((_mm_getcsr() & !_MM_EXCEPT_MASK) | x)
1758}
1759
1760#[inline]
1764#[allow(deprecated)] #[allow(non_snake_case)]
1766#[target_feature(enable = "sse")]
1767#[stable(feature = "simd_x86", since = "1.27.0")]
1768#[deprecated(
1769 since = "1.75.0",
1770 note = "see `_mm_setcsr` documentation - use inline assembly instead"
1771)]
1772pub unsafe fn _MM_SET_FLUSH_ZERO_MODE(x: u32) {
1773 let val = (_mm_getcsr() & !_MM_FLUSH_ZERO_MASK) | x;
1774 _mm_setcsr(val)
1776}
1777
1778#[inline]
1782#[allow(deprecated)] #[allow(non_snake_case)]
1784#[target_feature(enable = "sse")]
1785#[stable(feature = "simd_x86", since = "1.27.0")]
1786#[deprecated(
1787 since = "1.75.0",
1788 note = "see `_mm_setcsr` documentation - use inline assembly instead"
1789)]
1790pub unsafe fn _MM_SET_ROUNDING_MODE(x: u32) {
1791 _mm_setcsr((_mm_getcsr() & !_MM_ROUND_MASK) | x)
1792}
1793
/// Prefetch strategy for `_mm_prefetch`; tests assert it lowers to
/// `prefetcht0`.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_HINT_T0: i32 = 3;

/// Prefetch strategy for `_mm_prefetch`; tests assert it lowers to
/// `prefetcht1`.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_HINT_T1: i32 = 2;

/// Prefetch strategy for `_mm_prefetch`; tests assert it lowers to
/// `prefetcht2`.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_HINT_T2: i32 = 1;

/// Prefetch strategy for `_mm_prefetch`; tests assert it lowers to
/// `prefetchnta`.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_HINT_NTA: i32 = 0;

/// Prefetch strategy for `_mm_prefetch`: `_MM_HINT_T0` with bit 2 set.
/// NOTE(review): bit 2 presumably requests a prefetch with intent to write —
/// confirm against the `_mm_prefetch` lowering.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_HINT_ET0: i32 = 7;

/// Prefetch strategy for `_mm_prefetch`: `_MM_HINT_T1` with bit 2 set.
/// NOTE(review): bit 2 presumably requests a prefetch with intent to write —
/// confirm against the `_mm_prefetch` lowering.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_HINT_ET1: i32 = 6;
1817
/// Issues a prefetch for the cache line containing `p` using the constant
/// `STRATEGY` (one of the `_MM_HINT_*` values).
///
/// `STRATEGY` must fit in 3 unsigned bits; this is enforced at compile time.
/// Tests assert the `prefetcht0`, `prefetcht1`, `prefetcht2` and
/// `prefetchnta` instructions for the T0, T1, T2 and NTA hints.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(prefetcht0, STRATEGY = _MM_HINT_T0))]
#[cfg_attr(test, assert_instr(prefetcht1, STRATEGY = _MM_HINT_T1))]
#[cfg_attr(test, assert_instr(prefetcht2, STRATEGY = _MM_HINT_T2))]
#[cfg_attr(test, assert_instr(prefetchnta, STRATEGY = _MM_HINT_NTA))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_prefetch<const STRATEGY: i32>(p: *const i8) {
    static_assert_uimm_bits!(STRATEGY, 3);
    // Bit 2 of STRATEGY is passed as the second argument and the low two bits
    // as the third; the final argument is the constant 1.
    // NOTE(review): presumably these map to the rw / locality / cache-type
    // operands of LLVM's prefetch intrinsic — confirm against the `prefetch`
    // declaration.
    prefetch(p, (STRATEGY >> 2) & 1, STRATEGY & 3, 1);
}
1874
/// Returns a vector whose contents callers should treat as unspecified.
///
/// Despite the name, this implementation returns an all-zero vector
/// (`mem::zeroed()` evaluated in a `const` block), so reading the result is
/// always well defined.
#[inline]
#[target_feature(enable = "sse")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_undefined_ps() -> __m128 {
    const { mem::zeroed() }
}
1886
1887#[inline]
1891#[allow(non_snake_case)]
1892#[target_feature(enable = "sse")]
1893#[stable(feature = "simd_x86", since = "1.27.0")]
1894pub unsafe fn _MM_TRANSPOSE4_PS(
1895 row0: &mut __m128,
1896 row1: &mut __m128,
1897 row2: &mut __m128,
1898 row3: &mut __m128,
1899) {
1900 let tmp0 = _mm_unpacklo_ps(*row0, *row1);
1901 let tmp2 = _mm_unpacklo_ps(*row2, *row3);
1902 let tmp1 = _mm_unpackhi_ps(*row0, *row1);
1903 let tmp3 = _mm_unpackhi_ps(*row2, *row3);
1904
1905 *row0 = _mm_movelh_ps(tmp0, tmp2);
1906 *row1 = _mm_movehl_ps(tmp2, tmp0);
1907 *row2 = _mm_movelh_ps(tmp1, tmp3);
1908 *row3 = _mm_movehl_ps(tmp3, tmp1);
1909}
1910
// Declarations of the LLVM intrinsics used by this module. Each
// `link_name` is the LLVM intrinsic that the Rust-side name binds to.
//
// NOTE(review): `improper_ctypes` is presumably allowed because `__m128`
// is not considered an FFI-safe type by the lint - confirm.
#[allow(improper_ctypes)]
extern "C" {
    // Approximate reciprocal / reciprocal square root.
    #[link_name = "llvm.x86.sse.rcp.ss"]
    fn rcpss(a: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.rcp.ps"]
    fn rcpps(a: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.rsqrt.ss"]
    fn rsqrtss(a: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.rsqrt.ps"]
    fn rsqrtps(a: __m128) -> __m128;
    // Minimum / maximum.
    #[link_name = "llvm.x86.sse.min.ss"]
    fn minss(a: __m128, b: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.min.ps"]
    fn minps(a: __m128, b: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.max.ss"]
    fn maxss(a: __m128, b: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.max.ps"]
    fn maxps(a: __m128, b: __m128) -> __m128;
    // Packed comparison; the predicate is selected by `imm8`.
    #[link_name = "llvm.x86.sse.cmp.ps"]
    fn cmpps(a: __m128, b: __m128, imm8: i8) -> __m128;
    // `comi*` scalar comparisons returning an integer result.
    #[link_name = "llvm.x86.sse.comieq.ss"]
    fn comieq_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.comilt.ss"]
    fn comilt_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.comile.ss"]
    fn comile_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.comigt.ss"]
    fn comigt_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.comige.ss"]
    fn comige_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.comineq.ss"]
    fn comineq_ss(a: __m128, b: __m128) -> i32;
    // `ucomi*` scalar comparisons returning an integer result.
    #[link_name = "llvm.x86.sse.ucomieq.ss"]
    fn ucomieq_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.ucomilt.ss"]
    fn ucomilt_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.ucomile.ss"]
    fn ucomile_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.ucomigt.ss"]
    fn ucomigt_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.ucomige.ss"]
    fn ucomige_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.ucomineq.ss"]
    fn ucomineq_ss(a: __m128, b: __m128) -> i32;
    // Conversions between the lowest `f32` lane and `i32`.
    #[link_name = "llvm.x86.sse.cvtss2si"]
    fn cvtss2si(a: __m128) -> i32;
    #[link_name = "llvm.x86.sse.cvttss2si"]
    fn cvttss2si(a: __m128) -> i32;
    #[link_name = "llvm.x86.sse.cvtsi2ss"]
    fn cvtsi2ss(a: __m128, b: i32) -> __m128;
    // Store fence and MXCSR store/load.
    #[link_name = "llvm.x86.sse.sfence"]
    fn sfence();
    #[link_name = "llvm.x86.sse.stmxcsr"]
    fn stmxcsr(p: *mut i8);
    #[link_name = "llvm.x86.sse.ldmxcsr"]
    fn ldmxcsr(p: *const i8);
    // Generic (not x86-specific) LLVM prefetch intrinsic.
    #[link_name = "llvm.prefetch"]
    fn prefetch(p: *const i8, rw: i32, loc: i32, ty: i32);
    // Scalar comparison; the predicate is selected by `imm8`.
    #[link_name = "llvm.x86.sse.cmp.ss"]
    fn cmpss(a: __m128, b: __m128, imm8: i8) -> __m128;
}
1972
/// Stores `a` to the memory at `mem_addr` using the `movntps` instruction.
///
/// The store is emitted through inline assembly rather than a compiler
/// intrinsic.
///
/// # Safety
///
/// NOTE(review): `movntps` is documented by Intel as a non-temporal store
/// that requires a 16-byte-aligned address and needs a store fence (e.g.
/// `_mm_sfence`) before other threads may rely on the data - confirm the
/// exact contract against the Intel SDM before relying on this summary.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movntps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_stream_ps(mem_addr: *mut f32, a: __m128) {
    // NOTE(review): `vps!` presumably expands to the instruction text with a
    // memory operand built from `{p}` - confirm against the macro definition.
    crate::arch::asm!(
        vps!("movntps", ",{a}"),
        p = in(reg) mem_addr,
        a = in(xmm_reg) a,
        options(nostack, preserves_flags),
    );
}
2001
2002#[cfg(test)]
2003mod tests {
2004 use crate::{hint::black_box, mem::transmute, ptr};
2005 use std::boxed;
2006 use stdarch_test::simd_test;
2007
2008 use crate::core_arch::{simd::*, x86::*};
2009
    // Short alias for `f32::NAN`, used to build unordered operands in the tests below.
    const NAN: f32 = f32::NAN;
2011
2012 #[simd_test(enable = "sse")]
2013 unsafe fn test_mm_add_ps() {
2014 let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2015 let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2016 let r = _mm_add_ps(a, b);
2017 assert_eq_m128(r, _mm_setr_ps(-101.0, 25.0, 0.0, -15.0));
2018 }
2019
2020 #[simd_test(enable = "sse")]
2021 unsafe fn test_mm_add_ss() {
2022 let a = _mm_set_ps(-1.0, 5.0, 0.0, -10.0);
2023 let b = _mm_set_ps(-100.0, 20.0, 0.0, -5.0);
2024 let r = _mm_add_ss(a, b);
2025 assert_eq_m128(r, _mm_set_ps(-1.0, 5.0, 0.0, -15.0));
2026 }
2027
2028 #[simd_test(enable = "sse")]
2029 unsafe fn test_mm_sub_ps() {
2030 let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2031 let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2032 let r = _mm_sub_ps(a, b);
2033 assert_eq_m128(r, _mm_setr_ps(99.0, -15.0, 0.0, -5.0));
2034 }
2035
2036 #[simd_test(enable = "sse")]
2037 unsafe fn test_mm_sub_ss() {
2038 let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2039 let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2040 let r = _mm_sub_ss(a, b);
2041 assert_eq_m128(r, _mm_setr_ps(99.0, 5.0, 0.0, -10.0));
2042 }
2043
2044 #[simd_test(enable = "sse")]
2045 unsafe fn test_mm_mul_ps() {
2046 let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2047 let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2048 let r = _mm_mul_ps(a, b);
2049 assert_eq_m128(r, _mm_setr_ps(100.0, 100.0, 0.0, 50.0));
2050 }
2051
2052 #[simd_test(enable = "sse")]
2053 unsafe fn test_mm_mul_ss() {
2054 let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2055 let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2056 let r = _mm_mul_ss(a, b);
2057 assert_eq_m128(r, _mm_setr_ps(100.0, 5.0, 0.0, -10.0));
2058 }
2059
2060 #[simd_test(enable = "sse")]
2061 unsafe fn test_mm_div_ps() {
2062 let a = _mm_setr_ps(-1.0, 5.0, 2.0, -10.0);
2063 let b = _mm_setr_ps(-100.0, 20.0, 0.2, -5.0);
2064 let r = _mm_div_ps(a, b);
2065 assert_eq_m128(r, _mm_setr_ps(0.01, 0.25, 10.0, 2.0));
2066 }
2067
2068 #[simd_test(enable = "sse")]
2069 unsafe fn test_mm_div_ss() {
2070 let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2071 let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2072 let r = _mm_div_ss(a, b);
2073 assert_eq_m128(r, _mm_setr_ps(0.01, 5.0, 0.0, -10.0));
2074 }
2075
2076 #[simd_test(enable = "sse")]
2077 unsafe fn test_mm_sqrt_ss() {
2078 let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
2079 let r = _mm_sqrt_ss(a);
2080 let e = _mm_setr_ps(2.0, 13.0, 16.0, 100.0);
2081 assert_eq_m128(r, e);
2082 }
2083
2084 #[simd_test(enable = "sse")]
2085 unsafe fn test_mm_sqrt_ps() {
2086 let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
2087 let r = _mm_sqrt_ps(a);
2088 let e = _mm_setr_ps(2.0, 3.6055512, 4.0, 10.0);
2089 assert_eq_m128(r, e);
2090 }
2091
2092 #[simd_test(enable = "sse")]
2093 unsafe fn test_mm_rcp_ss() {
2094 let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
2095 let r = _mm_rcp_ss(a);
2096 let e = _mm_setr_ps(0.24993896, 13.0, 16.0, 100.0);
2097 let rel_err = 0.00048828125;
2098 assert_approx_eq!(get_m128(r, 0), get_m128(e, 0), 2. * rel_err);
2099 for i in 1..4 {
2100 assert_eq!(get_m128(r, i), get_m128(e, i));
2101 }
2102 }
2103
2104 #[simd_test(enable = "sse")]
2105 unsafe fn test_mm_rcp_ps() {
2106 let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
2107 let r = _mm_rcp_ps(a);
2108 let e = _mm_setr_ps(0.24993896, 0.0769043, 0.06248474, 0.0099983215);
2109 let rel_err = 0.00048828125;
2110 for i in 0..4 {
2111 assert_approx_eq!(get_m128(r, i), get_m128(e, i), 2. * rel_err);
2112 }
2113 }
2114
2115 #[simd_test(enable = "sse")]
2116 unsafe fn test_mm_rsqrt_ss() {
2117 let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
2118 let r = _mm_rsqrt_ss(a);
2119 let e = _mm_setr_ps(0.49987793, 13.0, 16.0, 100.0);
2120 let rel_err = 0.00048828125;
2121 for i in 0..4 {
2122 assert_approx_eq!(get_m128(r, i), get_m128(e, i), 2. * rel_err);
2123 }
2124 }
2125
2126 #[simd_test(enable = "sse")]
2127 unsafe fn test_mm_rsqrt_ps() {
2128 let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
2129 let r = _mm_rsqrt_ps(a);
2130 let e = _mm_setr_ps(0.49987793, 0.2772827, 0.24993896, 0.099990845);
2131 let rel_err = 0.00048828125;
2132 for i in 0..4 {
2133 assert_approx_eq!(get_m128(r, i), get_m128(e, i), 2. * rel_err);
2134 }
2135 }
2136
2137 #[simd_test(enable = "sse")]
2138 unsafe fn test_mm_min_ss() {
2139 let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2140 let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2141 let r = _mm_min_ss(a, b);
2142 assert_eq_m128(r, _mm_setr_ps(-100.0, 5.0, 0.0, -10.0));
2143 }
2144
2145 #[simd_test(enable = "sse")]
2146 unsafe fn test_mm_min_ps() {
2147 let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2148 let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2149 let r = _mm_min_ps(a, b);
2150 assert_eq_m128(r, _mm_setr_ps(-100.0, 5.0, 0.0, -10.0));
2151
2152 let a = _mm_setr_ps(-0.0, 0.0, 0.0, 0.0);
2158 let b = _mm_setr_ps(0.0, 0.0, 0.0, 0.0);
2159 let r1: [u8; 16] = transmute(_mm_min_ps(a, b));
2160 let r2: [u8; 16] = transmute(_mm_min_ps(b, a));
2161 let a: [u8; 16] = transmute(a);
2162 let b: [u8; 16] = transmute(b);
2163 assert_eq!(r1, b);
2164 assert_eq!(r2, a);
2165 assert_ne!(a, b); }
2167
2168 #[simd_test(enable = "sse")]
2169 unsafe fn test_mm_max_ss() {
2170 let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2171 let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2172 let r = _mm_max_ss(a, b);
2173 assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, -10.0));
2174 }
2175
2176 #[simd_test(enable = "sse")]
2177 unsafe fn test_mm_max_ps() {
2178 let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2179 let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2180 let r = _mm_max_ps(a, b);
2181 assert_eq_m128(r, _mm_setr_ps(-1.0, 20.0, 0.0, -5.0));
2182
2183 let a = _mm_setr_ps(-0.0, 0.0, 0.0, 0.0);
2185 let b = _mm_setr_ps(0.0, 0.0, 0.0, 0.0);
2186 let r1: [u8; 16] = transmute(_mm_max_ps(a, b));
2187 let r2: [u8; 16] = transmute(_mm_max_ps(b, a));
2188 let a: [u8; 16] = transmute(a);
2189 let b: [u8; 16] = transmute(b);
2190 assert_eq!(r1, b);
2191 assert_eq!(r2, a);
2192 assert_ne!(a, b); }
2194
2195 #[simd_test(enable = "sse")]
2196 unsafe fn test_mm_and_ps() {
2197 let a = transmute(u32x4::splat(0b0011));
2198 let b = transmute(u32x4::splat(0b0101));
2199 let r = _mm_and_ps(*black_box(&a), *black_box(&b));
2200 let e = transmute(u32x4::splat(0b0001));
2201 assert_eq_m128(r, e);
2202 }
2203
2204 #[simd_test(enable = "sse")]
2205 unsafe fn test_mm_andnot_ps() {
2206 let a = transmute(u32x4::splat(0b0011));
2207 let b = transmute(u32x4::splat(0b0101));
2208 let r = _mm_andnot_ps(*black_box(&a), *black_box(&b));
2209 let e = transmute(u32x4::splat(0b0100));
2210 assert_eq_m128(r, e);
2211 }
2212
2213 #[simd_test(enable = "sse")]
2214 unsafe fn test_mm_or_ps() {
2215 let a = transmute(u32x4::splat(0b0011));
2216 let b = transmute(u32x4::splat(0b0101));
2217 let r = _mm_or_ps(*black_box(&a), *black_box(&b));
2218 let e = transmute(u32x4::splat(0b0111));
2219 assert_eq_m128(r, e);
2220 }
2221
2222 #[simd_test(enable = "sse")]
2223 unsafe fn test_mm_xor_ps() {
2224 let a = transmute(u32x4::splat(0b0011));
2225 let b = transmute(u32x4::splat(0b0101));
2226 let r = _mm_xor_ps(*black_box(&a), *black_box(&b));
2227 let e = transmute(u32x4::splat(0b0110));
2228 assert_eq_m128(r, e);
2229 }
2230
2231 #[simd_test(enable = "sse")]
2232 unsafe fn test_mm_cmpeq_ss() {
2233 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2234 let b = _mm_setr_ps(-1.0, 5.0, 6.0, 7.0);
2235 let r: u32x4 = transmute(_mm_cmpeq_ss(a, b));
2236 let e: u32x4 = transmute(_mm_setr_ps(f32::from_bits(0), 2.0, 3.0, 4.0));
2237 assert_eq!(r, e);
2238
2239 let b2 = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2240 let r2: u32x4 = transmute(_mm_cmpeq_ss(a, b2));
2241 let e2: u32x4 = transmute(_mm_setr_ps(f32::from_bits(0xffffffff), 2.0, 3.0, 4.0));
2242 assert_eq!(r2, e2);
2243 }
2244
2245 #[simd_test(enable = "sse")]
2246 unsafe fn test_mm_cmplt_ss() {
2247 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2248 let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2249 let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2250 let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2251
2252 let b1 = 0u32; let c1 = 0u32; let d1 = !0u32; let rb: u32x4 = transmute(_mm_cmplt_ss(a, b));
2257 let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2258 assert_eq!(rb, eb);
2259
2260 let rc: u32x4 = transmute(_mm_cmplt_ss(a, c));
2261 let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2262 assert_eq!(rc, ec);
2263
2264 let rd: u32x4 = transmute(_mm_cmplt_ss(a, d));
2265 let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2266 assert_eq!(rd, ed);
2267 }
2268
2269 #[simd_test(enable = "sse")]
2270 unsafe fn test_mm_cmple_ss() {
2271 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2272 let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2273 let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2274 let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2275
2276 let b1 = 0u32; let c1 = !0u32; let d1 = !0u32; let rb: u32x4 = transmute(_mm_cmple_ss(a, b));
2281 let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2282 assert_eq!(rb, eb);
2283
2284 let rc: u32x4 = transmute(_mm_cmple_ss(a, c));
2285 let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2286 assert_eq!(rc, ec);
2287
2288 let rd: u32x4 = transmute(_mm_cmple_ss(a, d));
2289 let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2290 assert_eq!(rd, ed);
2291 }
2292
2293 #[simd_test(enable = "sse")]
2294 unsafe fn test_mm_cmpgt_ss() {
2295 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2296 let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2297 let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2298 let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2299
2300 let b1 = !0u32; let c1 = 0u32; let d1 = 0u32; let rb: u32x4 = transmute(_mm_cmpgt_ss(a, b));
2305 let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2306 assert_eq!(rb, eb);
2307
2308 let rc: u32x4 = transmute(_mm_cmpgt_ss(a, c));
2309 let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2310 assert_eq!(rc, ec);
2311
2312 let rd: u32x4 = transmute(_mm_cmpgt_ss(a, d));
2313 let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2314 assert_eq!(rd, ed);
2315 }
2316
2317 #[simd_test(enable = "sse")]
2318 unsafe fn test_mm_cmpge_ss() {
2319 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2320 let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2321 let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2322 let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2323
2324 let b1 = !0u32; let c1 = !0u32; let d1 = 0u32; let rb: u32x4 = transmute(_mm_cmpge_ss(a, b));
2329 let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2330 assert_eq!(rb, eb);
2331
2332 let rc: u32x4 = transmute(_mm_cmpge_ss(a, c));
2333 let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2334 assert_eq!(rc, ec);
2335
2336 let rd: u32x4 = transmute(_mm_cmpge_ss(a, d));
2337 let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2338 assert_eq!(rd, ed);
2339 }
2340
2341 #[simd_test(enable = "sse")]
2342 unsafe fn test_mm_cmpneq_ss() {
2343 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2344 let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2345 let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2346 let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2347
2348 let b1 = !0u32; let c1 = 0u32; let d1 = !0u32; let rb: u32x4 = transmute(_mm_cmpneq_ss(a, b));
2353 let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2354 assert_eq!(rb, eb);
2355
2356 let rc: u32x4 = transmute(_mm_cmpneq_ss(a, c));
2357 let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2358 assert_eq!(rc, ec);
2359
2360 let rd: u32x4 = transmute(_mm_cmpneq_ss(a, d));
2361 let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2362 assert_eq!(rd, ed);
2363 }
2364
2365 #[simd_test(enable = "sse")]
2366 unsafe fn test_mm_cmpnlt_ss() {
2367 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2373 let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2374 let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2375 let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2376
2377 let b1 = !0u32; let c1 = !0u32; let d1 = 0u32; let rb: u32x4 = transmute(_mm_cmpnlt_ss(a, b));
2382 let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2383 assert_eq!(rb, eb);
2384
2385 let rc: u32x4 = transmute(_mm_cmpnlt_ss(a, c));
2386 let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2387 assert_eq!(rc, ec);
2388
2389 let rd: u32x4 = transmute(_mm_cmpnlt_ss(a, d));
2390 let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2391 assert_eq!(rd, ed);
2392 }
2393
2394 #[simd_test(enable = "sse")]
2395 unsafe fn test_mm_cmpnle_ss() {
2396 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2402 let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2403 let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2404 let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2405
2406 let b1 = !0u32; let c1 = 0u32; let d1 = 0u32; let rb: u32x4 = transmute(_mm_cmpnle_ss(a, b));
2411 let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2412 assert_eq!(rb, eb);
2413
2414 let rc: u32x4 = transmute(_mm_cmpnle_ss(a, c));
2415 let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2416 assert_eq!(rc, ec);
2417
2418 let rd: u32x4 = transmute(_mm_cmpnle_ss(a, d));
2419 let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2420 assert_eq!(rd, ed);
2421 }
2422
2423 #[simd_test(enable = "sse")]
2424 unsafe fn test_mm_cmpngt_ss() {
2425 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2431 let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2432 let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2433 let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2434
2435 let b1 = 0u32; let c1 = !0u32; let d1 = !0u32; let rb: u32x4 = transmute(_mm_cmpngt_ss(a, b));
2440 let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2441 assert_eq!(rb, eb);
2442
2443 let rc: u32x4 = transmute(_mm_cmpngt_ss(a, c));
2444 let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2445 assert_eq!(rc, ec);
2446
2447 let rd: u32x4 = transmute(_mm_cmpngt_ss(a, d));
2448 let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2449 assert_eq!(rd, ed);
2450 }
2451
2452 #[simd_test(enable = "sse")]
2453 unsafe fn test_mm_cmpnge_ss() {
2454 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2460 let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2461 let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2462 let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2463
2464 let b1 = 0u32; let c1 = 0u32; let d1 = !0u32; let rb: u32x4 = transmute(_mm_cmpnge_ss(a, b));
2469 let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2470 assert_eq!(rb, eb);
2471
2472 let rc: u32x4 = transmute(_mm_cmpnge_ss(a, c));
2473 let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2474 assert_eq!(rc, ec);
2475
2476 let rd: u32x4 = transmute(_mm_cmpnge_ss(a, d));
2477 let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2478 assert_eq!(rd, ed);
2479 }
2480
2481 #[simd_test(enable = "sse")]
2482 unsafe fn test_mm_cmpord_ss() {
2483 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2484 let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2485 let c = _mm_setr_ps(NAN, 5.0, 6.0, 7.0);
2486 let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2487
2488 let b1 = !0u32; let c1 = 0u32; let d1 = !0u32; let rb: u32x4 = transmute(_mm_cmpord_ss(a, b));
2493 let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2494 assert_eq!(rb, eb);
2495
2496 let rc: u32x4 = transmute(_mm_cmpord_ss(a, c));
2497 let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2498 assert_eq!(rc, ec);
2499
2500 let rd: u32x4 = transmute(_mm_cmpord_ss(a, d));
2501 let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2502 assert_eq!(rd, ed);
2503 }
2504
2505 #[simd_test(enable = "sse")]
2506 unsafe fn test_mm_cmpunord_ss() {
2507 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2508 let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2509 let c = _mm_setr_ps(NAN, 5.0, 6.0, 7.0);
2510 let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2511
2512 let b1 = 0u32; let c1 = !0u32; let d1 = 0u32; let rb: u32x4 = transmute(_mm_cmpunord_ss(a, b));
2517 let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2518 assert_eq!(rb, eb);
2519
2520 let rc: u32x4 = transmute(_mm_cmpunord_ss(a, c));
2521 let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2522 assert_eq!(rc, ec);
2523
2524 let rd: u32x4 = transmute(_mm_cmpunord_ss(a, d));
2525 let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2526 assert_eq!(rd, ed);
2527 }
2528
2529 #[simd_test(enable = "sse")]
2530 unsafe fn test_mm_cmpeq_ps() {
2531 let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2532 let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
2533 let tru = !0u32;
2534 let fls = 0u32;
2535
2536 let e = u32x4::new(fls, fls, tru, fls);
2537 let r: u32x4 = transmute(_mm_cmpeq_ps(a, b));
2538 assert_eq!(r, e);
2539 }
2540
2541 #[simd_test(enable = "sse")]
2542 unsafe fn test_mm_cmplt_ps() {
2543 let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2544 let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
2545 let tru = !0u32;
2546 let fls = 0u32;
2547
2548 let e = u32x4::new(tru, fls, fls, fls);
2549 let r: u32x4 = transmute(_mm_cmplt_ps(a, b));
2550 assert_eq!(r, e);
2551 }
2552
2553 #[simd_test(enable = "sse")]
2554 unsafe fn test_mm_cmple_ps() {
2555 let a = _mm_setr_ps(10.0, 50.0, 1.0, 4.0);
2556 let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
2557 let tru = !0u32;
2558 let fls = 0u32;
2559
2560 let e = u32x4::new(tru, fls, tru, fls);
2561 let r: u32x4 = transmute(_mm_cmple_ps(a, b));
2562 assert_eq!(r, e);
2563 }
2564
2565 #[simd_test(enable = "sse")]
2566 unsafe fn test_mm_cmpgt_ps() {
2567 let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2568 let b = _mm_setr_ps(15.0, 20.0, 1.0, 42.0);
2569 let tru = !0u32;
2570 let fls = 0u32;
2571
2572 let e = u32x4::new(fls, tru, fls, fls);
2573 let r: u32x4 = transmute(_mm_cmpgt_ps(a, b));
2574 assert_eq!(r, e);
2575 }
2576
2577 #[simd_test(enable = "sse")]
2578 unsafe fn test_mm_cmpge_ps() {
2579 let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2580 let b = _mm_setr_ps(15.0, 20.0, 1.0, 42.0);
2581 let tru = !0u32;
2582 let fls = 0u32;
2583
2584 let e = u32x4::new(fls, tru, tru, fls);
2585 let r: u32x4 = transmute(_mm_cmpge_ps(a, b));
2586 assert_eq!(r, e);
2587 }
2588
2589 #[simd_test(enable = "sse")]
2590 unsafe fn test_mm_cmpneq_ps() {
2591 let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2592 let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
2593 let tru = !0u32;
2594 let fls = 0u32;
2595
2596 let e = u32x4::new(tru, tru, fls, tru);
2597 let r: u32x4 = transmute(_mm_cmpneq_ps(a, b));
2598 assert_eq!(r, e);
2599 }
2600
2601 #[simd_test(enable = "sse")]
2602 unsafe fn test_mm_cmpnlt_ps() {
2603 let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2604 let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
2605 let tru = !0u32;
2606 let fls = 0u32;
2607
2608 let e = u32x4::new(fls, tru, tru, tru);
2609 let r: u32x4 = transmute(_mm_cmpnlt_ps(a, b));
2610 assert_eq!(r, e);
2611 }
2612
2613 #[simd_test(enable = "sse")]
2614 unsafe fn test_mm_cmpnle_ps() {
2615 let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2616 let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
2617 let tru = !0u32;
2618 let fls = 0u32;
2619
2620 let e = u32x4::new(fls, tru, fls, tru);
2621 let r: u32x4 = transmute(_mm_cmpnle_ps(a, b));
2622 assert_eq!(r, e);
2623 }
2624
2625 #[simd_test(enable = "sse")]
2626 unsafe fn test_mm_cmpngt_ps() {
2627 let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2628 let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
2629 let tru = !0u32;
2630 let fls = 0u32;
2631
2632 let e = u32x4::new(tru, fls, tru, tru);
2633 let r: u32x4 = transmute(_mm_cmpngt_ps(a, b));
2634 assert_eq!(r, e);
2635 }
2636
2637 #[simd_test(enable = "sse")]
2638 unsafe fn test_mm_cmpnge_ps() {
2639 let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2640 let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
2641 let tru = !0u32;
2642 let fls = 0u32;
2643
2644 let e = u32x4::new(tru, fls, fls, tru);
2645 let r: u32x4 = transmute(_mm_cmpnge_ps(a, b));
2646 assert_eq!(r, e);
2647 }
2648
2649 #[simd_test(enable = "sse")]
2650 unsafe fn test_mm_cmpord_ps() {
2651 let a = _mm_setr_ps(10.0, 50.0, NAN, NAN);
2652 let b = _mm_setr_ps(15.0, NAN, 1.0, NAN);
2653 let tru = !0u32;
2654 let fls = 0u32;
2655
2656 let e = u32x4::new(tru, fls, fls, fls);
2657 let r: u32x4 = transmute(_mm_cmpord_ps(a, b));
2658 assert_eq!(r, e);
2659 }
2660
2661 #[simd_test(enable = "sse")]
2662 unsafe fn test_mm_cmpunord_ps() {
2663 let a = _mm_setr_ps(10.0, 50.0, NAN, NAN);
2664 let b = _mm_setr_ps(15.0, NAN, 1.0, NAN);
2665 let tru = !0u32;
2666 let fls = 0u32;
2667
2668 let e = u32x4::new(fls, tru, tru, tru);
2669 let r: u32x4 = transmute(_mm_cmpunord_ps(a, b));
2670 assert_eq!(r, e);
2671 }
2672
2673 #[simd_test(enable = "sse")]
2674 unsafe fn test_mm_comieq_ss() {
2675 let aa = &[3.0f32, 12.0, 23.0, NAN];
2676 let bb = &[3.0f32, 47.5, 1.5, NAN];
2677
2678 let ee = &[1i32, 0, 0, 0];
2679
2680 for i in 0..4 {
2681 let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2682 let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2683
2684 let r = _mm_comieq_ss(a, b);
2685
2686 assert_eq!(
2687 ee[i], r,
2688 "_mm_comieq_ss({:?}, {:?}) = {}, expected: {} (i={})",
2689 a, b, r, ee[i], i
2690 );
2691 }
2692 }
2693
2694 #[simd_test(enable = "sse")]
2695 unsafe fn test_mm_comilt_ss() {
2696 let aa = &[3.0f32, 12.0, 23.0, NAN];
2697 let bb = &[3.0f32, 47.5, 1.5, NAN];
2698
2699 let ee = &[0i32, 1, 0, 0];
2700
2701 for i in 0..4 {
2702 let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2703 let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2704
2705 let r = _mm_comilt_ss(a, b);
2706
2707 assert_eq!(
2708 ee[i], r,
2709 "_mm_comilt_ss({:?}, {:?}) = {}, expected: {} (i={})",
2710 a, b, r, ee[i], i
2711 );
2712 }
2713 }
2714
2715 #[simd_test(enable = "sse")]
2716 unsafe fn test_mm_comile_ss() {
2717 let aa = &[3.0f32, 12.0, 23.0, NAN];
2718 let bb = &[3.0f32, 47.5, 1.5, NAN];
2719
2720 let ee = &[1i32, 1, 0, 0];
2721
2722 for i in 0..4 {
2723 let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2724 let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2725
2726 let r = _mm_comile_ss(a, b);
2727
2728 assert_eq!(
2729 ee[i], r,
2730 "_mm_comile_ss({:?}, {:?}) = {}, expected: {} (i={})",
2731 a, b, r, ee[i], i
2732 );
2733 }
2734 }
2735
2736 #[simd_test(enable = "sse")]
2737 unsafe fn test_mm_comigt_ss() {
2738 let aa = &[3.0f32, 12.0, 23.0, NAN];
2739 let bb = &[3.0f32, 47.5, 1.5, NAN];
2740
2741 let ee = &[1i32, 0, 1, 0];
2742
2743 for i in 0..4 {
2744 let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2745 let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2746
2747 let r = _mm_comige_ss(a, b);
2748
2749 assert_eq!(
2750 ee[i], r,
2751 "_mm_comige_ss({:?}, {:?}) = {}, expected: {} (i={})",
2752 a, b, r, ee[i], i
2753 );
2754 }
2755 }
2756
2757 #[simd_test(enable = "sse")]
2758 unsafe fn test_mm_comineq_ss() {
2759 let aa = &[3.0f32, 12.0, 23.0, NAN];
2760 let bb = &[3.0f32, 47.5, 1.5, NAN];
2761
2762 let ee = &[0i32, 1, 1, 1];
2763
2764 for i in 0..4 {
2765 let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2766 let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2767
2768 let r = _mm_comineq_ss(a, b);
2769
2770 assert_eq!(
2771 ee[i], r,
2772 "_mm_comineq_ss({:?}, {:?}) = {}, expected: {} (i={})",
2773 a, b, r, ee[i], i
2774 );
2775 }
2776 }
2777
2778 #[simd_test(enable = "sse")]
2779 unsafe fn test_mm_ucomieq_ss() {
2780 let aa = &[3.0f32, 12.0, 23.0, NAN];
2781 let bb = &[3.0f32, 47.5, 1.5, NAN];
2782
2783 let ee = &[1i32, 0, 0, 0];
2784
2785 for i in 0..4 {
2786 let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2787 let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2788
2789 let r = _mm_ucomieq_ss(a, b);
2790
2791 assert_eq!(
2792 ee[i], r,
2793 "_mm_ucomieq_ss({:?}, {:?}) = {}, expected: {} (i={})",
2794 a, b, r, ee[i], i
2795 );
2796 }
2797 }
2798
2799 #[simd_test(enable = "sse")]
2800 unsafe fn test_mm_ucomilt_ss() {
2801 let aa = &[3.0f32, 12.0, 23.0, NAN];
2802 let bb = &[3.0f32, 47.5, 1.5, NAN];
2803
2804 let ee = &[0i32, 1, 0, 0];
2805
2806 for i in 0..4 {
2807 let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2808 let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2809
2810 let r = _mm_ucomilt_ss(a, b);
2811
2812 assert_eq!(
2813 ee[i], r,
2814 "_mm_ucomilt_ss({:?}, {:?}) = {}, expected: {} (i={})",
2815 a, b, r, ee[i], i
2816 );
2817 }
2818 }
2819
2820 #[simd_test(enable = "sse")]
2821 unsafe fn test_mm_ucomile_ss() {
2822 let aa = &[3.0f32, 12.0, 23.0, NAN];
2823 let bb = &[3.0f32, 47.5, 1.5, NAN];
2824
2825 let ee = &[1i32, 1, 0, 0];
2826
2827 for i in 0..4 {
2828 let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2829 let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2830
2831 let r = _mm_ucomile_ss(a, b);
2832
2833 assert_eq!(
2834 ee[i], r,
2835 "_mm_ucomile_ss({:?}, {:?}) = {}, expected: {} (i={})",
2836 a, b, r, ee[i], i
2837 );
2838 }
2839 }
2840
2841 #[simd_test(enable = "sse")]
2842 unsafe fn test_mm_ucomigt_ss() {
2843 let aa = &[3.0f32, 12.0, 23.0, NAN];
2844 let bb = &[3.0f32, 47.5, 1.5, NAN];
2845
2846 let ee = &[0i32, 0, 1, 0];
2847
2848 for i in 0..4 {
2849 let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2850 let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2851
2852 let r = _mm_ucomigt_ss(a, b);
2853
2854 assert_eq!(
2855 ee[i], r,
2856 "_mm_ucomigt_ss({:?}, {:?}) = {}, expected: {} (i={})",
2857 a, b, r, ee[i], i
2858 );
2859 }
2860 }
2861
2862 #[simd_test(enable = "sse")]
2863 unsafe fn test_mm_ucomige_ss() {
2864 let aa = &[3.0f32, 12.0, 23.0, NAN];
2865 let bb = &[3.0f32, 47.5, 1.5, NAN];
2866
2867 let ee = &[1i32, 0, 1, 0];
2868
2869 for i in 0..4 {
2870 let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2871 let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2872
2873 let r = _mm_ucomige_ss(a, b);
2874
2875 assert_eq!(
2876 ee[i], r,
2877 "_mm_ucomige_ss({:?}, {:?}) = {}, expected: {} (i={})",
2878 a, b, r, ee[i], i
2879 );
2880 }
2881 }
2882
2883 #[simd_test(enable = "sse")]
2884 unsafe fn test_mm_ucomineq_ss() {
2885 let aa = &[3.0f32, 12.0, 23.0, NAN];
2886 let bb = &[3.0f32, 47.5, 1.5, NAN];
2887
2888 let ee = &[0i32, 1, 1, 1];
2889
2890 for i in 0..4 {
2891 let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2892 let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2893
2894 let r = _mm_ucomineq_ss(a, b);
2895
2896 assert_eq!(
2897 ee[i], r,
2898 "_mm_ucomineq_ss({:?}, {:?}) = {}, expected: {} (i={})",
2899 a, b, r, ee[i], i
2900 );
2901 }
2902 }
2903
2904 #[allow(deprecated)] #[simd_test(enable = "sse")]
2906 #[cfg_attr(miri, ignore)] unsafe fn test_mm_comieq_ss_vs_ucomieq_ss() {
2908 let aa = &[3.0f32, NAN, 23.0, NAN];
2911 let bb = &[3.0f32, 47.5, NAN, NAN];
2912
2913 let ee = &[1i32, 0, 0, 0];
2914 let exc = &[0u32, 1, 1, 1]; for i in 0..4 {
2917 let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2918 let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2919
2920 _MM_SET_EXCEPTION_STATE(0);
2921 let r1 = _mm_comieq_ss(*black_box(&a), b);
2922 let s1 = _MM_GET_EXCEPTION_STATE();
2923
2924 _MM_SET_EXCEPTION_STATE(0);
2925 let r2 = _mm_ucomieq_ss(*black_box(&a), b);
2926 let s2 = _MM_GET_EXCEPTION_STATE();
2927
2928 assert_eq!(
2929 ee[i], r1,
2930 "_mm_comeq_ss({:?}, {:?}) = {}, expected: {} (i={})",
2931 a, b, r1, ee[i], i
2932 );
2933 assert_eq!(
2934 ee[i], r2,
2935 "_mm_ucomeq_ss({:?}, {:?}) = {}, expected: {} (i={})",
2936 a, b, r2, ee[i], i
2937 );
2938 assert_eq!(
2939 s1,
2940 exc[i] * _MM_EXCEPT_INVALID,
2941 "_mm_comieq_ss() set exception flags: {} (i={})",
2942 s1,
2943 i
2944 );
2945 assert_eq!(
2946 s2,
2947 0, "_mm_ucomieq_ss() set exception flags: {} (i={})",
2949 s2,
2950 i
2951 );
2952 }
2953 }
2954
2955 #[simd_test(enable = "sse")]
2956 unsafe fn test_mm_cvtss_si32() {
2957 let inputs = &[42.0f32, -3.1, 4.0e10, 4.0e-20, NAN, 2147483500.1];
2958 let result = &[42i32, -3, i32::MIN, 0, i32::MIN, 2147483520];
2959 for i in 0..inputs.len() {
2960 let x = _mm_setr_ps(inputs[i], 1.0, 3.0, 4.0);
2961 let e = result[i];
2962 let r = _mm_cvtss_si32(x);
2963 assert_eq!(
2964 e, r,
2965 "TestCase #{} _mm_cvtss_si32({:?}) = {}, expected: {}",
2966 i, x, r, e
2967 );
2968 }
2969 }
2970
2971 #[simd_test(enable = "sse")]
2972 unsafe fn test_mm_cvttss_si32() {
2973 let inputs = &[
2974 (42.0f32, 42i32),
2975 (-31.4, -31),
2976 (-33.5, -33),
2977 (-34.5, -34),
2978 (10.999, 10),
2979 (-5.99, -5),
2980 (4.0e10, i32::MIN),
2981 (4.0e-10, 0),
2982 (NAN, i32::MIN),
2983 (2147483500.1, 2147483520),
2984 ];
2985 for (i, &(xi, e)) in inputs.iter().enumerate() {
2986 let x = _mm_setr_ps(xi, 1.0, 3.0, 4.0);
2987 let r = _mm_cvttss_si32(x);
2988 assert_eq!(
2989 e, r,
2990 "TestCase #{} _mm_cvttss_si32({:?}) = {}, expected: {}",
2991 i, x, r, e
2992 );
2993 }
2994 }
2995
2996 #[simd_test(enable = "sse")]
2997 unsafe fn test_mm_cvtsi32_ss() {
2998 let inputs = &[
2999 (4555i32, 4555.0f32),
3000 (322223333, 322223330.0),
3001 (-432, -432.0),
3002 (-322223333, -322223330.0),
3003 ];
3004
3005 for &(x, f) in inputs.iter() {
3006 let a = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
3007 let r = _mm_cvtsi32_ss(a, x);
3008 let e = _mm_setr_ps(f, 6.0, 7.0, 8.0);
3009 assert_eq_m128(e, r);
3010 }
3011 }
3012
3013 #[simd_test(enable = "sse")]
3014 unsafe fn test_mm_cvtss_f32() {
3015 let a = _mm_setr_ps(312.0134, 5.0, 6.0, 7.0);
3016 assert_eq!(_mm_cvtss_f32(a), 312.0134);
3017 }
3018
3019 #[simd_test(enable = "sse")]
3020 unsafe fn test_mm_set_ss() {
3021 let r = _mm_set_ss(black_box(4.25));
3022 assert_eq_m128(r, _mm_setr_ps(4.25, 0.0, 0.0, 0.0));
3023 }
3024
3025 #[simd_test(enable = "sse")]
3026 unsafe fn test_mm_set1_ps() {
3027 let r1 = _mm_set1_ps(black_box(4.25));
3028 let r2 = _mm_set_ps1(black_box(4.25));
3029 assert_eq!(get_m128(r1, 0), 4.25);
3030 assert_eq!(get_m128(r1, 1), 4.25);
3031 assert_eq!(get_m128(r1, 2), 4.25);
3032 assert_eq!(get_m128(r1, 3), 4.25);
3033 assert_eq!(get_m128(r2, 0), 4.25);
3034 assert_eq!(get_m128(r2, 1), 4.25);
3035 assert_eq!(get_m128(r2, 2), 4.25);
3036 assert_eq!(get_m128(r2, 3), 4.25);
3037 }
3038
3039 #[simd_test(enable = "sse")]
3040 unsafe fn test_mm_set_ps() {
3041 let r = _mm_set_ps(
3042 black_box(1.0),
3043 black_box(2.0),
3044 black_box(3.0),
3045 black_box(4.0),
3046 );
3047 assert_eq!(get_m128(r, 0), 4.0);
3048 assert_eq!(get_m128(r, 1), 3.0);
3049 assert_eq!(get_m128(r, 2), 2.0);
3050 assert_eq!(get_m128(r, 3), 1.0);
3051 }
3052
3053 #[simd_test(enable = "sse")]
3054 unsafe fn test_mm_setr_ps() {
3055 let r = _mm_setr_ps(
3056 black_box(1.0),
3057 black_box(2.0),
3058 black_box(3.0),
3059 black_box(4.0),
3060 );
3061 assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 3.0, 4.0));
3062 }
3063
3064 #[simd_test(enable = "sse")]
3065 unsafe fn test_mm_setzero_ps() {
3066 let r = *black_box(&_mm_setzero_ps());
3067 assert_eq_m128(r, _mm_set1_ps(0.0));
3068 }
3069
3070 #[simd_test(enable = "sse")]
3071 unsafe fn test_mm_shuffle() {
3072 assert_eq!(_MM_SHUFFLE(0, 1, 1, 3), 0b00_01_01_11);
3073 assert_eq!(_MM_SHUFFLE(3, 1, 1, 0), 0b11_01_01_00);
3074 assert_eq!(_MM_SHUFFLE(1, 2, 2, 1), 0b01_10_10_01);
3075 }
3076
3077 #[simd_test(enable = "sse")]
3078 unsafe fn test_mm_shuffle_ps() {
3079 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3080 let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
3081 let r = _mm_shuffle_ps::<0b00_01_01_11>(a, b);
3082 assert_eq_m128(r, _mm_setr_ps(4.0, 2.0, 6.0, 5.0));
3083 }
3084
3085 #[simd_test(enable = "sse")]
3086 unsafe fn test_mm_unpackhi_ps() {
3087 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3088 let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
3089 let r = _mm_unpackhi_ps(a, b);
3090 assert_eq_m128(r, _mm_setr_ps(3.0, 7.0, 4.0, 8.0));
3091 }
3092
3093 #[simd_test(enable = "sse")]
3094 unsafe fn test_mm_unpacklo_ps() {
3095 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3096 let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
3097 let r = _mm_unpacklo_ps(a, b);
3098 assert_eq_m128(r, _mm_setr_ps(1.0, 5.0, 2.0, 6.0));
3099 }
3100
3101 #[simd_test(enable = "sse")]
3102 unsafe fn test_mm_movehl_ps() {
3103 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3104 let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
3105 let r = _mm_movehl_ps(a, b);
3106 assert_eq_m128(r, _mm_setr_ps(7.0, 8.0, 3.0, 4.0));
3107 }
3108
3109 #[simd_test(enable = "sse")]
3110 unsafe fn test_mm_movelh_ps() {
3111 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3112 let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
3113 let r = _mm_movelh_ps(a, b);
3114 assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 5.0, 6.0));
3115 }
3116
3117 #[simd_test(enable = "sse")]
3118 unsafe fn test_mm_load_ss() {
3119 let a = 42.0f32;
3120 let r = _mm_load_ss(ptr::addr_of!(a));
3121 assert_eq_m128(r, _mm_setr_ps(42.0, 0.0, 0.0, 0.0));
3122 }
3123
3124 #[simd_test(enable = "sse")]
3125 unsafe fn test_mm_load1_ps() {
3126 let a = 42.0f32;
3127 let r = _mm_load1_ps(ptr::addr_of!(a));
3128 assert_eq_m128(r, _mm_setr_ps(42.0, 42.0, 42.0, 42.0));
3129 }
3130
3131 #[simd_test(enable = "sse")]
3132 unsafe fn test_mm_load_ps() {
3133 let vals = &[1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
3134
3135 let mut p = vals.as_ptr();
3136 let mut fixup = 0.0f32;
3137
3138 let unalignment = (p as usize) & 0xf;
3142 if unalignment != 0 {
3143 let delta = (16 - unalignment) >> 2;
3144 fixup = delta as f32;
3145 p = p.add(delta);
3146 }
3147
3148 let r = _mm_load_ps(p);
3149 let e = _mm_add_ps(_mm_setr_ps(1.0, 2.0, 3.0, 4.0), _mm_set1_ps(fixup));
3150 assert_eq_m128(r, e);
3151 }
3152
3153 #[simd_test(enable = "sse")]
3154 unsafe fn test_mm_loadu_ps() {
3155 let vals = &[1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
3156 let p = vals.as_ptr().add(3);
3157 let r = _mm_loadu_ps(black_box(p));
3158 assert_eq_m128(r, _mm_setr_ps(4.0, 5.0, 6.0, 7.0));
3159 }
3160
3161 #[simd_test(enable = "sse")]
3162 unsafe fn test_mm_loadr_ps() {
3163 let vals = &[1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
3164
3165 let mut p = vals.as_ptr();
3166 let mut fixup = 0.0f32;
3167
3168 let unalignment = (p as usize) & 0xf;
3172 if unalignment != 0 {
3173 let delta = (16 - unalignment) >> 2;
3174 fixup = delta as f32;
3175 p = p.add(delta);
3176 }
3177
3178 let r = _mm_loadr_ps(p);
3179 let e = _mm_add_ps(_mm_setr_ps(4.0, 3.0, 2.0, 1.0), _mm_set1_ps(fixup));
3180 assert_eq_m128(r, e);
3181 }
3182
3183 #[simd_test(enable = "sse")]
3184 unsafe fn test_mm_store_ss() {
3185 let mut vals = [0.0f32; 8];
3186 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3187 _mm_store_ss(vals.as_mut_ptr().add(1), a);
3188
3189 assert_eq!(vals[0], 0.0);
3190 assert_eq!(vals[1], 1.0);
3191 assert_eq!(vals[2], 0.0);
3192 }
3193
3194 #[simd_test(enable = "sse")]
3195 unsafe fn test_mm_store1_ps() {
3196 let mut vals = [0.0f32; 8];
3197 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3198
3199 let mut ofs = 0;
3200 let mut p = vals.as_mut_ptr();
3201
3202 if (p as usize) & 0xf != 0 {
3203 ofs = (16 - ((p as usize) & 0xf)) >> 2;
3204 p = p.add(ofs);
3205 }
3206
3207 _mm_store1_ps(p, *black_box(&a));
3208
3209 if ofs > 0 {
3210 assert_eq!(vals[ofs - 1], 0.0);
3211 }
3212 assert_eq!(vals[ofs + 0], 1.0);
3213 assert_eq!(vals[ofs + 1], 1.0);
3214 assert_eq!(vals[ofs + 2], 1.0);
3215 assert_eq!(vals[ofs + 3], 1.0);
3216 assert_eq!(vals[ofs + 4], 0.0);
3217 }
3218
3219 #[simd_test(enable = "sse")]
3220 unsafe fn test_mm_store_ps() {
3221 let mut vals = [0.0f32; 8];
3222 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3223
3224 let mut ofs = 0;
3225 let mut p = vals.as_mut_ptr();
3226
3227 if (p as usize) & 0xf != 0 {
3229 ofs = (16 - ((p as usize) & 0xf)) >> 2;
3230 p = p.add(ofs);
3231 }
3232
3233 _mm_store_ps(p, *black_box(&a));
3234
3235 if ofs > 0 {
3236 assert_eq!(vals[ofs - 1], 0.0);
3237 }
3238 assert_eq!(vals[ofs + 0], 1.0);
3239 assert_eq!(vals[ofs + 1], 2.0);
3240 assert_eq!(vals[ofs + 2], 3.0);
3241 assert_eq!(vals[ofs + 3], 4.0);
3242 assert_eq!(vals[ofs + 4], 0.0);
3243 }
3244
3245 #[simd_test(enable = "sse")]
3246 unsafe fn test_mm_storer_ps() {
3247 let mut vals = [0.0f32; 8];
3248 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3249
3250 let mut ofs = 0;
3251 let mut p = vals.as_mut_ptr();
3252
3253 if (p as usize) & 0xf != 0 {
3255 ofs = (16 - ((p as usize) & 0xf)) >> 2;
3256 p = p.add(ofs);
3257 }
3258
3259 _mm_storer_ps(p, *black_box(&a));
3260
3261 if ofs > 0 {
3262 assert_eq!(vals[ofs - 1], 0.0);
3263 }
3264 assert_eq!(vals[ofs + 0], 4.0);
3265 assert_eq!(vals[ofs + 1], 3.0);
3266 assert_eq!(vals[ofs + 2], 2.0);
3267 assert_eq!(vals[ofs + 3], 1.0);
3268 assert_eq!(vals[ofs + 4], 0.0);
3269 }
3270
3271 #[simd_test(enable = "sse")]
3272 unsafe fn test_mm_storeu_ps() {
3273 let mut vals = [0.0f32; 8];
3274 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3275
3276 let mut ofs = 0;
3277 let mut p = vals.as_mut_ptr();
3278
3279 if (p as usize) & 0xf == 0 {
3281 ofs = 1;
3282 p = p.add(1);
3283 }
3284
3285 _mm_storeu_ps(p, *black_box(&a));
3286
3287 if ofs > 0 {
3288 assert_eq!(vals[ofs - 1], 0.0);
3289 }
3290 assert_eq!(vals[ofs + 0], 1.0);
3291 assert_eq!(vals[ofs + 1], 2.0);
3292 assert_eq!(vals[ofs + 2], 3.0);
3293 assert_eq!(vals[ofs + 3], 4.0);
3294 assert_eq!(vals[ofs + 4], 0.0);
3295 }
3296
3297 #[simd_test(enable = "sse")]
3298 unsafe fn test_mm_move_ss() {
3299 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3300 let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
3301
3302 let r = _mm_move_ss(a, b);
3303 let e = _mm_setr_ps(5.0, 2.0, 3.0, 4.0);
3304 assert_eq_m128(e, r);
3305 }
3306
3307 #[simd_test(enable = "sse")]
3308 unsafe fn test_mm_movemask_ps() {
3309 let r = _mm_movemask_ps(_mm_setr_ps(-1.0, 5.0, -5.0, 0.0));
3310 assert_eq!(r, 0b0101);
3311
3312 let r = _mm_movemask_ps(_mm_setr_ps(-1.0, -5.0, -5.0, 0.0));
3313 assert_eq!(r, 0b0111);
3314 }
3315
// Smoke test: only checks that `_mm_sfence` can be called; nothing is
// asserted about its effect.
#[simd_test(enable = "sse")]
// NOTE(review): skipped under Miri, presumably because Miri cannot execute the
// fence -- confirm.
#[cfg_attr(miri, ignore)]
unsafe fn test_mm_sfence() {
    _mm_sfence();
}
3322
// Checks that turning flush-to-zero ON changes the result of a multiplication
// whose exact product is too small for a normal `f32`.
// The CSR helpers used here are deprecated, hence the `allow`.
#[allow(deprecated)]
#[simd_test(enable = "sse")]
// NOTE(review): skipped under Miri, presumably because it cannot model the
// control/status register accessed below -- confirm.
#[cfg_attr(miri, ignore)]
unsafe fn test_mm_getcsr_setcsr_1() {
    // Remember the current register value so it can be restored afterwards.
    let saved_csr = _mm_getcsr();

    // Lane 0: 1.1e-36 * 0.001 = 1.1e-39, below the smallest normal `f32`
    // (about 1.18e-38). Lanes 1..=3 give ordinary results.
    let a = _mm_setr_ps(1.1e-36, 0.0, 0.0, 1.0);
    let b = _mm_setr_ps(0.001, 0.0, 0.0, 1.0);

    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
    // `black_box` on both operands: presumably so the product is computed at
    // run time under the mode set above rather than folded by the compiler.
    let r = _mm_mul_ps(*black_box(&a), *black_box(&b));

    // Restore the saved register before asserting anything.
    _mm_setcsr(saved_csr);

    // With flush-to-zero on, lane 0 is expected to come out as exactly 0.0.
    let exp = _mm_setr_ps(0.0, 0.0, 0.0, 1.0);
    assert_eq_m128(r, exp);
}
3340
// Checks that with flush-to-zero OFF a multiplication whose exact product is
// too small for a normal `f32` keeps its tiny non-zero result.
// The CSR helpers used here are deprecated, hence the `allow`.
#[allow(deprecated)]
#[simd_test(enable = "sse")]
// NOTE(review): skipped under Miri, presumably because it cannot model the
// control/status register accessed below -- confirm.
#[cfg_attr(miri, ignore)]
unsafe fn test_mm_getcsr_setcsr_2() {
    // Remember the current register value so it can be restored afterwards.
    let saved_csr = _mm_getcsr();

    // Lane 0: 1.1e-36 * 0.001 = 1.1e-39, below the smallest normal `f32`
    // (about 1.18e-38). Lanes 1..=3 give ordinary results.
    let a = _mm_setr_ps(1.1e-36, 0.0, 0.0, 1.0);
    let b = _mm_setr_ps(0.001, 0.0, 0.0, 1.0);

    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_OFF);
    // `black_box` on both operands: presumably so the product is computed at
    // run time under the mode set above rather than folded by the compiler.
    let r = _mm_mul_ps(*black_box(&a), *black_box(&b));

    // Restore the saved register before asserting anything.
    _mm_setcsr(saved_csr);

    // With flush-to-zero off, lane 0 is expected to keep the value 1.1e-39.
    let exp = _mm_setr_ps(1.1e-39, 0.0, 0.0, 1.0);
    assert_eq_m128(r, exp);
}
3360
// Checks that a multiplication with a very small result sets the underflow
// exception flag.
// The exception-state helpers used here are deprecated, hence the `allow`.
// NOTE(review): unlike the sibling CSR tests, this one does not save and
// restore the register around the test.
#[allow(deprecated)]
#[simd_test(enable = "sse")]
// NOTE(review): skipped under Miri, presumably because it cannot model the
// exception state accessed below -- confirm.
#[cfg_attr(miri, ignore)]
unsafe fn test_mm_getcsr_setcsr_underflow() {
    // Start from a clean exception state.
    _MM_SET_EXCEPTION_STATE(0);

    // Lane 0: 1.1e-36 * 1e-5 = 1.1e-41, far below the smallest normal `f32`
    // (about 1.18e-38).
    let a = _mm_setr_ps(1.1e-36, 0.0, 0.0, 1.0);
    let b = _mm_setr_ps(1e-5, 0.0, 0.0, 1.0);

    // Building the operands must not have raised any flag yet.
    assert_eq!(_MM_GET_EXCEPTION_STATE(), 0);

    let r = _mm_mul_ps(*black_box(&a), *black_box(&b));

    let exp = _mm_setr_ps(1.1e-41, 0.0, 0.0, 1.0);
    assert_eq_m128(r, exp);

    // Only the underflow bit is inspected; other flags may also be set.
    let underflow = _MM_GET_EXCEPTION_STATE() & _MM_EXCEPT_UNDERFLOW != 0;
    assert!(underflow);
}
3380
3381 #[simd_test(enable = "sse")]
3382 unsafe fn test_MM_TRANSPOSE4_PS() {
3383 let mut a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3384 let mut b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
3385 let mut c = _mm_setr_ps(9.0, 10.0, 11.0, 12.0);
3386 let mut d = _mm_setr_ps(13.0, 14.0, 15.0, 16.0);
3387
3388 _MM_TRANSPOSE4_PS(&mut a, &mut b, &mut c, &mut d);
3389
3390 assert_eq_m128(a, _mm_setr_ps(1.0, 5.0, 9.0, 13.0));
3391 assert_eq_m128(b, _mm_setr_ps(2.0, 6.0, 10.0, 14.0));
3392 assert_eq_m128(c, _mm_setr_ps(3.0, 7.0, 11.0, 15.0));
3393 assert_eq_m128(d, _mm_setr_ps(4.0, 8.0, 12.0, 16.0));
3394 }
3395
// Four `f32`s forced onto a 16-byte boundary, used by the streaming-store test
// in this module as a store destination.
#[repr(align(16))]
struct Memory {
    // The four floats; being the first and only field, they start at the
    // struct's 16-byte aligned address.
    pub data: [f32; 4],
}
3400
3401 #[simd_test(enable = "sse")]
3402 #[cfg_attr(miri, ignore)]
3405 unsafe fn test_mm_stream_ps() {
3406 let a = _mm_set1_ps(7.0);
3407 let mut mem = Memory { data: [-1.0; 4] };
3408
3409 _mm_stream_ps(ptr::addr_of_mut!(mem.data[0]), a);
3410 for i in 0..4 {
3411 assert_eq!(mem.data[i], get_m128(a, i));
3412 }
3413 }
3414}