Lines Matching refs:__A

105 _mm_add_ss (__m128 __A, __m128 __B)
107 return (__m128) __builtin_ia32_addss ((__v4sf)__A, (__v4sf)__B);
111 _mm_sub_ss (__m128 __A, __m128 __B)
113 return (__m128) __builtin_ia32_subss ((__v4sf)__A, (__v4sf)__B);
117 _mm_mul_ss (__m128 __A, __m128 __B)
119 return (__m128) __builtin_ia32_mulss ((__v4sf)__A, (__v4sf)__B);
123 _mm_div_ss (__m128 __A, __m128 __B)
125 return (__m128) __builtin_ia32_divss ((__v4sf)__A, (__v4sf)__B);
129 _mm_sqrt_ss (__m128 __A)
131 return (__m128) __builtin_ia32_sqrtss ((__v4sf)__A);
135 _mm_rcp_ss (__m128 __A)
137 return (__m128) __builtin_ia32_rcpss ((__v4sf)__A);
141 _mm_rsqrt_ss (__m128 __A)
143 return (__m128) __builtin_ia32_rsqrtss ((__v4sf)__A);
147 _mm_min_ss (__m128 __A, __m128 __B)
149 return (__m128) __builtin_ia32_minss ((__v4sf)__A, (__v4sf)__B);
153 _mm_max_ss (__m128 __A, __m128 __B)
155 return (__m128) __builtin_ia32_maxss ((__v4sf)__A, (__v4sf)__B);
161 _mm_add_ps (__m128 __A, __m128 __B)
163 return (__m128) __builtin_ia32_addps ((__v4sf)__A, (__v4sf)__B);
167 _mm_sub_ps (__m128 __A, __m128 __B)
169 return (__m128) __builtin_ia32_subps ((__v4sf)__A, (__v4sf)__B);
173 _mm_mul_ps (__m128 __A, __m128 __B)
175 return (__m128) __builtin_ia32_mulps ((__v4sf)__A, (__v4sf)__B);
179 _mm_div_ps (__m128 __A, __m128 __B)
181 return (__m128) __builtin_ia32_divps ((__v4sf)__A, (__v4sf)__B);
185 _mm_sqrt_ps (__m128 __A)
187 return (__m128) __builtin_ia32_sqrtps ((__v4sf)__A);
191 _mm_rcp_ps (__m128 __A)
193 return (__m128) __builtin_ia32_rcpps ((__v4sf)__A);
197 _mm_rsqrt_ps (__m128 __A)
199 return (__m128) __builtin_ia32_rsqrtps ((__v4sf)__A);
203 _mm_min_ps (__m128 __A, __m128 __B)
205 return (__m128) __builtin_ia32_minps ((__v4sf)__A, (__v4sf)__B);
209 _mm_max_ps (__m128 __A, __m128 __B)
211 return (__m128) __builtin_ia32_maxps ((__v4sf)__A, (__v4sf)__B);
217 _mm_and_ps (__m128 __A, __m128 __B)
219 return __builtin_ia32_andps (__A, __B);
223 _mm_andnot_ps (__m128 __A, __m128 __B)
225 return __builtin_ia32_andnps (__A, __B);
229 _mm_or_ps (__m128 __A, __m128 __B)
231 return __builtin_ia32_orps (__A, __B);
235 _mm_xor_ps (__m128 __A, __m128 __B)
237 return __builtin_ia32_xorps (__A, __B);
245 _mm_cmpeq_ss (__m128 __A, __m128 __B)
247 return (__m128) __builtin_ia32_cmpeqss ((__v4sf)__A, (__v4sf)__B);
251 _mm_cmplt_ss (__m128 __A, __m128 __B)
253 return (__m128) __builtin_ia32_cmpltss ((__v4sf)__A, (__v4sf)__B);
257 _mm_cmple_ss (__m128 __A, __m128 __B)
259 return (__m128) __builtin_ia32_cmpless ((__v4sf)__A, (__v4sf)__B);
263 _mm_cmpgt_ss (__m128 __A, __m128 __B)
265 return (__m128) __builtin_ia32_movss ((__v4sf) __A,
269 __A));
273 _mm_cmpge_ss (__m128 __A, __m128 __B)
275 return (__m128) __builtin_ia32_movss ((__v4sf) __A,
279 __A));
283 _mm_cmpneq_ss (__m128 __A, __m128 __B)
285 return (__m128) __builtin_ia32_cmpneqss ((__v4sf)__A, (__v4sf)__B);
289 _mm_cmpnlt_ss (__m128 __A, __m128 __B)
291 return (__m128) __builtin_ia32_cmpnltss ((__v4sf)__A, (__v4sf)__B);
295 _mm_cmpnle_ss (__m128 __A, __m128 __B)
297 return (__m128) __builtin_ia32_cmpnless ((__v4sf)__A, (__v4sf)__B);
301 _mm_cmpngt_ss (__m128 __A, __m128 __B)
303 return (__m128) __builtin_ia32_movss ((__v4sf) __A,
307 __A));
311 _mm_cmpnge_ss (__m128 __A, __m128 __B)
313 return (__m128) __builtin_ia32_movss ((__v4sf) __A,
317 __A));
321 _mm_cmpord_ss (__m128 __A, __m128 __B)
323 return (__m128) __builtin_ia32_cmpordss ((__v4sf)__A, (__v4sf)__B);
327 _mm_cmpunord_ss (__m128 __A, __m128 __B)
329 return (__m128) __builtin_ia32_cmpunordss ((__v4sf)__A, (__v4sf)__B);
337 _mm_cmpeq_ps (__m128 __A, __m128 __B)
339 return (__m128) __builtin_ia32_cmpeqps ((__v4sf)__A, (__v4sf)__B);
343 _mm_cmplt_ps (__m128 __A, __m128 __B)
345 return (__m128) __builtin_ia32_cmpltps ((__v4sf)__A, (__v4sf)__B);
349 _mm_cmple_ps (__m128 __A, __m128 __B)
351 return (__m128) __builtin_ia32_cmpleps ((__v4sf)__A, (__v4sf)__B);
355 _mm_cmpgt_ps (__m128 __A, __m128 __B)
357 return (__m128) __builtin_ia32_cmpgtps ((__v4sf)__A, (__v4sf)__B);
361 _mm_cmpge_ps (__m128 __A, __m128 __B)
363 return (__m128) __builtin_ia32_cmpgeps ((__v4sf)__A, (__v4sf)__B);
367 _mm_cmpneq_ps (__m128 __A, __m128 __B)
369 return (__m128) __builtin_ia32_cmpneqps ((__v4sf)__A, (__v4sf)__B);
373 _mm_cmpnlt_ps (__m128 __A, __m128 __B)
375 return (__m128) __builtin_ia32_cmpnltps ((__v4sf)__A, (__v4sf)__B);
379 _mm_cmpnle_ps (__m128 __A, __m128 __B)
381 return (__m128) __builtin_ia32_cmpnleps ((__v4sf)__A, (__v4sf)__B);
385 _mm_cmpngt_ps (__m128 __A, __m128 __B)
387 return (__m128) __builtin_ia32_cmpngtps ((__v4sf)__A, (__v4sf)__B);
391 _mm_cmpnge_ps (__m128 __A, __m128 __B)
393 return (__m128) __builtin_ia32_cmpngeps ((__v4sf)__A, (__v4sf)__B);
397 _mm_cmpord_ps (__m128 __A, __m128 __B)
399 return (__m128) __builtin_ia32_cmpordps ((__v4sf)__A, (__v4sf)__B);
403 _mm_cmpunord_ps (__m128 __A, __m128 __B)
405 return (__m128) __builtin_ia32_cmpunordps ((__v4sf)__A, (__v4sf)__B);
412 _mm_comieq_ss (__m128 __A, __m128 __B)
414 return __builtin_ia32_comieq ((__v4sf)__A, (__v4sf)__B);
418 _mm_comilt_ss (__m128 __A, __m128 __B)
420 return __builtin_ia32_comilt ((__v4sf)__A, (__v4sf)__B);
424 _mm_comile_ss (__m128 __A, __m128 __B)
426 return __builtin_ia32_comile ((__v4sf)__A, (__v4sf)__B);
430 _mm_comigt_ss (__m128 __A, __m128 __B)
432 return __builtin_ia32_comigt ((__v4sf)__A, (__v4sf)__B);
436 _mm_comige_ss (__m128 __A, __m128 __B)
438 return __builtin_ia32_comige ((__v4sf)__A, (__v4sf)__B);
442 _mm_comineq_ss (__m128 __A, __m128 __B)
444 return __builtin_ia32_comineq ((__v4sf)__A, (__v4sf)__B);
448 _mm_ucomieq_ss (__m128 __A, __m128 __B)
450 return __builtin_ia32_ucomieq ((__v4sf)__A, (__v4sf)__B);
454 _mm_ucomilt_ss (__m128 __A, __m128 __B)
456 return __builtin_ia32_ucomilt ((__v4sf)__A, (__v4sf)__B);
460 _mm_ucomile_ss (__m128 __A, __m128 __B)
462 return __builtin_ia32_ucomile ((__v4sf)__A, (__v4sf)__B);
466 _mm_ucomigt_ss (__m128 __A, __m128 __B)
468 return __builtin_ia32_ucomigt ((__v4sf)__A, (__v4sf)__B);
472 _mm_ucomige_ss (__m128 __A, __m128 __B)
474 return __builtin_ia32_ucomige ((__v4sf)__A, (__v4sf)__B);
478 _mm_ucomineq_ss (__m128 __A, __m128 __B)
480 return __builtin_ia32_ucomineq ((__v4sf)__A, (__v4sf)__B);
486 _mm_cvtss_si32 (__m128 __A)
488 return __builtin_ia32_cvtss2si ((__v4sf) __A);
492 _mm_cvt_ss2si (__m128 __A)
494 return _mm_cvtss_si32 (__A);
503 _mm_cvtss_si64 (__m128 __A)
505 return __builtin_ia32_cvtss2si64 ((__v4sf) __A);
510 _mm_cvtss_si64x (__m128 __A)
512 return __builtin_ia32_cvtss2si64 ((__v4sf) __A);
519 _mm_cvtps_pi32 (__m128 __A)
521 return (__m64) __builtin_ia32_cvtps2pi ((__v4sf) __A);
525 _mm_cvt_ps2pi (__m128 __A)
527 return _mm_cvtps_pi32 (__A);
532 _mm_cvttss_si32 (__m128 __A)
534 return __builtin_ia32_cvttss2si ((__v4sf) __A);
538 _mm_cvtt_ss2si (__m128 __A)
540 return _mm_cvttss_si32 (__A);
548 _mm_cvttss_si64 (__m128 __A)
550 return __builtin_ia32_cvttss2si64 ((__v4sf) __A);
555 _mm_cvttss_si64x (__m128 __A)
557 return __builtin_ia32_cvttss2si64 ((__v4sf) __A);
564 _mm_cvttps_pi32 (__m128 __A)
566 return (__m64) __builtin_ia32_cvttps2pi ((__v4sf) __A);
570 _mm_cvtt_ps2pi (__m128 __A)
572 return _mm_cvttps_pi32 (__A);
577 _mm_cvtsi32_ss (__m128 __A, int __B)
579 return (__m128) __builtin_ia32_cvtsi2ss ((__v4sf) __A, __B);
583 _mm_cvt_si2ss (__m128 __A, int __B)
585 return _mm_cvtsi32_ss (__A, __B);
593 _mm_cvtsi64_ss (__m128 __A, long long __B)
595 return (__m128) __builtin_ia32_cvtsi642ss ((__v4sf) __A, __B);
600 _mm_cvtsi64x_ss (__m128 __A, long long __B)
602 return (__m128) __builtin_ia32_cvtsi642ss ((__v4sf) __A, __B);
609 _mm_cvtpi32_ps (__m128 __A, __m64 __B)
611 return (__m128) __builtin_ia32_cvtpi2ps ((__v4sf) __A, (__v2si)__B);
615 _mm_cvt_pi2ps (__m128 __A, __m64 __B)
617 return _mm_cvtpi32_ps (__A, __B);
622 _mm_cvtpi16_ps (__m64 __A)
631 __sign = __builtin_ia32_pcmpgtw ((__v4hi)0LL, (__v4hi)__A);
634 __hisi = (__v2si) __builtin_ia32_punpckhwd ((__v4hi)__A, __sign);
635 __losi = (__v2si) __builtin_ia32_punpcklwd ((__v4hi)__A, __sign);
648 _mm_cvtpu16_ps (__m64 __A)
654 __hisi = (__v2si) __builtin_ia32_punpckhwd ((__v4hi)__A, (__v4hi)0LL);
655 __losi = (__v2si) __builtin_ia32_punpcklwd ((__v4hi)__A, (__v4hi)0LL);
668 _mm_cvtpi8_ps (__m64 __A)
675 __sign = __builtin_ia32_pcmpgtb ((__v8qi)0LL, (__v8qi)__A);
678 __A = (__m64) __builtin_ia32_punpcklbw ((__v8qi)__A, __sign);
680 return _mm_cvtpi16_ps(__A);
685 _mm_cvtpu8_ps(__m64 __A)
687 __A = (__m64) __builtin_ia32_punpcklbw ((__v8qi)__A, (__v8qi)0LL);
688 return _mm_cvtpu16_ps(__A);
693 _mm_cvtpi32x2_ps(__m64 __A, __m64 __B)
696 __v4sf __sfa = __builtin_ia32_cvtpi2ps (__zero, (__v2si)__A);
703 _mm_cvtps_pi16(__m128 __A)
705 __v4sf __hisf = (__v4sf)__A;
714 _mm_cvtps_pi8(__m128 __A)
716 __v4hi __tmp = (__v4hi) _mm_cvtps_pi16 (__A);
723 _mm_shuffle_ps (__m128 __A, __m128 __B, int __mask)
725 return (__m128) __builtin_ia32_shufps ((__v4sf)__A, (__v4sf)__B, __mask);
735 _mm_unpackhi_ps (__m128 __A, __m128 __B)
737 return (__m128) __builtin_ia32_unpckhps ((__v4sf)__A, (__v4sf)__B);
742 _mm_unpacklo_ps (__m128 __A, __m128 __B)
744 return (__m128) __builtin_ia32_unpcklps ((__v4sf)__A, (__v4sf)__B);
750 _mm_loadh_pi (__m128 __A, __m64 const *__P)
752 return (__m128) __builtin_ia32_loadhps ((__v4sf)__A, (__v2si *)__P);
757 _mm_storeh_pi (__m64 *__P, __m128 __A)
759 __builtin_ia32_storehps ((__v2si *)__P, (__v4sf)__A);
764 _mm_movehl_ps (__m128 __A, __m128 __B)
766 return (__m128) __builtin_ia32_movhlps ((__v4sf)__A, (__v4sf)__B);
771 _mm_movelh_ps (__m128 __A, __m128 __B)
773 return (__m128) __builtin_ia32_movlhps ((__v4sf)__A, (__v4sf)__B);
779 _mm_loadl_pi (__m128 __A, __m64 const *__P)
781 return (__m128) __builtin_ia32_loadlps ((__v4sf)__A, (__v2si *)__P);
786 _mm_storel_pi (__m64 *__P, __m128 __A)
788 __builtin_ia32_storelps ((__v2si *)__P, (__v4sf)__A);
793 _mm_movemask_ps (__m128 __A)
795 return __builtin_ia32_movmskps ((__v4sf)__A);
940 _mm_store_ss (float *__P, __m128 __A)
942 *__P = __builtin_ia32_vec_ext_v4sf ((__v4sf)__A, 0);
946 _mm_cvtss_f32 (__m128 __A)
948 return __builtin_ia32_vec_ext_v4sf ((__v4sf)__A, 0);
953 _mm_store_ps (float *__P, __m128 __A)
955 *(__v4sf *)__P = (__v4sf)__A;
960 _mm_storeu_ps (float *__P, __m128 __A)
962 __builtin_ia32_storeups (__P, (__v4sf)__A);
967 _mm_store1_ps (float *__P, __m128 __A)
969 __v4sf __va = (__v4sf)__A;
975 _mm_store_ps1 (float *__P, __m128 __A)
977 _mm_store1_ps (__P, __A);
982 _mm_storer_ps (float *__P, __m128 __A)
984 __v4sf __va = (__v4sf)__A;
991 _mm_move_ss (__m128 __A, __m128 __B)
993 return (__m128) __builtin_ia32_movss ((__v4sf)__A, (__v4sf)__B);
999 _mm_extract_pi16 (__m64 const __A, int const __N)
1001 return __builtin_ia32_vec_ext_v4hi ((__v4hi)__A, __N);
1005 _m_pextrw (__m64 const __A, int const __N)
1007 return _mm_extract_pi16 (__A, __N);
1018 _mm_insert_pi16 (__m64 const __A, int const __D, int const __N)
1020 return (__m64) __builtin_ia32_vec_set_v4hi ((__v4hi)__A, __D, __N);
1024 _m_pinsrw (__m64 const __A, int const __D, int const __N)
1026 return _mm_insert_pi16 (__A, __D, __N);
1036 _mm_max_pi16 (__m64 __A, __m64 __B)
1038 return (__m64) __builtin_ia32_pmaxsw ((__v4hi)__A, (__v4hi)__B);
1042 _m_pmaxsw (__m64 __A, __m64 __B)
1044 return _mm_max_pi16 (__A, __B);
1049 _mm_max_pu8 (__m64 __A, __m64 __B)
1051 return (__m64) __builtin_ia32_pmaxub ((__v8qi)__A, (__v8qi)__B);
1055 _m_pmaxub (__m64 __A, __m64 __B)
1057 return _mm_max_pu8 (__A, __B);
1062 _mm_min_pi16 (__m64 __A, __m64 __B)
1064 return (__m64) __builtin_ia32_pminsw ((__v4hi)__A, (__v4hi)__B);
1068 _m_pminsw (__m64 __A, __m64 __B)
1070 return _mm_min_pi16 (__A, __B);
1075 _mm_min_pu8 (__m64 __A, __m64 __B)
1077 return (__m64) __builtin_ia32_pminub ((__v8qi)__A, (__v8qi)__B);
1081 _m_pminub (__m64 __A, __m64 __B)
1083 return _mm_min_pu8 (__A, __B);
1088 _mm_movemask_pi8 (__m64 __A)
1090 return __builtin_ia32_pmovmskb ((__v8qi)__A);
1094 _m_pmovmskb (__m64 __A)
1096 return _mm_movemask_pi8 (__A);
1102 _mm_mulhi_pu16 (__m64 __A, __m64 __B)
1104 return (__m64) __builtin_ia32_pmulhuw ((__v4hi)__A, (__v4hi)__B);
1108 _m_pmulhuw (__m64 __A, __m64 __B)
1110 return _mm_mulhi_pu16 (__A, __B);
1117 _mm_shuffle_pi16 (__m64 __A, int __N)
1119 return (__m64) __builtin_ia32_pshufw ((__v4hi)__A, __N);
1123 _m_pshufw (__m64 __A, int __N)
1125 return _mm_shuffle_pi16 (__A, __N);
1137 _mm_maskmove_si64 (__m64 __A, __m64 __N, char *__P)
1139 __builtin_ia32_maskmovq ((__v8qi)__A, (__v8qi)__N, __P);
1143 _m_maskmovq (__m64 __A, __m64 __N, char *__P)
1145 _mm_maskmove_si64 (__A, __N, __P);
1150 _mm_avg_pu8 (__m64 __A, __m64 __B)
1152 return (__m64) __builtin_ia32_pavgb ((__v8qi)__A, (__v8qi)__B);
1156 _m_pavgb (__m64 __A, __m64 __B)
1158 return _mm_avg_pu8 (__A, __B);
1163 _mm_avg_pu16 (__m64 __A, __m64 __B)
1165 return (__m64) __builtin_ia32_pavgw ((__v4hi)__A, (__v4hi)__B);
1169 _m_pavgw (__m64 __A, __m64 __B)
1171 return _mm_avg_pu16 (__A, __B);
1178 _mm_sad_pu8 (__m64 __A, __m64 __B)
1180 return (__m64) __builtin_ia32_psadbw ((__v8qi)__A, (__v8qi)__B);
1184 _m_psadbw (__m64 __A, __m64 __B)
1186 return _mm_sad_pu8 (__A, __B);
1204 _mm_stream_pi (__m64 *__P, __m64 __A)
1206 __builtin_ia32_movntq ((unsigned long long *)__P, (unsigned long long)__A);
1211 _mm_stream_ps (float *__P, __m128 __A)
1213 __builtin_ia32_movntps (__P, (__v4sf)__A);