Lines Matching refs:__b

63 __DEVICE__ unsigned int __byte_perm(unsigned int __a, unsigned int __b,
65 return __nv_byte_perm(__a, __b, __c);
79 __DEVICE__ double __dadd_rd(double __a, double __b) {
80 return __nv_dadd_rd(__a, __b);
82 __DEVICE__ double __dadd_rn(double __a, double __b) {
83 return __nv_dadd_rn(__a, __b);
85 __DEVICE__ double __dadd_ru(double __a, double __b) {
86 return __nv_dadd_ru(__a, __b);
88 __DEVICE__ double __dadd_rz(double __a, double __b) {
89 return __nv_dadd_rz(__a, __b);
91 __DEVICE__ double __ddiv_rd(double __a, double __b) {
92 return __nv_ddiv_rd(__a, __b);
94 __DEVICE__ double __ddiv_rn(double __a, double __b) {
95 return __nv_ddiv_rn(__a, __b);
97 __DEVICE__ double __ddiv_ru(double __a, double __b) {
98 return __nv_ddiv_ru(__a, __b);
100 __DEVICE__ double __ddiv_rz(double __a, double __b) {
101 return __nv_ddiv_rz(__a, __b);
103 __DEVICE__ double __dmul_rd(double __a, double __b) {
104 return __nv_dmul_rd(__a, __b);
106 __DEVICE__ double __dmul_rn(double __a, double __b) {
107 return __nv_dmul_rn(__a, __b);
109 __DEVICE__ double __dmul_ru(double __a, double __b) {
110 return __nv_dmul_ru(__a, __b);
112 __DEVICE__ double __dmul_rz(double __a, double __b) {
113 return __nv_dmul_rz(__a, __b);
180 __DEVICE__ double __dsub_rd(double __a, double __b) {
181 return __nv_dsub_rd(__a, __b);
183 __DEVICE__ double __dsub_rn(double __a, double __b) {
184 return __nv_dsub_rn(__a, __b);
186 __DEVICE__ double __dsub_ru(double __a, double __b) {
187 return __nv_dsub_ru(__a, __b);
189 __DEVICE__ double __dsub_rz(double __a, double __b) {
190 return __nv_dsub_rz(__a, __b);
215 __DEVICE__ float __fadd_rd(float __a, float __b) {
216 return __nv_fadd_rd(__a, __b);
218 __DEVICE__ float __fadd_rn(float __a, float __b) {
219 return __nv_fadd_rn(__a, __b);
221 __DEVICE__ float __fadd_ru(float __a, float __b) {
222 return __nv_fadd_ru(__a, __b);
224 __DEVICE__ float __fadd_rz(float __a, float __b) {
225 return __nv_fadd_rz(__a, __b);
227 __DEVICE__ float __fdiv_rd(float __a, float __b) {
228 return __nv_fdiv_rd(__a, __b);
230 __DEVICE__ float __fdiv_rn(float __a, float __b) {
231 return __nv_fdiv_rn(__a, __b);
233 __DEVICE__ float __fdiv_ru(float __a, float __b) {
234 return __nv_fdiv_ru(__a, __b);
236 __DEVICE__ float __fdiv_rz(float __a, float __b) {
237 return __nv_fdiv_rz(__a, __b);
239 __DEVICE__ float __fdividef(float __a, float __b) {
240 return __nv_fast_fdividef(__a, __b);
285 __DEVICE__ double __fma_rd(double __a, double __b, double __c) {
286 return __nv_fma_rd(__a, __b, __c);
288 __DEVICE__ double __fma_rn(double __a, double __b, double __c) {
289 return __nv_fma_rn(__a, __b, __c);
291 __DEVICE__ double __fma_ru(double __a, double __b, double __c) {
292 return __nv_fma_ru(__a, __b, __c);
294 __DEVICE__ double __fma_rz(double __a, double __b, double __c) {
295 return __nv_fma_rz(__a, __b, __c);
297 __DEVICE__ float __fmaf_ieee_rd(float __a, float __b, float __c) {
298 return __nv_fmaf_ieee_rd(__a, __b, __c);
300 __DEVICE__ float __fmaf_ieee_rn(float __a, float __b, float __c) {
301 return __nv_fmaf_ieee_rn(__a, __b, __c);
303 __DEVICE__ float __fmaf_ieee_ru(float __a, float __b, float __c) {
304 return __nv_fmaf_ieee_ru(__a, __b, __c);
306 __DEVICE__ float __fmaf_ieee_rz(float __a, float __b, float __c) {
307 return __nv_fmaf_ieee_rz(__a, __b, __c);
309 __DEVICE__ float __fmaf_rd(float __a, float __b, float __c) {
310 return __nv_fmaf_rd(__a, __b, __c);
312 __DEVICE__ float __fmaf_rn(float __a, float __b, float __c) {
313 return __nv_fmaf_rn(__a, __b, __c);
315 __DEVICE__ float __fmaf_ru(float __a, float __b, float __c) {
316 return __nv_fmaf_ru(__a, __b, __c);
318 __DEVICE__ float __fmaf_rz(float __a, float __b, float __c) {
319 return __nv_fmaf_rz(__a, __b, __c);
321 __DEVICE__ float __fmul_rd(float __a, float __b) {
322 return __nv_fmul_rd(__a, __b);
324 __DEVICE__ float __fmul_rn(float __a, float __b) {
325 return __nv_fmul_rn(__a, __b);
327 __DEVICE__ float __fmul_ru(float __a, float __b) {
328 return __nv_fmul_ru(__a, __b);
330 __DEVICE__ float __fmul_rz(float __a, float __b) {
331 return __nv_fmul_rz(__a, __b);
342 __DEVICE__ float __fsub_rd(float __a, float __b) {
343 return __nv_fsub_rd(__a, __b);
345 __DEVICE__ float __fsub_rn(float __a, float __b) {
346 return __nv_fsub_rn(__a, __b);
348 __DEVICE__ float __fsub_ru(float __a, float __b) {
349 return __nv_fsub_ru(__a, __b);
351 __DEVICE__ float __fsub_rz(float __a, float __b) {
352 return __nv_fsub_rz(__a, __b);
354 __DEVICE__ int __hadd(int __a, int __b) { return __nv_hadd(__a, __b); }
355 __DEVICE__ double __hiloint2double(int __a, int __b) {
356 return __nv_hiloint2double(__a, __b);
514 __DEVICE__ int __mul24(int __a, int __b) { return __nv_mul24(__a, __b); }
515 __DEVICE__ long long __mul64hi(long long __a, long long __b) {
516 return __nv_mul64hi(__a, __b);
518 __DEVICE__ int __mulhi(int __a, int __b) { return __nv_mulhi(__a, __b); }
525 __DEVICE__ float __powf(float __a, float __b) {
526 return __nv_fast_powf(__a, __b);
531 __DEVICE__ int __rhadd(int __a, int __b) { return __nv_rhadd(__a, __b); }
532 __DEVICE__ unsigned int __sad(int __a, int __b, unsigned int __c) {
533 return __nv_sad(__a, __b, __c);
660 __DEVICE__ unsigned int __uhadd(unsigned int __a, unsigned int __b) {
661 return __nv_uhadd(__a, __b);
804 __DEVICE__ unsigned int __umul24(unsigned int __a, unsigned int __b) {
805 return __nv_umul24(__a, __b);
808 unsigned long long __b) {
809 return __nv_umul64hi(__a, __b);
811 __DEVICE__ unsigned int __umulhi(unsigned int __a, unsigned int __b) {
812 return __nv_umulhi(__a, __b);
814 __DEVICE__ unsigned int __urhadd(unsigned int __a, unsigned int __b) {
815 return __nv_urhadd(__a, __b);
817 __DEVICE__ unsigned int __usad(unsigned int __a, unsigned int __b,
819 return __nv_usad(__a, __b, __c);
825 __DEVICE__ unsigned int __vabsdiffs2(unsigned int __a, unsigned int __b) {
826 return __nv_vabsdiffs2(__a, __b);
828 __DEVICE__ unsigned int __vabsdiffs4(unsigned int __a, unsigned int __b) {
829 return __nv_vabsdiffs4(__a, __b);
831 __DEVICE__ unsigned int __vabsdiffu2(unsigned int __a, unsigned int __b) {
832 return __nv_vabsdiffu2(__a, __b);
834 __DEVICE__ unsigned int __vabsdiffu4(unsigned int __a, unsigned int __b) {
835 return __nv_vabsdiffu4(__a, __b);
843 __DEVICE__ unsigned int __vadd2(unsigned int __a, unsigned int __b) {
844 return __nv_vadd2(__a, __b);
846 __DEVICE__ unsigned int __vadd4(unsigned int __a, unsigned int __b) {
847 return __nv_vadd4(__a, __b);
849 __DEVICE__ unsigned int __vaddss2(unsigned int __a, unsigned int __b) {
850 return __nv_vaddss2(__a, __b);
852 __DEVICE__ unsigned int __vaddss4(unsigned int __a, unsigned int __b) {
853 return __nv_vaddss4(__a, __b);
855 __DEVICE__ unsigned int __vaddus2(unsigned int __a, unsigned int __b) {
856 return __nv_vaddus2(__a, __b);
858 __DEVICE__ unsigned int __vaddus4(unsigned int __a, unsigned int __b) {
859 return __nv_vaddus4(__a, __b);
861 __DEVICE__ unsigned int __vavgs2(unsigned int __a, unsigned int __b) {
862 return __nv_vavgs2(__a, __b);
864 __DEVICE__ unsigned int __vavgs4(unsigned int __a, unsigned int __b) {
865 return __nv_vavgs4(__a, __b);
867 __DEVICE__ unsigned int __vavgu2(unsigned int __a, unsigned int __b) {
868 return __nv_vavgu2(__a, __b);
870 __DEVICE__ unsigned int __vavgu4(unsigned int __a, unsigned int __b) {
871 return __nv_vavgu4(__a, __b);
873 __DEVICE__ unsigned int __vcmpeq2(unsigned int __a, unsigned int __b) {
874 return __nv_vcmpeq2(__a, __b);
876 __DEVICE__ unsigned int __vcmpeq4(unsigned int __a, unsigned int __b) {
877 return __nv_vcmpeq4(__a, __b);
879 __DEVICE__ unsigned int __vcmpges2(unsigned int __a, unsigned int __b) {
880 return __nv_vcmpges2(__a, __b);
882 __DEVICE__ unsigned int __vcmpges4(unsigned int __a, unsigned int __b) {
883 return __nv_vcmpges4(__a, __b);
885 __DEVICE__ unsigned int __vcmpgeu2(unsigned int __a, unsigned int __b) {
886 return __nv_vcmpgeu2(__a, __b);
888 __DEVICE__ unsigned int __vcmpgeu4(unsigned int __a, unsigned int __b) {
889 return __nv_vcmpgeu4(__a, __b);
891 __DEVICE__ unsigned int __vcmpgts2(unsigned int __a, unsigned int __b) {
892 return __nv_vcmpgts2(__a, __b);
894 __DEVICE__ unsigned int __vcmpgts4(unsigned int __a, unsigned int __b) {
895 return __nv_vcmpgts4(__a, __b);
897 __DEVICE__ unsigned int __vcmpgtu2(unsigned int __a, unsigned int __b) {
898 return __nv_vcmpgtu2(__a, __b);
900 __DEVICE__ unsigned int __vcmpgtu4(unsigned int __a, unsigned int __b) {
901 return __nv_vcmpgtu4(__a, __b);
903 __DEVICE__ unsigned int __vcmples2(unsigned int __a, unsigned int __b) {
904 return __nv_vcmples2(__a, __b);
906 __DEVICE__ unsigned int __vcmples4(unsigned int __a, unsigned int __b) {
907 return __nv_vcmples4(__a, __b);
909 __DEVICE__ unsigned int __vcmpleu2(unsigned int __a, unsigned int __b) {
910 return __nv_vcmpleu2(__a, __b);
912 __DEVICE__ unsigned int __vcmpleu4(unsigned int __a, unsigned int __b) {
913 return __nv_vcmpleu4(__a, __b);
915 __DEVICE__ unsigned int __vcmplts2(unsigned int __a, unsigned int __b) {
916 return __nv_vcmplts2(__a, __b);
918 __DEVICE__ unsigned int __vcmplts4(unsigned int __a, unsigned int __b) {
919 return __nv_vcmplts4(__a, __b);
921 __DEVICE__ unsigned int __vcmpltu2(unsigned int __a, unsigned int __b) {
922 return __nv_vcmpltu2(__a, __b);
924 __DEVICE__ unsigned int __vcmpltu4(unsigned int __a, unsigned int __b) {
925 return __nv_vcmpltu4(__a, __b);
927 __DEVICE__ unsigned int __vcmpne2(unsigned int __a, unsigned int __b) {
928 return __nv_vcmpne2(__a, __b);
930 __DEVICE__ unsigned int __vcmpne4(unsigned int __a, unsigned int __b) {
931 return __nv_vcmpne4(__a, __b);
933 __DEVICE__ unsigned int __vhaddu2(unsigned int __a, unsigned int __b) {
934 return __nv_vhaddu2(__a, __b);
936 __DEVICE__ unsigned int __vhaddu4(unsigned int __a, unsigned int __b) {
937 return __nv_vhaddu4(__a, __b);
939 __DEVICE__ unsigned int __vmaxs2(unsigned int __a, unsigned int __b) {
940 return __nv_vmaxs2(__a, __b);
942 __DEVICE__ unsigned int __vmaxs4(unsigned int __a, unsigned int __b) {
943 return __nv_vmaxs4(__a, __b);
945 __DEVICE__ unsigned int __vmaxu2(unsigned int __a, unsigned int __b) {
946 return __nv_vmaxu2(__a, __b);
948 __DEVICE__ unsigned int __vmaxu4(unsigned int __a, unsigned int __b) {
949 return __nv_vmaxu4(__a, __b);
951 __DEVICE__ unsigned int __vmins2(unsigned int __a, unsigned int __b) {
952 return __nv_vmins2(__a, __b);
954 __DEVICE__ unsigned int __vmins4(unsigned int __a, unsigned int __b) {
955 return __nv_vmins4(__a, __b);
957 __DEVICE__ unsigned int __vminu2(unsigned int __a, unsigned int __b) {
958 return __nv_vminu2(__a, __b);
960 __DEVICE__ unsigned int __vminu4(unsigned int __a, unsigned int __b) {
961 return __nv_vminu4(__a, __b);
971 __DEVICE__ unsigned int __vsads2(unsigned int __a, unsigned int __b) {
972 return __nv_vsads2(__a, __b);
974 __DEVICE__ unsigned int __vsads4(unsigned int __a, unsigned int __b) {
975 return __nv_vsads4(__a, __b);
977 __DEVICE__ unsigned int __vsadu2(unsigned int __a, unsigned int __b) {
978 return __nv_vsadu2(__a, __b);
980 __DEVICE__ unsigned int __vsadu4(unsigned int __a, unsigned int __b) {
981 return __nv_vsadu4(__a, __b);
983 __DEVICE__ unsigned int __vseteq2(unsigned int __a, unsigned int __b) {
984 return __nv_vseteq2(__a, __b);
986 __DEVICE__ unsigned int __vseteq4(unsigned int __a, unsigned int __b) {
987 return __nv_vseteq4(__a, __b);
989 __DEVICE__ unsigned int __vsetges2(unsigned int __a, unsigned int __b) {
990 return __nv_vsetges2(__a, __b);
992 __DEVICE__ unsigned int __vsetges4(unsigned int __a, unsigned int __b) {
993 return __nv_vsetges4(__a, __b);
995 __DEVICE__ unsigned int __vsetgeu2(unsigned int __a, unsigned int __b) {
996 return __nv_vsetgeu2(__a, __b);
998 __DEVICE__ unsigned int __vsetgeu4(unsigned int __a, unsigned int __b) {
999 return __nv_vsetgeu4(__a, __b);
1001 __DEVICE__ unsigned int __vsetgts2(unsigned int __a, unsigned int __b) {
1002 return __nv_vsetgts2(__a, __b);
1004 __DEVICE__ unsigned int __vsetgts4(unsigned int __a, unsigned int __b) {
1005 return __nv_vsetgts4(__a, __b);
1007 __DEVICE__ unsigned int __vsetgtu2(unsigned int __a, unsigned int __b) {
1008 return __nv_vsetgtu2(__a, __b);
1010 __DEVICE__ unsigned int __vsetgtu4(unsigned int __a, unsigned int __b) {
1011 return __nv_vsetgtu4(__a, __b);
1013 __DEVICE__ unsigned int __vsetles2(unsigned int __a, unsigned int __b) {
1014 return __nv_vsetles2(__a, __b);
1016 __DEVICE__ unsigned int __vsetles4(unsigned int __a, unsigned int __b) {
1017 return __nv_vsetles4(__a, __b);
1019 __DEVICE__ unsigned int __vsetleu2(unsigned int __a, unsigned int __b) {
1020 return __nv_vsetleu2(__a, __b);
1022 __DEVICE__ unsigned int __vsetleu4(unsigned int __a, unsigned int __b) {
1023 return __nv_vsetleu4(__a, __b);
1025 __DEVICE__ unsigned int __vsetlts2(unsigned int __a, unsigned int __b) {
1026 return __nv_vsetlts2(__a, __b);
1028 __DEVICE__ unsigned int __vsetlts4(unsigned int __a, unsigned int __b) {
1029 return __nv_vsetlts4(__a, __b);
1031 __DEVICE__ unsigned int __vsetltu2(unsigned int __a, unsigned int __b) {
1032 return __nv_vsetltu2(__a, __b);
1034 __DEVICE__ unsigned int __vsetltu4(unsigned int __a, unsigned int __b) {
1035 return __nv_vsetltu4(__a, __b);
1037 __DEVICE__ unsigned int __vsetne2(unsigned int __a, unsigned int __b) {
1038 return __nv_vsetne2(__a, __b);
1040 __DEVICE__ unsigned int __vsetne4(unsigned int __a, unsigned int __b) {
1041 return __nv_vsetne4(__a, __b);
1043 __DEVICE__ unsigned int __vsub2(unsigned int __a, unsigned int __b) {
1044 return __nv_vsub2(__a, __b);
1046 __DEVICE__ unsigned int __vsub4(unsigned int __a, unsigned int __b) {
1047 return __nv_vsub4(__a, __b);
1049 __DEVICE__ unsigned int __vsubss2(unsigned int __a, unsigned int __b) {
1050 return __nv_vsubss2(__a, __b);
1052 __DEVICE__ unsigned int __vsubss4(unsigned int __a, unsigned int __b) {
1053 return __nv_vsubss4(__a, __b);
1055 __DEVICE__ unsigned int __vsubus2(unsigned int __a, unsigned int __b) {
1056 return __nv_vsubus2(__a, __b);
1058 __DEVICE__ unsigned int __vsubus4(unsigned int __a, unsigned int __b) {
1059 return __nv_vsubus4(__a, __b);
1086 __DEVICE__ unsigned int __vabsdiffs2(unsigned int __a, unsigned int __b) {
1090 : "r"(__a), "r"(__b), "r"(0));
1094 __DEVICE__ unsigned int __vabsdiffs4(unsigned int __a, unsigned int __b) {
1098 : "r"(__a), "r"(__b), "r"(0));
1101 __DEVICE__ unsigned int __vabsdiffu2(unsigned int __a, unsigned int __b) {
1105 : "r"(__a), "r"(__b), "r"(0));
1108 __DEVICE__ unsigned int __vabsdiffu4(unsigned int __a, unsigned int __b) {
1112 : "r"(__a), "r"(__b), "r"(0));
1129 __DEVICE__ unsigned int __vadd2(unsigned int __a, unsigned int __b) {
1131 asm("vadd2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
1134 __DEVICE__ unsigned int __vadd4(unsigned int __a, unsigned int __b) {
1136 asm("vadd4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
1139 __DEVICE__ unsigned int __vaddss2(unsigned int __a, unsigned int __b) {
1143 : "r"(__a), "r"(__b), "r"(0));
1146 __DEVICE__ unsigned int __vaddss4(unsigned int __a, unsigned int __b) {
1150 : "r"(__a), "r"(__b), "r"(0));
1153 __DEVICE__ unsigned int __vaddus2(unsigned int __a, unsigned int __b) {
1157 : "r"(__a), "r"(__b), "r"(0));
1160 __DEVICE__ unsigned int __vaddus4(unsigned int __a, unsigned int __b) {
1164 : "r"(__a), "r"(__b), "r"(0));
1167 __DEVICE__ unsigned int __vavgs2(unsigned int __a, unsigned int __b) {
1169 asm("vavrg2.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
1172 __DEVICE__ unsigned int __vavgs4(unsigned int __a, unsigned int __b) {
1174 asm("vavrg4.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
1177 __DEVICE__ unsigned int __vavgu2(unsigned int __a, unsigned int __b) {
1179 asm("vavrg2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
1182 __DEVICE__ unsigned int __vavgu4(unsigned int __a, unsigned int __b) {
1184 asm("vavrg4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
1187 __DEVICE__ unsigned int __vseteq2(unsigned int __a, unsigned int __b) {
1189 asm("vset2.u32.u32.eq %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
1192 __DEVICE__ unsigned int __vcmpeq2(unsigned int __a, unsigned int __b) {
1193 return __bool2mask(__vseteq2(__a, __b), 16);
1195 __DEVICE__ unsigned int __vseteq4(unsigned int __a, unsigned int __b) {
1197 asm("vset4.u32.u32.eq %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
1200 __DEVICE__ unsigned int __vcmpeq4(unsigned int __a, unsigned int __b) {
1201 return __bool2mask(__vseteq4(__a, __b), 8);
1203 __DEVICE__ unsigned int __vsetges2(unsigned int __a, unsigned int __b) {
1205 asm("vset2.s32.s32.ge %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
1208 __DEVICE__ unsigned int __vcmpges2(unsigned int __a, unsigned int __b) {
1209 return __bool2mask(__vsetges2(__a, __b), 16);
1211 __DEVICE__ unsigned int __vsetges4(unsigned int __a, unsigned int __b) {
1213 asm("vset4.s32.s32.ge %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
1216 __DEVICE__ unsigned int __vcmpges4(unsigned int __a, unsigned int __b) {
1217 return __bool2mask(__vsetges4(__a, __b), 8);
1219 __DEVICE__ unsigned int __vsetgeu2(unsigned int __a, unsigned int __b) {
1221 asm("vset2.u32.u32.ge %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
1224 __DEVICE__ unsigned int __vcmpgeu2(unsigned int __a, unsigned int __b) {
1225 return __bool2mask(__vsetgeu2(__a, __b), 16);
1227 __DEVICE__ unsigned int __vsetgeu4(unsigned int __a, unsigned int __b) {
1229 asm("vset4.u32.u32.ge %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
1232 __DEVICE__ unsigned int __vcmpgeu4(unsigned int __a, unsigned int __b) {
1233 return __bool2mask(__vsetgeu4(__a, __b), 8);
1235 __DEVICE__ unsigned int __vsetgts2(unsigned int __a, unsigned int __b) {
1237 asm("vset2.s32.s32.gt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
1240 __DEVICE__ unsigned int __vcmpgts2(unsigned int __a, unsigned int __b) {
1241 return __bool2mask(__vsetgts2(__a, __b), 16);
1243 __DEVICE__ unsigned int __vsetgts4(unsigned int __a, unsigned int __b) {
1245 asm("vset4.s32.s32.gt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
1248 __DEVICE__ unsigned int __vcmpgts4(unsigned int __a, unsigned int __b) {
1249 return __bool2mask(__vsetgts4(__a, __b), 8);
1251 __DEVICE__ unsigned int __vsetgtu2(unsigned int __a, unsigned int __b) {
1253 asm("vset2.u32.u32.gt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
1256 __DEVICE__ unsigned int __vcmpgtu2(unsigned int __a, unsigned int __b) {
1257 return __bool2mask(__vsetgtu2(__a, __b), 16);
1259 __DEVICE__ unsigned int __vsetgtu4(unsigned int __a, unsigned int __b) {
1261 asm("vset4.u32.u32.gt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
1264 __DEVICE__ unsigned int __vcmpgtu4(unsigned int __a, unsigned int __b) {
1265 return __bool2mask(__vsetgtu4(__a, __b), 8);
1267 __DEVICE__ unsigned int __vsetles2(unsigned int __a, unsigned int __b) {
1269 asm("vset2.s32.s32.le %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
1272 __DEVICE__ unsigned int __vcmples2(unsigned int __a, unsigned int __b) {
1273 return __bool2mask(__vsetles2(__a, __b), 16);
1275 __DEVICE__ unsigned int __vsetles4(unsigned int __a, unsigned int __b) {
1277 asm("vset4.s32.s32.le %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
1280 __DEVICE__ unsigned int __vcmples4(unsigned int __a, unsigned int __b) {
1281 return __bool2mask(__vsetles4(__a, __b), 8);
1283 __DEVICE__ unsigned int __vsetleu2(unsigned int __a, unsigned int __b) {
1285 asm("vset2.u32.u32.le %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
1288 __DEVICE__ unsigned int __vcmpleu2(unsigned int __a, unsigned int __b) {
1289 return __bool2mask(__vsetleu2(__a, __b), 16);
1291 __DEVICE__ unsigned int __vsetleu4(unsigned int __a, unsigned int __b) {
1293 asm("vset4.u32.u32.le %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
1296 __DEVICE__ unsigned int __vcmpleu4(unsigned int __a, unsigned int __b) {
1297 return __bool2mask(__vsetleu4(__a, __b), 8);
1299 __DEVICE__ unsigned int __vsetlts2(unsigned int __a, unsigned int __b) {
1301 asm("vset2.s32.s32.lt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
1304 __DEVICE__ unsigned int __vcmplts2(unsigned int __a, unsigned int __b) {
1305 return __bool2mask(__vsetlts2(__a, __b), 16);
1307 __DEVICE__ unsigned int __vsetlts4(unsigned int __a, unsigned int __b) {
1309 asm("vset4.s32.s32.lt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
1312 __DEVICE__ unsigned int __vcmplts4(unsigned int __a, unsigned int __b) {
1313 return __bool2mask(__vsetlts4(__a, __b), 8);
1315 __DEVICE__ unsigned int __vsetltu2(unsigned int __a, unsigned int __b) {
1317 asm("vset2.u32.u32.lt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
1320 __DEVICE__ unsigned int __vcmpltu2(unsigned int __a, unsigned int __b) {
1321 return __bool2mask(__vsetltu2(__a, __b), 16);
1323 __DEVICE__ unsigned int __vsetltu4(unsigned int __a, unsigned int __b) {
1325 asm("vset4.u32.u32.lt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
1328 __DEVICE__ unsigned int __vcmpltu4(unsigned int __a, unsigned int __b) {
1329 return __bool2mask(__vsetltu4(__a, __b), 8);
1331 __DEVICE__ unsigned int __vsetne2(unsigned int __a, unsigned int __b) {
1333 asm("vset2.u32.u32.ne %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
1336 __DEVICE__ unsigned int __vcmpne2(unsigned int __a, unsigned int __b) {
1337 return __bool2mask(__vsetne2(__a, __b), 16);
1339 __DEVICE__ unsigned int __vsetne4(unsigned int __a, unsigned int __b) {
1341 asm("vset4.u32.u32.ne %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
1344 __DEVICE__ unsigned int __vcmpne4(unsigned int __a, unsigned int __b) {
1345 return __bool2mask(__vsetne4(__a, __b), 8);
1353 __DEVICE__ unsigned int __vhaddu2(unsigned int __a, unsigned int __b) {
1354 return (((__a ^ __b) >> 1) & ~0x80008000u) + (__a & __b);
1356 __DEVICE__ unsigned int __vhaddu4(unsigned int __a, unsigned int __b) {
1357 return (((__a ^ __b) >> 1) & ~0x80808080u) + (__a & __b);
1360 __DEVICE__ unsigned int __vmaxs2(unsigned int __a, unsigned int __b) {
1362 if ((__a & 0x8000) && (__b & 0x8000)) {
1365 unsigned mask = __vcmpgts2(__a, __b);
1366 r = (__a & mask) | (__b & ~mask);
1370 : "r"(__a), "r"(__b), "r"(0));
1374 __DEVICE__ unsigned int __vmaxs4(unsigned int __a, unsigned int __b) {
1376 asm("vmax4.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
1379 __DEVICE__ unsigned int __vmaxu2(unsigned int __a, unsigned int __b) {
1381 asm("vmax2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
1384 __DEVICE__ unsigned int __vmaxu4(unsigned int __a, unsigned int __b) {
1386 asm("vmax4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
1389 __DEVICE__ unsigned int __vmins2(unsigned int __a, unsigned int __b) {
1391 asm("vmin2.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
1394 __DEVICE__ unsigned int __vmins4(unsigned int __a, unsigned int __b) {
1396 asm("vmin4.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
1399 __DEVICE__ unsigned int __vminu2(unsigned int __a, unsigned int __b) {
1401 asm("vmin2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
1404 __DEVICE__ unsigned int __vminu4(unsigned int __a, unsigned int __b) {
1406 asm("vmin4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
1409 __DEVICE__ unsigned int __vsads2(unsigned int __a, unsigned int __b) {
1413 : "r"(__a), "r"(__b), "r"(0));
1416 __DEVICE__ unsigned int __vsads4(unsigned int __a, unsigned int __b) {
1420 : "r"(__a), "r"(__b), "r"(0));
1423 __DEVICE__ unsigned int __vsadu2(unsigned int __a, unsigned int __b) {
1427 : "r"(__a), "r"(__b), "r"(0));
1430 __DEVICE__ unsigned int __vsadu4(unsigned int __a, unsigned int __b) {
1434 : "r"(__a), "r"(__b), "r"(0));
1438 __DEVICE__ unsigned int __vsub2(unsigned int __a, unsigned int __b) {
1440 asm("vsub2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
1445 __DEVICE__ unsigned int __vsub4(unsigned int __a, unsigned int __b) {
1447 asm("vsub4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
1451 __DEVICE__ unsigned int __vsubss2(unsigned int __a, unsigned int __b) {
1455 : "r"(__a), "r"(__b), "r"(0));
1461 __DEVICE__ unsigned int __vsubss4(unsigned int __a, unsigned int __b) {
1465 : "r"(__a), "r"(__b), "r"(0));
1471 __DEVICE__ unsigned int __vsubus2(unsigned int __a, unsigned int __b) {
1475 : "r"(__a), "r"(__b), "r"(0));
1478 __DEVICE__ unsigned int __vsubus4(unsigned int __a, unsigned int __b) {
1482 : "r"(__a), "r"(__b), "r"(0));
1497 __DEVICE__ double atan2(double __a, double __b) { return __nv_atan2(__a, __b); }
1498 __DEVICE__ float atan2f(float __a, float __b) { return __nv_atan2f(__a, __b); }
1510 __DEVICE__ double copysign(double __a, double __b) {
1511 return __nv_copysign(__a, __b);
1513 __DEVICE__ float copysignf(float __a, float __b) {
1514 return __nv_copysignf(__a, __b);
1547 __DEVICE__ double fdim(double __a, double __b) { return __nv_fdim(__a, __b); }
1548 __DEVICE__ float fdimf(float __a, float __b) { return __nv_fdimf(__a, __b); }
1549 __DEVICE__ double fdivide(double __a, double __b) { return __a / __b; }
1550 __DEVICE__ float fdividef(float __a, float __b) {
1552 return __nv_fast_fdividef(__a, __b);
1554 return __a / __b;
1559 __DEVICE__ double fma(double __a, double __b, double __c) {
1560 return __nv_fma(__a, __b, __c);
1562 __DEVICE__ float fmaf(float __a, float __b, float __c) {
1563 return __nv_fmaf(__a, __b, __c);
1565 __DEVICE__ double fmax(double __a, double __b) { return __nv_fmax(__a, __b); }
1566 __DEVICE__ float fmaxf(float __a, float __b) { return __nv_fmaxf(__a, __b); }
1567 __DEVICE__ double fmin(double __a, double __b) { return __nv_fmin(__a, __b); }
1568 __DEVICE__ float fminf(float __a, float __b) { return __nv_fminf(__a, __b); }
1569 __DEVICE__ double fmod(double __a, double __b) { return __nv_fmod(__a, __b); }
1570 __DEVICE__ float fmodf(float __a, float __b) { return __nv_fmodf(__a, __b); }
1571 __DEVICE__ double frexp(double __a, int *__b) { return __nv_frexp(__a, __b); }
1572 __DEVICE__ float frexpf(float __a, int *__b) { return __nv_frexpf(__a, __b); }
1573 __DEVICE__ double hypot(double __a, double __b) { return __nv_hypot(__a, __b); }
1574 __DEVICE__ float hypotf(float __a, float __b) { return __nv_hypotf(__a, __b); }
1588 __DEVICE__ double ldexp(double __a, int __b) { return __nv_ldexp(__a, __b); }
1589 __DEVICE__ float ldexpf(float __a, int __b) { return __nv_ldexpf(__a, __b); }
1593 __DEVICE__ long long llmax(long long __a, long long __b) {
1594 return __nv_llmax(__a, __b);
1596 __DEVICE__ long long llmin(long long __a, long long __b) {
1597 return __nv_llmin(__a, __b);
1628 __DEVICE__ int max(int __a, int __b) { return __nv_max(__a, __b); }
1632 __DEVICE__ void *memcpy(void *__a, const void *__b, size_t __c) {
1633 return __builtin_memcpy(__a, __b, __c);
1635 __DEVICE__ void *memset(void *__a, int __b, size_t __c) {
1636 return __builtin_memset(__a, __b, __c);
1639 __DEVICE__ int min(int __a, int __b) { return __nv_min(__a, __b); }
1640 __DEVICE__ double modf(double __a, double *__b) { return __nv_modf(__a, __b); }
1641 __DEVICE__ float modff(float __a, float *__b) { return __nv_modff(__a, __b); }
1644 __DEVICE__ double nextafter(double __a, double __b) {
1645 return __nv_nextafter(__a, __b);
1647 __DEVICE__ float nextafterf(float __a, float __b) {
1648 return __nv_nextafterf(__a, __b);
1653 __DEVICE__ double norm3d(double __a, double __b, double __c) {
1654 return __nv_norm3d(__a, __b, __c);
1656 __DEVICE__ float norm3df(float __a, float __b, float __c) {
1657 return __nv_norm3df(__a, __b, __c);
1659 __DEVICE__ double norm4d(double __a, double __b, double __c, double __d) {
1660 return __nv_norm4d(__a, __b, __c, __d);
1662 __DEVICE__ float norm4df(float __a, float __b, float __c, float __d) {
1663 return __nv_norm4df(__a, __b, __c, __d);
1672 __DEVICE__ double pow(double __a, double __b) { return __nv_pow(__a, __b); }
1673 __DEVICE__ float powf(float __a, float __b) { return __nv_powf(__a, __b); }
1674 __DEVICE__ double powi(double __a, int __b) { return __nv_powi(__a, __b); }
1675 __DEVICE__ float powif(float __a, int __b) { return __nv_powif(__a, __b); }
1678 __DEVICE__ double remainder(double __a, double __b) {
1679 return __nv_remainder(__a, __b);
1681 __DEVICE__ float remainderf(float __a, float __b) {
1682 return __nv_remainderf(__a, __b);
1684 __DEVICE__ double remquo(double __a, double __b, int *__c) {
1685 return __nv_remquo(__a, __b, __c);
1687 __DEVICE__ float remquof(float __a, float __b, int *__c) {
1688 return __nv_remquof(__a, __b, __c);
1690 __DEVICE__ double rhypot(double __a, double __b) {
1691 return __nv_rhypot(__a, __b);
1693 __DEVICE__ float rhypotf(float __a, float __b) {
1694 return __nv_rhypotf(__a, __b);
1698 __DEVICE__ double rnorm(int __a, const double *__b) {
1699 return __nv_rnorm(__a, __b);
1701 __DEVICE__ double rnorm3d(double __a, double __b, double __c) {
1702 return __nv_rnorm3d(__a, __b, __c);
1704 __DEVICE__ float rnorm3df(float __a, float __b, float __c) {
1705 return __nv_rnorm3df(__a, __b, __c);
1707 __DEVICE__ double rnorm4d(double __a, double __b, double __c, double __d) {
1708 return __nv_rnorm4d(__a, __b, __c, __d);
1710 __DEVICE__ float rnorm4df(float __a, float __b, float __c, float __d) {
1711 return __nv_rnorm4df(__a, __b, __c, __d);
1720 __DEVICE__ double scalbn(double __a, int __b) { return __nv_scalbn(__a, __b); }
1721 __DEVICE__ float scalbnf(float __a, int __b) { return __nv_scalbnf(__a, __b); }
1724 __DEVICE__ double scalbln(double __a, long __b) {
1725 if (__b > INT_MAX)
1727 if (__b < INT_MIN)
1729 return scalbn(__a, (int)__b);
1731 __DEVICE__ float scalblnf(float __a, long __b) {
1732 if (__b > INT_MAX)
1734 if (__b < INT_MIN)
1736 return scalbnf(__a, (int)__b);
1770 unsigned long long __b) {
1771 return __nv_ullmax(__a, __b);
1774 unsigned long long __b) {
1775 return __nv_ullmin(__a, __b);
1777 __DEVICE__ unsigned int umax(unsigned int __a, unsigned int __b) {
1778 return __nv_umax(__a, __b);
1780 __DEVICE__ unsigned int umin(unsigned int __a, unsigned int __b) {
1781 return __nv_umin(__a, __b);
1787 __DEVICE__ double yn(int __a, double __b) { return __nv_yn(__a, __b); }
1788 __DEVICE__ float ynf(int __a, float __b) { return __nv_ynf(__a, __b); }