// NVPTXIntrinsics.td revision 263508
//===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

// Leaf predicates that match a floating-point immediate whose value compares
// equal to 0.0 / 1.0 (single- and double-precision flavours).
def immFloat0 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToFloat() == 0.0f;
}]>;

def immFloat1 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToFloat() == 1.0f;
}]>;

def immDouble0 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToDouble() == 0.0;
}]>;

def immDouble1 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToDouble() == 1.0;
}]>;


//-----------------------------------
// Synchronization Functions
//-----------------------------------

// Both the CUDA and the NVVM barrier intrinsics emit "bar.sync 0".
def INT_CUDA_SYNCTHREADS : NVPTXInst<(outs), (ins),
    "bar.sync \t0;",
    [(int_cuda_syncthreads)]>;
def INT_BARRIER0 : NVPTXInst<(outs), (ins),
    "bar.sync \t0;",
    [(int_nvvm_barrier0)]>;

// The reducing barriers take an i32 operand. The emitted sequence opens its
// own scope, converts $pred to a predicate register with setp.ne, performs
// the bar.red, and (for and/or) turns the predicate result back into an i32
// with selp.
def INT_BARRIER0_POPC : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
    !strconcat("{{ \n\t",
    !strconcat(".reg .pred \t%p1; \n\t",
    !strconcat("setp.ne.u32 \t%p1, $pred, 0; \n\t",
    !strconcat("bar.red.popc.u32 \t$dst, 0, %p1; \n\t",
               "}}")))),
    [(set Int32Regs:$dst, (int_nvvm_barrier0_popc Int32Regs:$pred))]>;
def INT_BARRIER0_AND : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
    !strconcat("{{ \n\t",
    !strconcat(".reg .pred \t%p1; \n\t",
    !strconcat(".reg .pred \t%p2; \n\t",
    !strconcat("setp.ne.u32 \t%p1, $pred, 0; \n\t",
    !strconcat("bar.red.and.pred \t%p2, 0, %p1; \n\t",
    !strconcat("selp.u32 \t$dst, 1, 0, %p2; \n\t",
               "}}")))))),
    [(set Int32Regs:$dst, (int_nvvm_barrier0_and Int32Regs:$pred))]>;
def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
    !strconcat("{{ \n\t",
    !strconcat(".reg .pred \t%p1; \n\t",
    !strconcat(".reg .pred \t%p2; \n\t",
    !strconcat("setp.ne.u32 \t%p1, $pred, 0; \n\t",
    !strconcat("bar.red.or.pred \t%p2, 0, %p1; \n\t",
    !strconcat("selp.u32 \t$dst, 1, 0, %p2; \n\t",
               "}}")))))),
    [(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>;


//-----------------------------------
// Explicit Memory Fence Functions
//-----------------------------------

// An operand-less instruction that prints StrOp and is selected for IntOP.
class MEMBAR<string StrOp, Intrinsic IntOP>
  : NVPTXInst<(outs), (ins), StrOp, [(IntOP)]>;

def INT_MEMBAR_CTA : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
def INT_MEMBAR_GL  : MEMBAR<"membar.gl;",  int_nvvm_membar_gl>;
def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;


//-----------------------------------
// Math Functions
//-----------------------------------

// Map min(1.0, max(0.0, x)) to sat(x).
// Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x): when x is NaN,
// max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0.
// Same story for fmax, fmin.

// fmin(1.0, fmax(0.0, a)) in every operand order -> cvt with the CvtSAT mode.
// Single precision:
def : Pat<(int_nvvm_fmin_f immFloat1,
            (int_nvvm_fmax_f immFloat0, Float32Regs:$a)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f immFloat1,
            (int_nvvm_fmax_f Float32Regs:$a, immFloat0)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f
            (int_nvvm_fmax_f immFloat0, Float32Regs:$a), immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f
            (int_nvvm_fmax_f Float32Regs:$a, immFloat0), immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;

// Double precision:
def : Pat<(int_nvvm_fmin_d immDouble1,
            (int_nvvm_fmax_d immDouble0, Float64Regs:$a)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d immDouble1,
            (int_nvvm_fmax_d Float64Regs:$a, immDouble0)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d
            (int_nvvm_fmax_d immDouble0, Float64Regs:$a), immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d
            (int_nvvm_fmax_d Float64Regs:$a, immDouble0), immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;


// One-source intrinsic instruction: $dst = IntOP($src0).
// We need a full string for OpcStr here because we need to deal with case like
// INT_PTX_RECIP.
class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
               NVPTXRegClass src_regclass, Intrinsic IntOP>
  : NVPTXInst<(outs target_regclass:$dst),
              (ins src_regclass:$src0),
              OpcStr,
              [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>;

// We need a full string for OpcStr here because we need to deal with the case
// like INT_PTX_NATIVE_POWR_F.
// Two-source intrinsic instruction: $dst = IntOP($src0, $src1).
class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass,
               NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
               Intrinsic IntOP>
  : NVPTXInst<(outs t_regclass:$dst),
              (ins s0_regclass:$src0, s1_regclass:$src1),
              OpcStr,
              [(set t_regclass:$dst,
                 (IntOP s0_regclass:$src0, s1_regclass:$src1))]>;

// Three-source intrinsic instruction: $dst = IntOP($src0, $src1, $src2).
class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
               NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
               NVPTXRegClass s2_regclass, Intrinsic IntOP>
  : NVPTXInst<(outs t_regclass:$dst),
              (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2),
              OpcStr,
              [(set t_regclass:$dst,
                 (IntOP s0_regclass:$src0, s1_regclass:$src1,
                        s2_regclass:$src2))]>;

//
// MISC
//

def INT_NVVM_CLZ_I : F_MATH_1<"clz.b32 \t$dst, $src0;",
    Int32Regs, Int32Regs, int_nvvm_clz_i>;
def INT_NVVM_CLZ_LL : F_MATH_1<"clz.b64 \t$dst, $src0;",
    Int32Regs, Int64Regs, int_nvvm_clz_ll>;

def INT_NVVM_POPC_I : F_MATH_1<"popc.b32 \t$dst, $src0;",
    Int32Regs, Int32Regs, int_nvvm_popc_i>;
def INT_NVVM_POPC_LL : F_MATH_1<"popc.b64 \t$dst, $src0;",
    Int32Regs, Int64Regs, int_nvvm_popc_ll>;

def INT_NVVM_PRMT : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;",
    Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>;

//
// Min Max
//

def INT_NVVM_MIN_I : F_MATH_2<"min.s32 \t$dst, $src0, $src1;",
    Int32Regs, Int32Regs, Int32Regs, int_nvvm_min_i>;
def INT_NVVM_MIN_UI : F_MATH_2<"min.u32 \t$dst, $src0, $src1;",
    Int32Regs, Int32Regs, Int32Regs, int_nvvm_min_ui>;

def INT_NVVM_MIN_LL : F_MATH_2<"min.s64 \t$dst, $src0, $src1;",
    Int64Regs, Int64Regs, Int64Regs, int_nvvm_min_ll>;
def INT_NVVM_MIN_ULL : F_MATH_2<"min.u64 \t$dst, $src0, $src1;",
    Int64Regs, Int64Regs, Int64Regs, int_nvvm_min_ull>;

def INT_NVVM_MAX_I : F_MATH_2<"max.s32 \t$dst, $src0, $src1;",
    Int32Regs, Int32Regs, Int32Regs, int_nvvm_max_i>;
def INT_NVVM_MAX_UI : F_MATH_2<"max.u32 \t$dst, $src0, $src1;",
    Int32Regs, Int32Regs, Int32Regs, int_nvvm_max_ui>;

def INT_NVVM_MAX_LL : F_MATH_2<"max.s64 \t$dst, $src0, $src1;",
    Int64Regs, Int64Regs, Int64Regs, int_nvvm_max_ll>;
def INT_NVVM_MAX_ULL : F_MATH_2<"max.u64 \t$dst, $src0, $src1;",
    Int64Regs, Int64Regs, Int64Regs, int_nvvm_max_ull>;

def INT_NVVM_FMIN_F : F_MATH_2<"min.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_f>;
def INT_NVVM_FMIN_FTZ_F : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>;

def INT_NVVM_FMAX_F : F_MATH_2<"max.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_f>;
def INT_NVVM_FMAX_FTZ_F : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>;

def INT_NVVM_FMIN_D : F_MATH_2<"min.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmin_d>;
def INT_NVVM_FMAX_D : F_MATH_2<"max.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmax_d>;

//
// Multiplication
//

def INT_NVVM_MULHI_I : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;",
    Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
def INT_NVVM_MULHI_UI : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;",
    Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;

def INT_NVVM_MULHI_LL : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;",
    Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
def INT_NVVM_MULHI_ULL : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;",
    Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;

def INT_NVVM_MUL_RN_FTZ_F : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
def INT_NVVM_MUL_RN_F : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
def INT_NVVM_MUL_RZ_FTZ_F : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
def INT_NVVM_MUL_RZ_F : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
def INT_NVVM_MUL_RM_FTZ_F : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
def INT_NVVM_MUL_RM_F : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
def INT_NVVM_MUL_RP_FTZ_F : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
def INT_NVVM_MUL_RP_F : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;

def INT_NVVM_MUL_RN_D : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
def INT_NVVM_MUL_RZ_D : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
def INT_NVVM_MUL_RM_D : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
def INT_NVVM_MUL_RP_D : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;

def INT_NVVM_MUL24_I : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
    Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
def INT_NVVM_MUL24_UI : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
    Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;

//
// Div
//

def INT_NVVM_DIV_APPROX_FTZ_F
    : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_ftz_f>;
def INT_NVVM_DIV_APPROX_F : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;

def INT_NVVM_DIV_RN_FTZ_F : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
def INT_NVVM_DIV_RN_F : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
def INT_NVVM_DIV_RZ_FTZ_F : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
def INT_NVVM_DIV_RZ_F : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
def INT_NVVM_DIV_RM_FTZ_F : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
def INT_NVVM_DIV_RM_F : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
def INT_NVVM_DIV_RP_FTZ_F : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
def INT_NVVM_DIV_RP_F : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;

def INT_NVVM_DIV_RN_D : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
def INT_NVVM_DIV_RZ_D : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
def INT_NVVM_DIV_RM_D : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;

//
// Brev
//

def INT_NVVM_BREV32 : F_MATH_1<"brev.b32 \t$dst, $src0;",
    Int32Regs, Int32Regs, int_nvvm_brev32>;
def INT_NVVM_BREV64 : F_MATH_1<"brev.b64 \t$dst, $src0;",
    Int64Regs, Int64Regs, int_nvvm_brev64>;

//
// Sad
//

def INT_NVVM_SAD_I : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;",
    Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>;
def INT_NVVM_SAD_UI : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;",
    Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>;

//
// Floor Ceil
//

def : Pat<(int_nvvm_floor_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_floor_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_floor_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRMI)>;

def : Pat<(int_nvvm_ceil_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_ceil_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRPI)>;
def : Pat<(int_nvvm_ceil_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRPI)>;

//
// Abs
//

def INT_NVVM_ABS_I : F_MATH_1<"abs.s32 \t$dst, $src0;",
    Int32Regs, Int32Regs, int_nvvm_abs_i>;
def INT_NVVM_ABS_LL : F_MATH_1<"abs.s64 \t$dst, $src0;",
    Int64Regs, Int64Regs, int_nvvm_abs_ll>;

def INT_NVVM_FABS_FTZ_F : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_fabs_ftz_f>;
def INT_NVVM_FABS_F : F_MATH_1<"abs.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_fabs_f>;

def INT_NVVM_FABS_D : F_MATH_1<"abs.f64 \t$dst, $src0;",
    Float64Regs, Float64Regs, int_nvvm_fabs_d>;

//
// Round
//

def : Pat<(int_nvvm_round_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_round_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_round_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRNI)>;

//
// Trunc
//

def : Pat<(int_nvvm_trunc_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_trunc_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_trunc_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRZI)>;

//
// Saturate
//

def : Pat<(int_nvvm_saturate_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>;
def : Pat<(int_nvvm_saturate_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_saturate_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;

//
// Exp2 Log2
//

def INT_NVVM_EX2_APPROX_FTZ_F : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>;
def INT_NVVM_EX2_APPROX_F : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
def INT_NVVM_EX2_APPROX_D : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
    Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;

def INT_NVVM_LG2_APPROX_FTZ_F : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
def INT_NVVM_LG2_APPROX_F : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>;
def INT_NVVM_LG2_APPROX_D : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;",
    Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>;

//
// Sin Cos
//

def INT_NVVM_SIN_APPROX_FTZ_F : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>;
def INT_NVVM_SIN_APPROX_F : F_MATH_1<"sin.approx.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_sin_approx_f>;

def INT_NVVM_COS_APPROX_FTZ_F : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>;
def INT_NVVM_COS_APPROX_F : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;

//
// Fma
//

def INT_NVVM_FMA_RN_FTZ_F
    : F_MATH_3<"fma.rn.ftz.f32 \t$dst, $src0, $src1, $src2;",
    Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_ftz_f>;
def INT_NVVM_FMA_RN_F : F_MATH_3<"fma.rn.f32 \t$dst, $src0, $src1, $src2;",
    Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_f>;
def INT_NVVM_FMA_RZ_FTZ_F
    : F_MATH_3<"fma.rz.ftz.f32 \t$dst, $src0, $src1, $src2;",
    Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_ftz_f>;
def INT_NVVM_FMA_RZ_F : F_MATH_3<"fma.rz.f32 \t$dst, $src0, $src1, $src2;",
    Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_f>;
def INT_NVVM_FMA_RM_FTZ_F
    : F_MATH_3<"fma.rm.ftz.f32 \t$dst, $src0, $src1, $src2;",
    Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_ftz_f>;
def INT_NVVM_FMA_RM_F : F_MATH_3<"fma.rm.f32 \t$dst, $src0, $src1, $src2;",
    Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_f>;
def INT_NVVM_FMA_RP_FTZ_F
    : F_MATH_3<"fma.rp.ftz.f32 \t$dst, $src0, $src1, $src2;",
    Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_ftz_f>;
def INT_NVVM_FMA_RP_F : F_MATH_3<"fma.rp.f32 \t$dst, $src0, $src1, $src2;",
    Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_f>;

def INT_NVVM_FMA_RN_D : F_MATH_3<"fma.rn.f64 \t$dst, $src0, $src1, $src2;",
    Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rn_d>;
def INT_NVVM_FMA_RZ_D : F_MATH_3<"fma.rz.f64 \t$dst, $src0, $src1, $src2;",
    Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rz_d>;
def INT_NVVM_FMA_RM_D : F_MATH_3<"fma.rm.f64 \t$dst, $src0, $src1, $src2;",
    Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rm_d>;
def INT_NVVM_FMA_RP_D : F_MATH_3<"fma.rp.f64 \t$dst, $src0, $src1, $src2;",
    Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rp_d>;

//
// Rcp
//

def INT_NVVM_RCP_RN_FTZ_F : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>;
def INT_NVVM_RCP_RN_F : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>;
def INT_NVVM_RCP_RZ_FTZ_F : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>;
def INT_NVVM_RCP_RZ_F : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>;
def INT_NVVM_RCP_RM_FTZ_F : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>;
def INT_NVVM_RCP_RM_F : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>;
def INT_NVVM_RCP_RP_FTZ_F : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>;
def INT_NVVM_RCP_RP_F : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>;

def INT_NVVM_RCP_RN_D : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;",
    Float64Regs, Float64Regs, int_nvvm_rcp_rn_d>;
def INT_NVVM_RCP_RZ_D : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;",
    Float64Regs, Float64Regs, int_nvvm_rcp_rz_d>;
def INT_NVVM_RCP_RM_D : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;",
    Float64Regs, Float64Regs, int_nvvm_rcp_rm_d>;
def INT_NVVM_RCP_RP_D : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;",
    Float64Regs, Float64Regs, int_nvvm_rcp_rp_d>;

def INT_NVVM_RCP_APPROX_FTZ_D : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;",
    Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>;

//
// Sqrt
//

def INT_NVVM_SQRT_RN_FTZ_F : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>;
def INT_NVVM_SQRT_RN_F : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_sqrt_rn_f>;
def INT_NVVM_SQRT_RZ_FTZ_F : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>;
def INT_NVVM_SQRT_RZ_F : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_sqrt_rz_f>;
def INT_NVVM_SQRT_RM_FTZ_F : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>;
def INT_NVVM_SQRT_RM_F : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_sqrt_rm_f>;
def INT_NVVM_SQRT_RP_FTZ_F : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>;
def INT_NVVM_SQRT_RP_F : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_sqrt_rp_f>;
def INT_NVVM_SQRT_APPROX_FTZ_F : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>;
def INT_NVVM_SQRT_APPROX_F : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>;

def INT_NVVM_SQRT_RN_D : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;",
    Float64Regs, Float64Regs, int_nvvm_sqrt_rn_d>;
def INT_NVVM_SQRT_RZ_D : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;",
    Float64Regs, Float64Regs, int_nvvm_sqrt_rz_d>;
def INT_NVVM_SQRT_RM_D : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;",
    Float64Regs, Float64Regs, int_nvvm_sqrt_rm_d>;
def INT_NVVM_SQRT_RP_D : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;",
    Float64Regs, Float64Regs, int_nvvm_sqrt_rp_d>;

// nvvm_sqrt intrinsic: the generic f32 sqrt is mapped onto one of the
// instructions above, chosen by the predicates in Requires; the last pattern
// carries no predicate.
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>,
      Requires<[doF32FTZ, do_SQRTF32_RN]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_RN_F Float32Regs:$a)>,
      Requires<[do_SQRTF32_RN]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>,
      Requires<[doF32FTZ]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;

//
// Rsqrt
//

def INT_NVVM_RSQRT_APPROX_FTZ_F
    : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_ftz_f>;
def INT_NVVM_RSQRT_APPROX_F : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>;
def INT_NVVM_RSQRT_APPROX_D : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
    Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>;

//
// Add
//

def INT_NVVM_ADD_RN_FTZ_F : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
def INT_NVVM_ADD_RN_F : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
def INT_NVVM_ADD_RZ_FTZ_F : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
def INT_NVVM_ADD_RZ_F : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
def INT_NVVM_ADD_RM_FTZ_F : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
def INT_NVVM_ADD_RM_F : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
def INT_NVVM_ADD_RP_FTZ_F : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
def INT_NVVM_ADD_RP_F : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>;

def INT_NVVM_ADD_RN_D : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
def INT_NVVM_ADD_RZ_D : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
def INT_NVVM_ADD_RM_D : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
def INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;

//
// Convert
//
// Each conversion intrinsic is selected onto the matching CVT_<dst>_<src>
// instruction, with the Cvt* mode given as the second operand.

// f64 -> f32
def : Pat<(int_nvvm_d2f_rn_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>;
def : Pat<(int_nvvm_d2f_rn Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_d2f_rz_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>;
def : Pat<(int_nvvm_d2f_rz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_d2f_rm_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>;
def : Pat<(int_nvvm_d2f_rm Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_d2f_rp_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>;
def : Pat<(int_nvvm_d2f_rp Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRP)>;

// f64 -> s32
def : Pat<(int_nvvm_d2i_rn Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2i_rz Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2i_rm Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2i_rp Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRPI)>;

// f64 -> u32
def : Pat<(int_nvvm_d2ui_rn Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2ui_rz Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2ui_rm Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2ui_rp Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRPI)>;

// s32 -> f64
def : Pat<(int_nvvm_i2d_rn Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_i2d_rz Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_i2d_rm Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_i2d_rp Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRP)>;

// u32 -> f64
def : Pat<(int_nvvm_ui2d_rn Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ui2d_rz Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ui2d_rm Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ui2d_rp Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRP)>;

// f32 -> s32
def : Pat<(int_nvvm_f2i_rn_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2i_rn Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2i_rz_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2i_rz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2i_rm_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2i_rm Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2i_rp_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2i_rp Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRPI)>;

// f32 -> u32
def : Pat<(int_nvvm_f2ui_rn_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rn Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2ui_rz_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2ui_rm_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rm Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2ui_rp_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rp Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRPI)>;

// s32 -> f32
def : Pat<(int_nvvm_i2f_rn Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_i2f_rz Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_i2f_rm Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_i2f_rp Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRP)>;

// u32 -> f32
def : Pat<(int_nvvm_ui2f_rn Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ui2f_rz Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ui2f_rm Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRP)>;

// Pack two i32 values into an f64 register / unpack one i32 half of an f64
// with mov.b64 and a register-pair operand.
def INT_NVVM_LOHI_I2D : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
    Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;

def INT_NVVM_D2I_LO : F_MATH_1<
    !strconcat("{{\n\t",
    !strconcat(".reg .b32 %temp; \n\t",
    !strconcat("mov.b64 \t{$dst, %temp}, $src0;\n\t",
               "}}"))),
    Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
def INT_NVVM_D2I_HI : F_MATH_1<
    !strconcat("{{\n\t",
    !strconcat(".reg .b32 %temp; \n\t",
    !strconcat("mov.b64 \t{%temp, $dst}, $src0;\n\t",
               "}}"))),
    Int32Regs, Float64Regs, int_nvvm_d2i_hi>;

// f32 -> s64
def : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rn Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2ll_rz_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2ll_rm_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rm Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2ll_rp_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rp Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRPI)>;

// f32 -> u64
def : Pat<(int_nvvm_f2ull_rn_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rn Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2ull_rz_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2ull_rm_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rm Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2ull_rp_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rp Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRPI)>;

// f64 -> s64
def : Pat<(int_nvvm_d2ll_rn Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2ll_rz Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2ll_rm Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2ll_rp Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRPI)>;

// f64 -> u64
def : Pat<(int_nvvm_d2ull_rn Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2ull_rz Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2ull_rm Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2ull_rp Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRPI)>;

// s64 -> f32
def : Pat<(int_nvvm_ll2f_rn Int64Regs:$a),
          (CVT_f32_s64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ll2f_rz Int64Regs:$a),
          (CVT_f32_s64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ll2f_rm Int64Regs:$a),
          (CVT_f32_s64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ll2f_rp Int64Regs:$a),
          (CVT_f32_s64 Int64Regs:$a, CvtRP)>;

// u64 -> f32
def : Pat<(int_nvvm_ull2f_rn Int64Regs:$a),
          (CVT_f32_u64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ull2f_rz Int64Regs:$a),
          (CVT_f32_u64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ull2f_rm Int64Regs:$a),
          (CVT_f32_u64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ull2f_rp Int64Regs:$a),
          (CVT_f32_u64 Int64Regs:$a, CvtRP)>;

// s64 -> f64
def : Pat<(int_nvvm_ll2d_rn Int64Regs:$a),
          (CVT_f64_s64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ll2d_rz Int64Regs:$a),
          (CVT_f64_s64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ll2d_rm Int64Regs:$a),
          (CVT_f64_s64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ll2d_rp Int64Regs:$a),
          (CVT_f64_s64 Int64Regs:$a, CvtRP)>;

// u64 -> f64
def : Pat<(int_nvvm_ull2d_rn Int64Regs:$a),
          (CVT_f64_u64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ull2d_rz Int64Regs:$a),
          (CVT_f64_u64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ull2d_rm Int64Regs:$a),
          (CVT_f64_u64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ull2d_rp Int64Regs:$a),
          (CVT_f64_u64 Int64Regs:$a, CvtRP)>;


// FIXME: Ideally, we could use these patterns instead of the scope-creating
// patterns, but ptxas does not like these since .s16 is not compatible with
// .f16.  The solution is to use .bXX for all integer register types, but we
// are not there yet.
//def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a),
//          (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>;
//def : Pat<(int_nvvm_f2h_rn Float32Regs:$a),
//          (CVT_f16_f32 Float32Regs:$a, CvtRN)>;
//
//def : Pat<(int_nvvm_h2f Int16Regs:$a),
//          (CVT_f32_f16 Int16Regs:$a, CvtNONE)>;

// Scope-creating f32 <-> f16 conversions: the half value travels through a
// scoped .b16 temporary and is moved to/from the 16-bit integer register.
def INT_NVVM_F2H_RN_FTZ : F_MATH_1<
    !strconcat("{{\n\t",
    !strconcat(".reg .b16 %temp;\n\t",
    !strconcat("cvt.rn.ftz.f16.f32 \t%temp, $src0;\n\t",
    !strconcat("mov.b16 \t$dst, %temp;\n",
               "}}")))),
    Int16Regs, Float32Regs, int_nvvm_f2h_rn_ftz>;
def INT_NVVM_F2H_RN : F_MATH_1<
    !strconcat("{{\n\t",
    !strconcat(".reg .b16 %temp;\n\t",
    !strconcat("cvt.rn.f16.f32 \t%temp, $src0;\n\t",
    !strconcat("mov.b16 \t$dst, %temp;\n",
               "}}")))),
    Int16Regs, Float32Regs, int_nvvm_f2h_rn>;

def INT_NVVM_H2F : F_MATH_1<
    !strconcat("{{\n\t",
    !strconcat(".reg .b16 %temp;\n\t",
    !strconcat("mov.b16 \t%temp, $src0;\n\t",
    !strconcat("cvt.f32.f16 \t$dst, %temp;\n\t",
               "}}")))),
    Float32Regs, Int16Regs, int_nvvm_h2f>;

// Generic f16 <-> f32 conversion nodes.
def : Pat<(f32 (f16_to_f32 Int16Regs:$a)),
          (CVT_f32_f16 Int16Regs:$a, CvtNONE)>;
def : Pat<(i16 (f32_to_f16 Float32Regs:$a)),
          (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>,
      Requires<[doF32FTZ]>;
def : Pat<(i16 (f32_to_f16 Float32Regs:$a)),
          (CVT_f16_f32 Float32Regs:$a, CvtRN)>;

//
// Bitcast
//

def INT_NVVM_BITCAST_F2I : F_MATH_1<"mov.b32 \t$dst, $src0;",
    Int32Regs, Float32Regs, int_nvvm_bitcast_f2i>;
def INT_NVVM_BITCAST_I2F : F_MATH_1<"mov.b32 \t$dst, $src0;",
    Float32Regs, Int32Regs, int_nvvm_bitcast_i2f>;

def INT_NVVM_BITCAST_LL2D : F_MATH_1<"mov.b64 \t$dst, $src0;",
    Float64Regs, Int64Regs, int_nvvm_bitcast_ll2d>;
def INT_NVVM_BITCAST_D2LL : F_MATH_1<"mov.b64 \t$dst, $src0;",
    Int64Regs, Float64Regs, int_nvvm_bitcast_d2ll>;

//-----------------------------------
// Atomic Functions
//-----------------------------------

// Pattern fragments whose predicate passes the node and an address-space
// constant to ChkMemSDNodeAddressSpace.
class ATOMIC_GLOBAL_CHK <dag ops, dag frag>
  : PatFrag<ops, frag, [{
    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
}]>;
class ATOMIC_SHARED_CHK <dag ops, dag frag>
  : PatFrag<ops, frag, [{
    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
}]>;
class ATOMIC_GENERIC_CHK <dag ops, dag frag>
  : PatFrag<ops, frag, [{
    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
}]>;

// Two-operand atomic "atom<SpaceStr><OpcStr><TypeStr> $dst, [$addr], $b;",
// defined once with a register $b ("reg") and once with an immediate $b
// ("imm"). Both variants are guarded by Pred.
multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
                          string SpaceStr, string TypeStr, string OpcStr,
                          PatFrag IntOp, Operand IMMType, SDNode IMM,
                          Predicate Pred> {
  def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
      !strconcat("atom",
      !strconcat(SpaceStr,
      !strconcat(OpcStr,
      !strconcat(TypeStr,
                 " \t$dst, [$addr], $b;")))),
      [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
    Requires<[Pred]>;
  def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b),
      !strconcat("atom",
      !strconcat(SpaceStr,
      !strconcat(OpcStr,
      !strconcat(TypeStr,
                 " \t$dst, [$addr], $b;")))),
      [(set regclass:$dst, (IntOp ptrclass:$addr, IMM:$b))]>,
    Requires<[Pred]>;
}
multiclass F_ATOMIC_2<NVPTXRegClass
regclass, string SpaceStr, string TypeStr, 854 string OpcStr, PatFrag IntOp, Operand IMMType, SDNode IMM, Predicate Pred> { 855 defm p32 : F_ATOMIC_2_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr, 856 IntOp, IMMType, IMM, Pred>; 857 defm p64 : F_ATOMIC_2_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr, 858 IntOp, IMMType, IMM, Pred>; 859} 860 861// has 2 operands, neg the second one 862multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass, 863 string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp, 864 Operand IMMType, Predicate Pred> { 865 def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b), 866 !strconcat("{{ \n\t", 867 !strconcat(".reg \t.s", 868 !strconcat(TypeStr, 869 !strconcat(" temp; \n\t", 870 !strconcat("neg.s", 871 !strconcat(TypeStr, 872 !strconcat(" \ttemp, $b; \n\t", 873 !strconcat("atom", 874 !strconcat(SpaceStr, 875 !strconcat(OpcStr, 876 !strconcat(".u", 877 !strconcat(TypeStr, 878 !strconcat(" \t$dst, [$addr], temp; \n\t", 879 !strconcat("}}", "")))))))))))))), 880 [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>, 881 Requires<[Pred]>; 882} 883multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr, 884 string TypeStr, string OpcStr, PatFrag IntOp, Operand IMMType, 885 Predicate Pred> { 886 defm p32: F_ATOMIC_2_NEG_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr, 887 IntOp, IMMType, Pred> ; 888 defm p64: F_ATOMIC_2_NEG_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr, 889 IntOp, IMMType, Pred> ; 890} 891 892// has 3 operands 893multiclass F_ATOMIC_3_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass, 894 string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp, 895 Operand IMMType, Predicate Pred> { 896 def reg : NVPTXInst<(outs regclass:$dst), 897 (ins ptrclass:$addr, regclass:$b, regclass:$c), 898 !strconcat("atom", 899 !strconcat(SpaceStr, 900 !strconcat(OpcStr, 901 !strconcat(TypeStr, 902 !strconcat(" \t$dst, [$addr], $b, $c;", ""))))), 903 [(set 
regclass:$dst, 904 (IntOp ptrclass:$addr, regclass:$b, regclass:$c))]>, 905 Requires<[Pred]>; 906 def imm1 : NVPTXInst<(outs regclass:$dst), 907 (ins ptrclass:$addr, IMMType:$b, regclass:$c), 908 !strconcat("atom", 909 !strconcat(SpaceStr, 910 !strconcat(OpcStr, 911 !strconcat(TypeStr, 912 !strconcat(" \t$dst, [$addr], $b, $c;", ""))))), 913 [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, regclass:$c))]>, 914 Requires<[Pred]>; 915 def imm2 : NVPTXInst<(outs regclass:$dst), 916 (ins ptrclass:$addr, regclass:$b, IMMType:$c), 917 !strconcat("atom", 918 !strconcat(SpaceStr, 919 !strconcat(OpcStr, 920 !strconcat(TypeStr, 921 !strconcat(" \t$dst, [$addr], $b, $c;", ""))))), 922 [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, imm:$c))]>, 923 Requires<[Pred]>; 924 def imm3 : NVPTXInst<(outs regclass:$dst), 925 (ins ptrclass:$addr, IMMType:$b, IMMType:$c), 926 !strconcat("atom", 927 !strconcat(SpaceStr, 928 !strconcat(OpcStr, 929 !strconcat(TypeStr, 930 !strconcat(" \t$dst, [$addr], $b, $c;", ""))))), 931 [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, imm:$c))]>, 932 Requires<[Pred]>; 933} 934multiclass F_ATOMIC_3<NVPTXRegClass regclass, string SpaceStr, string TypeStr, 935 string OpcStr, PatFrag IntOp, Operand IMMType, Predicate Pred> { 936 defm p32 : F_ATOMIC_3_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr, 937 IntOp, IMMType, Pred>; 938 defm p64 : F_ATOMIC_3_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr, 939 IntOp, IMMType, Pred>; 940} 941 942// atom_add 943 944def atomic_load_add_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 945 (atomic_load_add_32 node:$a, node:$b)>; 946def atomic_load_add_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 947 (atomic_load_add_32 node:$a, node:$b)>; 948def atomic_load_add_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 949 (atomic_load_add_32 node:$a, node:$b)>; 950def atomic_load_add_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 951 (atomic_load_add_64 node:$a, node:$b)>; 952def atomic_load_add_64_s: 
ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 953 (atomic_load_add_64 node:$a, node:$b)>; 954def atomic_load_add_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 955 (atomic_load_add_64 node:$a, node:$b)>; 956def atomic_load_add_f32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 957 (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>; 958def atomic_load_add_f32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 959 (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>; 960def atomic_load_add_f32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 961 (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>; 962 963defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add", 964 atomic_load_add_32_g, i32imm, imm, hasAtomRedG32>; 965defm INT_PTX_ATOM_ADD_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".add", 966 atomic_load_add_32_s, i32imm, imm, hasAtomRedS32>; 967defm INT_PTX_ATOM_ADD_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".add", 968 atomic_load_add_32_gen, i32imm, imm, hasAtomRedGen32>; 969defm INT_PTX_ATOM_ADD_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32", 970 ".add", atomic_load_add_32_gen, i32imm, imm, useAtomRedG32forGen32>; 971 972defm INT_PTX_ATOM_ADD_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add", 973 atomic_load_add_64_g, i64imm, imm, hasAtomRedG64>; 974defm INT_PTX_ATOM_ADD_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".add", 975 atomic_load_add_64_s, i64imm, imm, hasAtomRedS64>; 976defm INT_PTX_ATOM_ADD_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".add", 977 atomic_load_add_64_gen, i64imm, imm, hasAtomRedGen64>; 978defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".u64", 979 ".add", atomic_load_add_64_gen, i64imm, imm, useAtomRedG64forGen64>; 980 981defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<Float32Regs, ".global", ".f32", ".add", 982 atomic_load_add_f32_g, f32imm, fpimm, hasAtomAddF32>; 983defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<Float32Regs, ".shared", ".f32", ".add", 984 atomic_load_add_f32_s, f32imm, fpimm, 
hasAtomAddF32>; 985defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<Float32Regs, "", ".f32", ".add", 986 atomic_load_add_f32_gen, f32imm, fpimm, hasAtomAddF32>; 987 988// atom_sub 989 990def atomic_load_sub_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 991 (atomic_load_sub_32 node:$a, node:$b)>; 992def atomic_load_sub_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 993 (atomic_load_sub_32 node:$a, node:$b)>; 994def atomic_load_sub_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 995 (atomic_load_sub_32 node:$a, node:$b)>; 996def atomic_load_sub_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 997 (atomic_load_sub_64 node:$a, node:$b)>; 998def atomic_load_sub_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 999 (atomic_load_sub_64 node:$a, node:$b)>; 1000def atomic_load_sub_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1001 (atomic_load_sub_64 node:$a, node:$b)>; 1002 1003defm INT_PTX_ATOM_SUB_G_32 : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add", 1004 atomic_load_sub_32_g, i32imm, hasAtomRedG32>; 1005defm INT_PTX_ATOM_SUB_G_64 : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add", 1006 atomic_load_sub_64_g, i64imm, hasAtomRedG64>; 1007defm INT_PTX_ATOM_SUB_GEN_32 : F_ATOMIC_2_NEG<Int32Regs, "", "32", ".add", 1008 atomic_load_sub_32_gen, i32imm, hasAtomRedGen32>; 1009defm INT_PTX_ATOM_SUB_GEN_32_USE_G : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", 1010 ".add", atomic_load_sub_32_gen, i32imm, useAtomRedG32forGen32>; 1011defm INT_PTX_ATOM_SUB_S_32 : F_ATOMIC_2_NEG<Int32Regs, ".shared", "32", ".add", 1012 atomic_load_sub_32_s, i32imm, hasAtomRedS32>; 1013defm INT_PTX_ATOM_SUB_S_64 : F_ATOMIC_2_NEG<Int64Regs, ".shared", "64", ".add", 1014 atomic_load_sub_64_s, i64imm, hasAtomRedS64>; 1015defm INT_PTX_ATOM_SUB_GEN_64 : F_ATOMIC_2_NEG<Int64Regs, "", "64", ".add", 1016 atomic_load_sub_64_gen, i64imm, hasAtomRedGen64>; 1017defm INT_PTX_ATOM_SUB_GEN_64_USE_G : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", 1018 ".add", atomic_load_sub_64_gen, i64imm, useAtomRedG64forGen64>; 1019 
// atom_swap

def atomic_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_swap_64 node:$a, node:$b)>;
def atomic_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_swap_64 node:$a, node:$b)>;
def atomic_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_swap_64 node:$a, node:$b)>;

defm INT_PTX_ATOM_SWAP_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
  atomic_swap_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_SWAP_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".exch",
  atomic_swap_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_SWAP_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".exch",
  atomic_swap_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_SWAP_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
  ".exch", atomic_swap_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_SWAP_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
  atomic_swap_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_SWAP_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".exch",
  atomic_swap_64_s, i64imm, imm, hasAtomRedS64>;
defm INT_PTX_ATOM_SWAP_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".exch",
  atomic_swap_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_SWAP_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
  ".exch", atomic_swap_64_gen, i64imm, imm, useAtomRedG64forGen64>;

// atom_max
//
// Signed variants print ".s32", unsigned (umax) variants print ".u32".

def atomic_load_max_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_umax_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_umax_32 node:$a, node:$b)>;

defm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
  ".max", atomic_load_max_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_LOAD_MAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
  ".max", atomic_load_max_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".max",
  atomic_load_max_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
  ".s32", ".max", atomic_load_max_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
  ".max", atomic_load_umax_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
  ".max", atomic_load_umax_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".max",
  atomic_load_umax_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
  ".u32", ".max", atomic_load_umax_32_gen, i32imm, imm, useAtomRedG32forGen32>;

// atom_min

def atomic_load_min_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_umin_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_umin_32 node:$a, node:$b)>;

defm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
  ".min", atomic_load_min_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_LOAD_MIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
  ".min", atomic_load_min_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".min",
  atomic_load_min_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
  ".s32", ".min", atomic_load_min_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
  ".min", atomic_load_umin_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
  ".min", atomic_load_umin_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".min",
  atomic_load_umin_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
  ".u32", ".min", atomic_load_umin_32_gen, i32imm, imm, useAtomRedG32forGen32>;

// atom_inc  atom_dec
//
// These PatFrags wrap the int_nvvm_atomic_load_{inc,dec}_32 intrinsics.

def atomic_load_inc_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_inc_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_inc_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_dec_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
def atomic_load_dec_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
def atomic_load_dec_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;

defm INT_PTX_ATOM_INC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
  atomic_load_inc_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_INC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".inc",
  atomic_load_inc_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_INC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".inc",
  atomic_load_inc_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_INC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
  ".inc", atomic_load_inc_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_DEC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec",
  atomic_load_dec_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_DEC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".dec",
  atomic_load_dec_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_DEC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".dec",
  atomic_load_dec_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_DEC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
  ".dec", atomic_load_dec_32_gen, i32imm, imm, useAtomRedG32forGen32>;

// atom_and

def atomic_load_and_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_and_32 node:$a, node:$b)>;

defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
  atomic_load_and_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_AND_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".and",
  atomic_load_and_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".and",
  atomic_load_and_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
  ".and", atomic_load_and_32_gen, i32imm, imm, useAtomRedG32forGen32>;

// atom_or

def atomic_load_or_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_or_32 node:$a, node:$b)>;

defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
  atomic_load_or_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_OR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".or",
  atomic_load_or_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
  ".or", atomic_load_or_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or",
  atomic_load_or_32_s, i32imm, imm, hasAtomRedS32>;

// atom_xor

def atomic_load_xor_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_xor_32 node:$a, node:$b)>;

defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
  atomic_load_xor_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_XOR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".xor",
  atomic_load_xor_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor",
  atomic_load_xor_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
  ".xor", atomic_load_xor_32_gen, i32imm, imm, useAtomRedG32forGen32>;

// atom_cas
//
// Three-operand compare-and-swap, instantiated through F_ATOMIC_3.

def atomic_cmp_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
  (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
  (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
  (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
  (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
  (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
  (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;

defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas",
  atomic_cmp_swap_32_g, i32imm, hasAtomRedG32>;
defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3<Int32Regs, ".shared", ".b32", ".cas",
  atomic_cmp_swap_32_s, i32imm, hasAtomRedS32>;
defm INT_PTX_ATOM_CAS_GEN_32 : F_ATOMIC_3<Int32Regs, "", ".b32", ".cas",
  atomic_cmp_swap_32_gen, i32imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_CAS_GEN_32_USE_G : F_ATOMIC_3<Int32Regs, ".global", ".b32",
  ".cas", atomic_cmp_swap_32_gen, i32imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_CAS_G_64 : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas",
  atomic_cmp_swap_64_g, i64imm, hasAtomRedG64>;
defm INT_PTX_ATOM_CAS_S_64 : F_ATOMIC_3<Int64Regs, ".shared", ".b64", ".cas",
  atomic_cmp_swap_64_s, i64imm, hasAtomRedS64>;
defm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3<Int64Regs, "", ".b64", ".cas",
  atomic_cmp_swap_64_gen, i64imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<Int64Regs, ".global", ".b64",
  ".cas", atomic_cmp_swap_64_gen, i64imm, useAtomRedG64forGen64>;


//-----------------------------------
// Read Special Registers
//-----------------------------------

// Zero-input instruction that prints OpStr verbatim and is selected for the
// operand-less intrinsic IntOp, producing its result in regclassOut.
class F_SREG<string OpStr, NVPTXRegClass regclassOut, Intrinsic IntOp> :
  NVPTXInst<(outs regclassOut:$dst), (ins),
            OpStr,
            [(set regclassOut:$dst, (IntOp))]>;

def INT_PTX_SREG_TID_X : F_SREG<"mov.u32 \t$dst, %tid.x;", Int32Regs,
  int_nvvm_read_ptx_sreg_tid_x>;
def INT_PTX_SREG_TID_Y : F_SREG<"mov.u32 \t$dst, %tid.y;", Int32Regs,
  int_nvvm_read_ptx_sreg_tid_y>;
def INT_PTX_SREG_TID_Z : F_SREG<"mov.u32 \t$dst, %tid.z;", Int32Regs,
  int_nvvm_read_ptx_sreg_tid_z>;

def INT_PTX_SREG_NTID_X : F_SREG<"mov.u32 \t$dst, %ntid.x;", Int32Regs,
  int_nvvm_read_ptx_sreg_ntid_x>;
def INT_PTX_SREG_NTID_Y : F_SREG<"mov.u32 \t$dst, %ntid.y;", Int32Regs,
  int_nvvm_read_ptx_sreg_ntid_y>;
def INT_PTX_SREG_NTID_Z : F_SREG<"mov.u32 \t$dst, %ntid.z;", Int32Regs,
  int_nvvm_read_ptx_sreg_ntid_z>;

def INT_PTX_SREG_CTAID_X : F_SREG<"mov.u32 \t$dst, %ctaid.x;", Int32Regs,
  int_nvvm_read_ptx_sreg_ctaid_x>;
def INT_PTX_SREG_CTAID_Y : F_SREG<"mov.u32 \t$dst, %ctaid.y;", Int32Regs,
  int_nvvm_read_ptx_sreg_ctaid_y>;
def INT_PTX_SREG_CTAID_Z : F_SREG<"mov.u32 \t$dst, %ctaid.z;", Int32Regs,
  int_nvvm_read_ptx_sreg_ctaid_z>;

def INT_PTX_SREG_NCTAID_X : F_SREG<"mov.u32 \t$dst, %nctaid.x;", Int32Regs,
  int_nvvm_read_ptx_sreg_nctaid_x>;
def INT_PTX_SREG_NCTAID_Y : F_SREG<"mov.u32 \t$dst, %nctaid.y;", Int32Regs,
  int_nvvm_read_ptx_sreg_nctaid_y>;
def INT_PTX_SREG_NCTAID_Z : F_SREG<"mov.u32 \t$dst, %nctaid.z;", Int32Regs,
  int_nvvm_read_ptx_sreg_nctaid_z>;

def INT_PTX_SREG_WARPSIZE : F_SREG<"mov.u32 \t$dst, WARP_SZ;", Int32Regs,
  int_nvvm_read_ptx_sreg_warpsize>;


//-----------------------------------
// Support for ldu on sm_20 or later
//-----------------------------------

// Matches int_nvvm_ldu_global_i only when the node's memory VT is i8.
def ldu_i8 : PatFrag<(ops node:$ptr), (int_nvvm_ldu_global_i node:$ptr), [{
  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
  return M->getMemoryVT() == MVT::i8;
}]>;

// Scalar
// @TODO: Revisit this, Changed imemAny to imem
// One "ldu.global.<TyStr>" instruction per addressing form: 32/64-bit
// register (areg/areg64), wrapped global address (avar), and
// register+immediate (ari/ari64).  All forms require hasLDU.
multiclass LDU_G<string TyStr, NVPTXRegClass regclass, Intrinsic IntOp> {
  def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
    !strconcat("ldu.global.", TyStr),
    [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDU]>;
  def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
    !strconcat("ldu.global.", TyStr),
    [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDU]>;
  def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src),
    !strconcat("ldu.global.", TyStr),
    [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>,
    Requires<[hasLDU]>;
  def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
    !strconcat("ldu.global.", TyStr),
    [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDU]>;
  def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
    !strconcat("ldu.global.", TyStr),
    [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDU]>;
}

// Same instruction set as LDU_G, but IntOp is a PatFrag rather than an
// Intrinsic (used for the i8 case via ldu_i8).
multiclass LDU_G_NOINTRIN<string TyStr, NVPTXRegClass regclass, PatFrag IntOp> {
  def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
    !strconcat("ldu.global.", TyStr),
    [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDU]>;
  def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
    !strconcat("ldu.global.", TyStr),
    [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDU]>;
  def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src),
    !strconcat("ldu.global.", TyStr),
    [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>,
    Requires<[hasLDU]>;
  def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
    !strconcat("ldu.global.", TyStr),
    [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDU]>;
  def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
    !strconcat("ldu.global.", TyStr),
    [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDU]>;
}

defm INT_PTX_LDU_GLOBAL_i8 : LDU_G_NOINTRIN<"u8 \t$result, [$src];", Int16Regs,
  ldu_i8>;
defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs,
  int_nvvm_ldu_global_i>;
defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs,
  int_nvvm_ldu_global_i>;
defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs,
  int_nvvm_ldu_global_i>;
defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs,
  int_nvvm_ldu_global_f>;
defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs,
  int_nvvm_ldu_global_f>;
defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs,
  int_nvvm_ldu_global_p>;
defm INT_PTX_LDU_GLOBAL_p64 : LDU_G<"u64 \t$result, [$src];", Int64Regs,
  int_nvvm_ldu_global_p>;

// vector

// Elementized vector ldu
// These instructions carry an empty pattern list, so TableGen generates no
// selection pattern for them.
// NOTE(review): presumably they are selected by hand-written C++ elsewhere in
// the backend -- not visible in this file section.
multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
    (ins Int32Regs:$src),
    !strconcat("ldu.global.", TyStr), []>;
  def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
    (ins Int64Regs:$src),
    !strconcat("ldu.global.", TyStr), []>;
  def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
    (ins MEMri:$src),
    !strconcat("ldu.global.", TyStr), []>;
  def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
    (ins MEMri64:$src),
    !strconcat("ldu.global.", TyStr), []>;
  def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
    (ins imemAny:$src),
    !strconcat("ldu.global.", TyStr), []>;
}

multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
    regclass:$dst4), (ins Int32Regs:$src),
    !strconcat("ldu.global.", TyStr), []>;
  def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
    regclass:$dst4), (ins Int64Regs:$src),
    !strconcat("ldu.global.", TyStr), []>;
  def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
    regclass:$dst4), (ins MEMri:$src),
    !strconcat("ldu.global.", TyStr), []>;
  def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
    regclass:$dst4), (ins MEMri64:$src),
    !strconcat("ldu.global.", TyStr), []>;
  def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
    regclass:$dst4), (ins imemAny:$src),
    !strconcat("ldu.global.", TyStr), []>;
}

defm INT_PTX_LDU_G_v2i8_ELE
  : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i16_ELE
  : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i32_ELE
  : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
defm INT_PTX_LDU_G_v2f32_ELE
  : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDU_G_v2i64_ELE
  : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
defm INT_PTX_LDU_G_v2f64_ELE
  : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
defm INT_PTX_LDU_G_v4i8_ELE
  : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v4i16_ELE
  : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
    Int16Regs>;
defm INT_PTX_LDU_G_v4i32_ELE
  : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
    Int32Regs>;
defm INT_PTX_LDU_G_v4f32_ELE
  : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
    Float32Regs>;


//-----------------------------------
// Support for ldg on sm_35 or later
//-----------------------------------

// Matches int_nvvm_ldg_global_i only when the node's memory VT is i8.
def ldg_i8 : PatFrag<(ops node:$ptr), (int_nvvm_ldg_global_i node:$ptr), [{
  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
  return M->getMemoryVT() == MVT::i8;
}]>;

// One "ld.global.nc.<TyStr>" instruction per addressing form, mirroring
// LDU_G.  All forms require hasLDG.
multiclass LDG_G<string TyStr, NVPTXRegClass regclass, Intrinsic IntOp> {
  def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
    !strconcat("ld.global.nc.", TyStr),
    [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDG]>;
  def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
    !strconcat("ld.global.nc.", TyStr),
    [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDG]>;
  def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src),
    !strconcat("ld.global.nc.", TyStr),
    [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>,
    Requires<[hasLDG]>;
  def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
    !strconcat("ld.global.nc.", TyStr),
    [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDG]>;
  def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
    !strconcat("ld.global.nc.", TyStr),
    [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDG]>;
}

// Same instruction set as LDG_G, but IntOp is a PatFrag rather than an
// Intrinsic (used for the i8 case via ldg_i8).
multiclass LDG_G_NOINTRIN<string TyStr, NVPTXRegClass regclass, PatFrag IntOp> {
  def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
    !strconcat("ld.global.nc.", TyStr),
    [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDG]>;
  def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
    !strconcat("ld.global.nc.", TyStr),
    [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDG]>;
  def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src),
    !strconcat("ld.global.nc.", TyStr),
    [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>,
    Requires<[hasLDG]>;
  def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
    !strconcat("ld.global.nc.", TyStr),
    [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDG]>;
  def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
    !strconcat("ld.global.nc.", TyStr),
    [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDG]>;
}

defm INT_PTX_LDG_GLOBAL_i8
  : LDG_G_NOINTRIN<"u8 \t$result, [$src];", Int16Regs, ldg_i8>;
defm INT_PTX_LDG_GLOBAL_i16
  : LDG_G<"u16 \t$result, [$src];", Int16Regs, int_nvvm_ldg_global_i>;
defm INT_PTX_LDG_GLOBAL_i32
  : LDG_G<"u32 \t$result, [$src];", Int32Regs, int_nvvm_ldg_global_i>;
defm INT_PTX_LDG_GLOBAL_i64
  : LDG_G<"u64 \t$result, [$src];", Int64Regs, int_nvvm_ldg_global_i>;
defm INT_PTX_LDG_GLOBAL_f32
  : LDG_G<"f32 \t$result, [$src];", Float32Regs, int_nvvm_ldg_global_f>;
defm INT_PTX_LDG_GLOBAL_f64
  : LDG_G<"f64 \t$result, [$src];", Float64Regs, int_nvvm_ldg_global_f>;
defm INT_PTX_LDG_GLOBAL_p32
  : LDG_G<"u32 \t$result, [$src];", Int32Regs, int_nvvm_ldg_global_p>;
defm INT_PTX_LDG_GLOBAL_p64
  : LDG_G<"u64 \t$result, [$src];", Int64Regs, int_nvvm_ldg_global_p>;

// vector

// Elementized vector ldg
// As with the vector ldu multiclasses, the pattern lists here are empty.
multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
    (ins Int32Regs:$src),
    !strconcat("ld.global.nc.", TyStr), []>;
  def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
    (ins Int64Regs:$src),
    !strconcat("ld.global.nc.", TyStr), []>;
  def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
    (ins MEMri:$src),
    !strconcat("ld.global.nc.", TyStr), []>;
  def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
    (ins MEMri64:$src),
    !strconcat("ld.global.nc.", TyStr), []>;
  def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
    (ins imemAny:$src),
    !strconcat("ld.global.nc.", TyStr), []>;
}

multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
    regclass:$dst4), (ins Int32Regs:$src),
    !strconcat("ld.global.nc.", TyStr), []>;
  def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
    regclass:$dst4), (ins Int64Regs:$src),
    !strconcat("ld.global.nc.", TyStr), []>;
  def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
    regclass:$dst4), (ins MEMri:$src),
    !strconcat("ld.global.nc.", TyStr), []>;
  def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
    regclass:$dst4), (ins MEMri64:$src),
    !strconcat("ld.global.nc.", TyStr), []>;
  def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
    regclass:$dst4), (ins imemAny:$src),
    !strconcat("ld.global.nc.", TyStr), []>;
}

// FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
// Vector ldg instantiations. The 8-bit element forms use Int16Regs for their
// results, the same register class as the 16-bit forms.
defm INT_PTX_LDG_G_v2i8_ELE
  : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v2i16_ELE
  : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v2i32_ELE
  : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
defm INT_PTX_LDG_G_v2f32_ELE
  : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDG_G_v2i64_ELE
  : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
defm INT_PTX_LDG_G_v2f64_ELE
  : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
defm INT_PTX_LDG_G_v4i8_ELE
  : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v4i16_ELE
  : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v4i32_ELE
  : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
defm INT_PTX_LDG_G_v4f32_ELE
  : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;


// Address-space-to-generic pointer conversion.
//   Str    - address space name spliced into the "cvta.<Str>.uNN" mnemonic.
//   Intrin - intrinsic matched by the selection pattern.
// _yes/_yes_64 emit cvta and are predicated on hasGenericLdSt; _no/_no_64
// match the same intrinsic, carry no predicate here, and emit a plain mov.
multiclass NG_TO_G<string Str, Intrinsic Intrin> {
  def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
                       !strconcat("cvta.", !strconcat(Str, ".u32 \t$result, $src;")),
                       [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>,
             Requires<[hasGenericLdSt]>;
  def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
                          !strconcat("cvta.", !strconcat(Str, ".u64 \t$result, $src;")),
                          [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>,
                Requires<[hasGenericLdSt]>;

// @TODO: Are these actually needed? I believe global addresses will be copied
// to register values anyway.
  /*def __addr_yes : NVPTXInst<(outs Int32Regs:$result), (ins imemAny:$src),
          !strconcat("cvta.", !strconcat(Str, ".u32 \t$result, $src;")),
      [(set Int32Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>,
      Requires<[hasGenericLdSt]>;
  def __addr_yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins imemAny:$src),
          !strconcat("cvta.", !strconcat(Str, ".u64 \t$result, $src;")),
      [(set Int64Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>,
      Requires<[hasGenericLdSt]>;*/

  def _no : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
                      "mov.u32 \t$result, $src;",
                      [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
  def _no_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
                         "mov.u64 \t$result, $src;",
                         [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;

// @TODO: Are these actually needed? I believe global addresses will be copied
// to register values anyway.
  /*def _addr_no : NVPTXInst<(outs Int32Regs:$result), (ins imem:$src),
          "mov.u32 \t$result, $src;",
      [(set Int32Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>;
  def _addr_no_64 : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
          "mov.u64 \t$result, $src;",
      [(set Int64Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>;*/
}

// Generic-to-address-space pointer conversion. Same layout as NG_TO_G, but
// the gated variants use the "cvta.to.<Str>.uNN" mnemonic.
multiclass G_TO_NG<string Str, Intrinsic Intrin> {
  def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
                       !strconcat("cvta.to.", !strconcat(Str, ".u32 \t$result, $src;")),
                       [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>,
             Requires<[hasGenericLdSt]>;
  def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
                          !strconcat("cvta.to.", !strconcat(Str, ".u64 \t$result, $src;")),
                          [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>,
                Requires<[hasGenericLdSt]>;
  def _no : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
                      "mov.u32 \t$result, $src;",
                      [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
  def _no_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
                         "mov.u64 \t$result, $src;",
                         [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
}

defm cvta_local  : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>;
defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
defm cvta_const  : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>;

defm cvta_to_local  : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>;
defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
defm cvta_to_const  : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>;


// nvvm.ptr.gen.to.param
// Lowered to a register-to-register mov in both pointer widths.
def nvvm_ptr_gen_to_param : NVPTXInst<(outs Int32Regs:$result),
                                      (ins Int32Regs:$src),
                                      "mov.u32 \t$result, $src;",
                                      [(set Int32Regs:$result,
                                            (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
def nvvm_ptr_gen_to_param_64 : NVPTXInst<(outs Int64Regs:$result),
                                         (ins Int64Regs:$src),
                                         "mov.u64 \t$result, $src;",
                                         [(set Int64Regs:$result,
                                               (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;


// nvvm.move intrinsics
// One mov instruction per intrinsic and register class. The pointer variant
// is defined twice, once for each pointer width.
def nvvm_move_i16 : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),
                              "mov.b16 \t$r, $s;",
                              [(set Int16Regs:$r,
                                    (int_nvvm_move_i16 Int16Regs:$s))]>;
def nvvm_move_i32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
                              "mov.b32 \t$r, $s;",
                              [(set Int32Regs:$r,
                                    (int_nvvm_move_i32 Int32Regs:$s))]>;
def nvvm_move_i64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
                              "mov.b64 \t$r, $s;",
                              [(set Int64Regs:$r,
                                    (int_nvvm_move_i64 Int64Regs:$s))]>;
def nvvm_move_float : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s),
                                "mov.f32 \t$r, $s;",
                                [(set Float32Regs:$r,
                                      (int_nvvm_move_float Float32Regs:$s))]>;
def nvvm_move_double : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s),
                                 "mov.f64 \t$r, $s;",
                                 [(set Float64Regs:$r,
                                       (int_nvvm_move_double Float64Regs:$s))]>;
def nvvm_move_ptr32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
                                "mov.u32 \t$r, $s;",
                                [(set Int32Regs:$r,
                                      (int_nvvm_move_ptr Int32Regs:$s))]>;
def nvvm_move_ptr64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
                                "mov.u64 \t$r, $s;",
                                [(set Int64Regs:$r,
                                      (int_nvvm_move_ptr Int64Regs:$s))]>;

// @TODO: Are these actually needed, or will we always just see symbols
// copied to registers first?
/*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
                             "mov.u32 \t$r, $s;",
                             [(set Int32Regs:$r,
                               (int_nvvm_move_ptr texternalsym:$s))]>;
def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
                             "mov.u64 \t$r, $s;",
                             [(set Int64Regs:$r,
                               (int_nvvm_move_ptr texternalsym:$s))]>;*/


// MoveParam        %r1, param
// ptr_local_to_gen %r2, %r1
// ptr_gen_to_local %r3, %r2
// ->
// mov %r1, param

// @TODO: Revisit this. There is a type
// contradiction between iPTRAny and iPTR for the addr defs, so the move_sym
// instructions are not currently defined. However, we can use the ptr
// variants and the asm printer will do the right thing.
// Fold gen_to_local(local_to_gen(MoveParam sym)) into a single pointer move
// of the symbol, in both pointer widths.
def : Pat<(i64 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
                 (MoveParam texternalsym:$src)))),
          (nvvm_move_ptr64 texternalsym:$src)>;
def : Pat<(i32 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
                 (MoveParam texternalsym:$src)))),
          (nvvm_move_ptr32 texternalsym:$src)>;


//-----------------------------------
// Compiler Error Warn
// - Just ignore them in codegen
//-----------------------------------

// Each instruction takes the intrinsic's register argument, produces no
// result, and has an asm string consisting solely of a "//" comment.
def INT_NVVM_COMPILER_WARN_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
                                          "// llvm.nvvm.compiler.warn()",
                                          [(int_nvvm_compiler_warn Int32Regs:$a)]>;
def INT_NVVM_COMPILER_WARN_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
                                          "// llvm.nvvm.compiler.warn()",
                                          [(int_nvvm_compiler_warn Int64Regs:$a)]>;
def INT_NVVM_COMPILER_ERROR_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
                                           "// llvm.nvvm.compiler.error()",
                                           [(int_nvvm_compiler_error Int32Regs:$a)]>;
def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
                                           "// llvm.nvvm.compiler.error()",
                                           [(int_nvvm_compiler_error Int64Regs:$a)]>;



//===-- Old PTX Back-end Intrinsics ---------------------------------------===//

// These intrinsics are handled to retain compatibility with the old backend.

// PTX Special Purpose Register Accessor Intrinsics

// Reads a special register into an Int64Regs destination.
//   regname - register name placed after '%' in the emitted mov.u64.
//   intop   - zero-operand intrinsic matched by the selection pattern.
class PTX_READ_SPECIAL_REGISTER_R64<string regname, Intrinsic intop>
  : NVPTXInst<(outs Int64Regs:$d), (ins),
              !strconcat(!strconcat("mov.u64\t$d, %", regname), ";"),
              [(set Int64Regs:$d, (intop))]>;

// Reads a special register into an Int32Regs destination.
//   regname - register name placed after '%' in the emitted mov.u32.
//   intop   - zero-operand intrinsic matched by the selection pattern.
class PTX_READ_SPECIAL_REGISTER_R32<string regname, Intrinsic intop>
  : NVPTXInst<(outs Int32Regs:$d), (ins),
              !strconcat(!strconcat("mov.u32\t$d, %", regname), ";"),
              [(set Int32Regs:$d, (intop))]>;

// TODO Add read vector-version of special registers

// Each def below pairs one special register name with the intrinsic that
// reads it.
def PTX_READ_TID_X : PTX_READ_SPECIAL_REGISTER_R32<"tid.x",
                                                   int_ptx_read_tid_x>;
def PTX_READ_TID_Y : PTX_READ_SPECIAL_REGISTER_R32<"tid.y",
                                                   int_ptx_read_tid_y>;
def PTX_READ_TID_Z : PTX_READ_SPECIAL_REGISTER_R32<"tid.z",
                                                   int_ptx_read_tid_z>;
def PTX_READ_TID_W : PTX_READ_SPECIAL_REGISTER_R32<"tid.w",
                                                   int_ptx_read_tid_w>;

def PTX_READ_NTID_X : PTX_READ_SPECIAL_REGISTER_R32<"ntid.x",
                                                    int_ptx_read_ntid_x>;
def PTX_READ_NTID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ntid.y",
                                                    int_ptx_read_ntid_y>;
def PTX_READ_NTID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ntid.z",
                                                    int_ptx_read_ntid_z>;
def PTX_READ_NTID_W : PTX_READ_SPECIAL_REGISTER_R32<"ntid.w",
                                                    int_ptx_read_ntid_w>;

def PTX_READ_LANEID : PTX_READ_SPECIAL_REGISTER_R32<"laneid",
                                                    int_ptx_read_laneid>;
def PTX_READ_WARPID : PTX_READ_SPECIAL_REGISTER_R32<"warpid",
                                                    int_ptx_read_warpid>;
def PTX_READ_NWARPID : PTX_READ_SPECIAL_REGISTER_R32<"nwarpid",
                                                     int_ptx_read_nwarpid>;

def PTX_READ_CTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.x",
                                                     int_ptx_read_ctaid_x>;
def PTX_READ_CTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.y",
                                                     int_ptx_read_ctaid_y>;
def PTX_READ_CTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.z",
                                                     int_ptx_read_ctaid_z>;
def PTX_READ_CTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.w",
                                                     int_ptx_read_ctaid_w>;

def PTX_READ_NCTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.x",
                                                      int_ptx_read_nctaid_x>;
def PTX_READ_NCTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.y",
                                                      int_ptx_read_nctaid_y>;
def PTX_READ_NCTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.z",
                                                      int_ptx_read_nctaid_z>;
def PTX_READ_NCTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.w",
                                                      int_ptx_read_nctaid_w>;

def PTX_READ_SMID : PTX_READ_SPECIAL_REGISTER_R32<"smid",
                                                  int_ptx_read_smid>;
def PTX_READ_NSMID : PTX_READ_SPECIAL_REGISTER_R32<"nsmid",
                                                   int_ptx_read_nsmid>;
def PTX_READ_GRIDID : PTX_READ_SPECIAL_REGISTER_R32<"gridid",
                                                    int_ptx_read_gridid>;

def PTX_READ_LANEMASK_EQ
  : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_eq", int_ptx_read_lanemask_eq>;
def PTX_READ_LANEMASK_LE
  : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_le", int_ptx_read_lanemask_le>;
def PTX_READ_LANEMASK_LT
  : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_lt", int_ptx_read_lanemask_lt>;
def PTX_READ_LANEMASK_GE
  : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_ge", int_ptx_read_lanemask_ge>;
def PTX_READ_LANEMASK_GT
  : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_gt", int_ptx_read_lanemask_gt>;

// clock is read through the 32-bit class, clock64 through the 64-bit class.
def PTX_READ_CLOCK
  : PTX_READ_SPECIAL_REGISTER_R32<"clock", int_ptx_read_clock>;
def PTX_READ_CLOCK64
  : PTX_READ_SPECIAL_REGISTER_R64<"clock64", int_ptx_read_clock64>;

def PTX_READ_PM0 : PTX_READ_SPECIAL_REGISTER_R32<"pm0", int_ptx_read_pm0>;
def PTX_READ_PM1 : PTX_READ_SPECIAL_REGISTER_R32<"pm1", int_ptx_read_pm1>;
def PTX_READ_PM2 : PTX_READ_SPECIAL_REGISTER_R32<"pm2", int_ptx_read_pm2>;
def PTX_READ_PM3 : PTX_READ_SPECIAL_REGISTER_R32<"pm3", int_ptx_read_pm3>;

// PTX Parallel Synchronization and Communication Intrinsics

// bar.sync whose operand is the immediate argument of int_ptx_bar_sync.
def PTX_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync\t$i;",
                             [(int_ptx_bar_sync imm:$i)]>;
