AMDGPUISelLowering.cpp revision 249259
1//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10/// \file 11/// \brief This is the parent TargetLowering class for hardware code gen 12/// targets. 13// 14//===----------------------------------------------------------------------===// 15 16#include "AMDGPUISelLowering.h" 17#include "AMDGPURegisterInfo.h" 18#include "AMDILIntrinsicInfo.h" 19#include "AMDGPUSubtarget.h" 20#include "llvm/CodeGen/CallingConvLower.h" 21#include "llvm/CodeGen/MachineFunction.h" 22#include "llvm/CodeGen/MachineRegisterInfo.h" 23#include "llvm/CodeGen/SelectionDAG.h" 24#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 25 26using namespace llvm; 27 28#include "AMDGPUGenCallingConv.inc" 29 30AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : 31 TargetLowering(TM, new TargetLoweringObjectFileELF()) { 32 33 // Initialize target lowering borrowed from AMDIL 34 InitAMDILLowering(); 35 36 // We need to custom lower some of the intrinsics 37 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 38 39 // Library functions. These default to Expand, but we have instructions 40 // for them. 41 setOperationAction(ISD::FCEIL, MVT::f32, Legal); 42 setOperationAction(ISD::FEXP2, MVT::f32, Legal); 43 setOperationAction(ISD::FPOW, MVT::f32, Legal); 44 setOperationAction(ISD::FLOG2, MVT::f32, Legal); 45 setOperationAction(ISD::FABS, MVT::f32, Legal); 46 setOperationAction(ISD::FFLOOR, MVT::f32, Legal); 47 setOperationAction(ISD::FRINT, MVT::f32, Legal); 48 49 // Lower floating point store/load to integer store/load to reduce the number 50 // of patterns in tablegen. 51 setOperationAction(ISD::STORE, MVT::f32, Promote); 52 AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32); 53 54 setOperationAction(ISD::STORE, MVT::v4f32, Promote); 55 AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32); 56 57 setOperationAction(ISD::LOAD, MVT::f32, Promote); 58 AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32); 59 60 setOperationAction(ISD::LOAD, MVT::v4f32, Promote); 61 AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32); 62 63 setOperationAction(ISD::MUL, MVT::i64, Expand); 64 65 setOperationAction(ISD::UDIV, MVT::i32, Expand); 66 setOperationAction(ISD::UDIVREM, MVT::i32, Custom); 67 setOperationAction(ISD::UREM, MVT::i32, Expand); 68} 69 70//===---------------------------------------------------------------------===// 71// TargetLowering Callbacks 72//===---------------------------------------------------------------------===// 73 74void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State, 75 const SmallVectorImpl<ISD::InputArg> &Ins) const { 76 77 State.AnalyzeFormalArguments(Ins, CC_AMDGPU); 78} 79 80SDValue AMDGPUTargetLowering::LowerReturn( 81 SDValue Chain, 82 CallingConv::ID CallConv, 83 bool isVarArg, 84 const SmallVectorImpl<ISD::OutputArg> &Outs, 85 const SmallVectorImpl<SDValue> &OutVals, 86 DebugLoc DL, SelectionDAG &DAG) const { 87 return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain); 88} 89 90//===---------------------------------------------------------------------===// 91// Target specific lowering 92//===---------------------------------------------------------------------===// 93 94SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) 95 const { 96 switch (Op.getOpcode()) { 97 default: 98 Op.getNode()->dump(); 99 assert(0 && "Custom lowering code for this" 100 "instruction is not implemented yet!"); 101 break; 102 // AMDIL DAG lowering 103 case ISD::SDIV: return LowerSDIV(Op, DAG); 104 case ISD::SREM: return LowerSREM(Op, DAG); 105 case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG); 106 case ISD::BRCOND: return LowerBRCOND(Op, DAG); 107 // AMDGPU DAG lowering 108 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 109 case ISD::UDIVREM: return LowerUDIVREM(Op, DAG); 110 } 111 return Op; 112} 113 114SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, 115 SelectionDAG &DAG) const { 116 unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 117 DebugLoc DL = Op.getDebugLoc(); 118 EVT VT = Op.getValueType(); 119 120 switch (IntrinsicID) { 121 default: return Op; 122 case AMDGPUIntrinsic::AMDIL_abs: 123 return LowerIntrinsicIABS(Op, DAG); 124 case AMDGPUIntrinsic::AMDIL_exp: 125 return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1)); 126 case AMDGPUIntrinsic::AMDGPU_lrp: 127 return LowerIntrinsicLRP(Op, DAG); 128 case AMDGPUIntrinsic::AMDIL_fraction: 129 return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1)); 130 case AMDGPUIntrinsic::AMDIL_max: 131 return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1), 132 Op.getOperand(2)); 133 case AMDGPUIntrinsic::AMDGPU_imax: 134 return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1), 135 Op.getOperand(2)); 136 case AMDGPUIntrinsic::AMDGPU_umax: 137 return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1), 138 Op.getOperand(2)); 139 case AMDGPUIntrinsic::AMDIL_min: 140 return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1), 141 Op.getOperand(2)); 142 case AMDGPUIntrinsic::AMDGPU_imin: 143 return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1), 144 Op.getOperand(2)); 145 case AMDGPUIntrinsic::AMDGPU_umin: 146 return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1), 147 Op.getOperand(2)); 148 case AMDGPUIntrinsic::AMDIL_round_nearest: 149 return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1)); 150 } 151} 152 153///IABS(a) = SMAX(sub(0, a), a) 154SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op, 155 SelectionDAG &DAG) const { 156 157 DebugLoc DL = Op.getDebugLoc(); 158 EVT VT = Op.getValueType(); 159 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), 160 Op.getOperand(1)); 161 162 return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1)); 163} 164 165/// Linear Interpolation 166/// LRP(a, b, c) = muladd(a, b, (1 - a) * c) 167SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op, 168 SelectionDAG &DAG) const { 169 DebugLoc DL = Op.getDebugLoc(); 170 EVT VT = Op.getValueType(); 171 SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT, 172 DAG.getConstantFP(1.0f, MVT::f32), 173 Op.getOperand(1)); 174 SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA, 175 Op.getOperand(3)); 176 return DAG.getNode(ISD::FADD, DL, VT, 177 DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), Op.getOperand(2)), 178 OneSubAC); 179} 180 181/// \brief Generate Min/Max node 182SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op, 183 SelectionDAG &DAG) const { 184 DebugLoc DL = Op.getDebugLoc(); 185 EVT VT = Op.getValueType(); 186 187 SDValue LHS = Op.getOperand(0); 188 SDValue RHS = Op.getOperand(1); 189 SDValue True = Op.getOperand(2); 190 SDValue False = Op.getOperand(3); 191 SDValue CC = Op.getOperand(4); 192 193 if (VT != MVT::f32 || 194 !((LHS == True && RHS == False) || (LHS == False && RHS == True))) { 195 return SDValue(); 196 } 197 198 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get(); 199 switch (CCOpcode) { 200 case ISD::SETOEQ: 201 case ISD::SETONE: 202 case ISD::SETUNE: 203 case ISD::SETNE: 204 case ISD::SETUEQ: 205 case ISD::SETEQ: 206 case ISD::SETFALSE: 207 case ISD::SETFALSE2: 208 case ISD::SETTRUE: 209 case ISD::SETTRUE2: 210 case ISD::SETUO: 211 case ISD::SETO: 212 assert(0 && "Operation should already be optimised !"); 213 case ISD::SETULE: 214 case ISD::SETULT: 215 case ISD::SETOLE: 216 case ISD::SETOLT: 217 case ISD::SETLE: 218 case ISD::SETLT: { 219 if (LHS == True) 220 return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS); 221 else 222 return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS); 223 } 224 case ISD::SETGT: 225 case ISD::SETGE: 226 case ISD::SETUGE: 227 case ISD::SETOGE: 228 case ISD::SETUGT: 229 case ISD::SETOGT: { 230 if (LHS == True) 231 return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS); 232 else 233 return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS); 234 } 235 case ISD::SETCC_INVALID: 236 assert(0 && "Invalid setcc condcode !"); 237 } 238 return Op; 239} 240 241 242 243SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op, 244 SelectionDAG &DAG) const { 245 DebugLoc DL = Op.getDebugLoc(); 246 EVT VT = Op.getValueType(); 247 248 SDValue Num = Op.getOperand(0); 249 SDValue Den = Op.getOperand(1); 250 251 SmallVector<SDValue, 8> Results; 252 253 // RCP = URECIP(Den) = 2^32 / Den + e 254 // e is rounding error. 255 SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den); 256 257 // RCP_LO = umulo(RCP, Den) */ 258 SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den); 259 260 // RCP_HI = mulhu (RCP, Den) */ 261 SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den); 262 263 // NEG_RCP_LO = -RCP_LO 264 SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), 265 RCP_LO); 266 267 // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO) 268 SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT), 269 NEG_RCP_LO, RCP_LO, 270 ISD::SETEQ); 271 // Calculate the rounding error from the URECIP instruction 272 // E = mulhu(ABS_RCP_LO, RCP) 273 SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP); 274 275 // RCP_A_E = RCP + E 276 SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E); 277 278 // RCP_S_E = RCP - E 279 SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E); 280 281 // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E) 282 SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT), 283 RCP_A_E, RCP_S_E, 284 ISD::SETEQ); 285 // Quotient = mulhu(Tmp0, Num) 286 SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num); 287 288 // Num_S_Remainder = Quotient * Den 289 SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den); 290 291 // Remainder = Num - Num_S_Remainder 292 SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder); 293 294 // Remainder_GE_Den = (Remainder >= Den ? -1 : 0) 295 SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den, 296 DAG.getConstant(-1, VT), 297 DAG.getConstant(0, VT), 298 ISD::SETGE); 299 // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0) 300 SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder, 301 DAG.getConstant(0, VT), 302 DAG.getConstant(-1, VT), 303 DAG.getConstant(0, VT), 304 ISD::SETGE); 305 // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero 306 SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den, 307 Remainder_GE_Zero); 308 309 // Calculate Division result: 310 311 // Quotient_A_One = Quotient + 1 312 SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient, 313 DAG.getConstant(1, VT)); 314 315 // Quotient_S_One = Quotient - 1 316 SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient, 317 DAG.getConstant(1, VT)); 318 319 // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One) 320 SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT), 321 Quotient, Quotient_A_One, ISD::SETEQ); 322 323 // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div) 324 Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT), 325 Quotient_S_One, Div, ISD::SETEQ); 326 327 // Calculate Rem result: 328 329 // Remainder_S_Den = Remainder - Den 330 SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den); 331 332 // Remainder_A_Den = Remainder + Den 333 SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den); 334 335 // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den) 336 SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT), 337 Remainder, Remainder_S_Den, ISD::SETEQ); 338 339 // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem) 340 Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT), 341 Remainder_A_Den, Rem, ISD::SETEQ); 342 SDValue Ops[2]; 343 Ops[0] = Div; 344 Ops[1] = Rem; 345 return DAG.getMergeValues(Ops, 2, DL); 346} 347 348//===----------------------------------------------------------------------===// 349// Helper functions 350//===----------------------------------------------------------------------===// 351 352bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const { 353 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) { 354 return CFP->isExactlyValue(1.0); 355 } 356 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { 357 return C->isAllOnesValue(); 358 } 359 return false; 360} 361 362bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const { 363 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) { 364 return CFP->getValueAPF().isZero(); 365 } 366 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { 367 return C->isNullValue(); 368 } 369 return false; 370} 371 372SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG, 373 const TargetRegisterClass *RC, 374 unsigned Reg, EVT VT) const { 375 MachineFunction &MF = DAG.getMachineFunction(); 376 MachineRegisterInfo &MRI = MF.getRegInfo(); 377 unsigned VirtualRegister; 378 if (!MRI.isLiveIn(Reg)) { 379 VirtualRegister = MRI.createVirtualRegister(RC); 380 MRI.addLiveIn(Reg, VirtualRegister); 381 } else { 382 VirtualRegister = MRI.getLiveInVirtReg(Reg); 383 } 384 return DAG.getRegister(VirtualRegister, VT); 385} 386 387#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node; 388 389const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { 390 switch (Opcode) { 391 default: return 0; 392 // AMDIL DAG nodes 393 NODE_NAME_CASE(CALL); 394 NODE_NAME_CASE(UMUL); 395 NODE_NAME_CASE(DIV_INF); 396 NODE_NAME_CASE(RET_FLAG); 397 NODE_NAME_CASE(BRANCH_COND); 398 399 // AMDGPU DAG nodes 400 NODE_NAME_CASE(DWORDADDR) 401 NODE_NAME_CASE(FRACT) 402 NODE_NAME_CASE(FMAX) 403 NODE_NAME_CASE(SMAX) 404 NODE_NAME_CASE(UMAX) 405 NODE_NAME_CASE(FMIN) 406 NODE_NAME_CASE(SMIN) 407 NODE_NAME_CASE(UMIN) 408 NODE_NAME_CASE(URECIP) 409 NODE_NAME_CASE(EXPORT) 410 NODE_NAME_CASE(CONST_ADDRESS) 411 NODE_NAME_CASE(REGISTER_LOAD) 412 NODE_NAME_CASE(REGISTER_STORE) 413 } 414} 415