SIInstrInfo.cpp revision 288943
//===-- SIInstrInfo.cpp - SI Instruction Information ---------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief SI Implementation of TargetInstrInfo.
//
//===----------------------------------------------------------------------===//


#include "SIInstrInfo.h"
#include "AMDGPUTargetMachine.h"
#include "SIDefines.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Debug.h"

using namespace llvm;

SIInstrInfo::SIInstrInfo(const AMDGPUSubtarget &st)
    : AMDGPUInstrInfo(st), RI() {}

//===----------------------------------------------------------------------===//
// TargetInstrInfo callbacks
//===----------------------------------------------------------------------===//

static unsigned getNumOperandsNoGlue(SDNode *Node) {
  unsigned N = Node->getNumOperands();
  while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
    --N;
  return N;
}

static SDValue findChainOperand(SDNode *Load) {
  SDValue LastOp = Load->getOperand(getNumOperandsNoGlue(Load) - 1);
  assert(LastOp.getValueType() == MVT::Other && "Chain missing from load node");
  return LastOp;
}

/// \brief Returns true if both nodes have the same value for the given
/// operand \p Op, or if both nodes do not have this operand.
static bool nodesHaveSameOperandValue(SDNode *N0, SDNode* N1, unsigned OpName) {
  unsigned Opc0 = N0->getMachineOpcode();
  unsigned Opc1 = N1->getMachineOpcode();

  int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName);
  int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName);

  if (Op0Idx == -1 && Op1Idx == -1)
    return true;


  if ((Op0Idx == -1 && Op1Idx != -1) ||
      (Op1Idx == -1 && Op0Idx != -1))
    return false;

  // getNamedOperandIdx returns the index for the MachineInstr's operands,
  // which includes the result as the first operand. We are indexing into the
  // MachineSDNode's operands, so we need to skip the result operand to get
  // the real index.
  --Op0Idx;
  --Op1Idx;

  return N0->getOperand(Op0Idx) == N1->getOperand(Op1Idx);
}

bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
                                                    AliasAnalysis *AA) const {
  // TODO: The generic check fails for VALU instructions that should be
  // rematerializable due to implicit reads of exec. We really want all of the
  // generic logic for this except for this.
  switch (MI->getOpcode()) {
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
    return true;
  default:
    return false;
  }
}

bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
                                          int64_t &Offset0,
                                          int64_t &Offset1) const {
  if (!Load0->isMachineOpcode() || !Load1->isMachineOpcode())
    return false;

  unsigned Opc0 = Load0->getMachineOpcode();
  unsigned Opc1 = Load1->getMachineOpcode();

  // Make sure both are actually loads.
101 if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad()) 102 return false; 103 104 if (isDS(Opc0) && isDS(Opc1)) { 105 106 // FIXME: Handle this case: 107 if (getNumOperandsNoGlue(Load0) != getNumOperandsNoGlue(Load1)) 108 return false; 109 110 // Check base reg. 111 if (Load0->getOperand(1) != Load1->getOperand(1)) 112 return false; 113 114 // Check chain. 115 if (findChainOperand(Load0) != findChainOperand(Load1)) 116 return false; 117 118 // Skip read2 / write2 variants for simplicity. 119 // TODO: We should report true if the used offsets are adjacent (excluded 120 // st64 versions). 121 if (AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::data1) != -1 || 122 AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::data1) != -1) 123 return false; 124 125 Offset0 = cast<ConstantSDNode>(Load0->getOperand(2))->getZExtValue(); 126 Offset1 = cast<ConstantSDNode>(Load1->getOperand(2))->getZExtValue(); 127 return true; 128 } 129 130 if (isSMRD(Opc0) && isSMRD(Opc1)) { 131 assert(getNumOperandsNoGlue(Load0) == getNumOperandsNoGlue(Load1)); 132 133 // Check base reg. 134 if (Load0->getOperand(0) != Load1->getOperand(0)) 135 return false; 136 137 const ConstantSDNode *Load0Offset = 138 dyn_cast<ConstantSDNode>(Load0->getOperand(1)); 139 const ConstantSDNode *Load1Offset = 140 dyn_cast<ConstantSDNode>(Load1->getOperand(1)); 141 142 if (!Load0Offset || !Load1Offset) 143 return false; 144 145 // Check chain. 146 if (findChainOperand(Load0) != findChainOperand(Load1)) 147 return false; 148 149 Offset0 = Load0Offset->getZExtValue(); 150 Offset1 = Load1Offset->getZExtValue(); 151 return true; 152 } 153 154 // MUBUF and MTBUF can access the same addresses. 155 if ((isMUBUF(Opc0) || isMTBUF(Opc0)) && (isMUBUF(Opc1) || isMTBUF(Opc1))) { 156 157 // MUBUF and MTBUF have vaddr at different indices. 158 if (!nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::soffset) || 159 findChainOperand(Load0) != findChainOperand(Load1) || 160 !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::vaddr) || 161 !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::srsrc)) 162 return false; 163 164 int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset); 165 int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset); 166 167 if (OffIdx0 == -1 || OffIdx1 == -1) 168 return false; 169 170 // getNamedOperandIdx returns the index for MachineInstrs. Since they 171 // inlcude the output in the operand list, but SDNodes don't, we need to 172 // subtract the index by one. 173 --OffIdx0; 174 --OffIdx1; 175 176 SDValue Off0 = Load0->getOperand(OffIdx0); 177 SDValue Off1 = Load1->getOperand(OffIdx1); 178 179 // The offset might be a FrameIndexSDNode. 
180 if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1)) 181 return false; 182 183 Offset0 = cast<ConstantSDNode>(Off0)->getZExtValue(); 184 Offset1 = cast<ConstantSDNode>(Off1)->getZExtValue(); 185 return true; 186 } 187 188 return false; 189} 190 191static bool isStride64(unsigned Opc) { 192 switch (Opc) { 193 case AMDGPU::DS_READ2ST64_B32: 194 case AMDGPU::DS_READ2ST64_B64: 195 case AMDGPU::DS_WRITE2ST64_B32: 196 case AMDGPU::DS_WRITE2ST64_B64: 197 return true; 198 default: 199 return false; 200 } 201} 202 203bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg, 204 unsigned &Offset, 205 const TargetRegisterInfo *TRI) const { 206 unsigned Opc = LdSt->getOpcode(); 207 if (isDS(Opc)) { 208 const MachineOperand *OffsetImm = getNamedOperand(*LdSt, 209 AMDGPU::OpName::offset); 210 if (OffsetImm) { 211 // Normal, single offset LDS instruction. 212 const MachineOperand *AddrReg = getNamedOperand(*LdSt, 213 AMDGPU::OpName::addr); 214 215 BaseReg = AddrReg->getReg(); 216 Offset = OffsetImm->getImm(); 217 return true; 218 } 219 220 // The 2 offset instructions use offset0 and offset1 instead. We can treat 221 // these as a load with a single offset if the 2 offsets are consecutive. We 222 // will use this for some partially aligned loads. 223 const MachineOperand *Offset0Imm = getNamedOperand(*LdSt, 224 AMDGPU::OpName::offset0); 225 const MachineOperand *Offset1Imm = getNamedOperand(*LdSt, 226 AMDGPU::OpName::offset1); 227 228 uint8_t Offset0 = Offset0Imm->getImm(); 229 uint8_t Offset1 = Offset1Imm->getImm(); 230 231 if (Offset1 > Offset0 && Offset1 - Offset0 == 1) { 232 // Each of these offsets is in element sized units, so we need to convert 233 // to bytes of the individual reads. 234 235 unsigned EltSize; 236 if (LdSt->mayLoad()) 237 EltSize = getOpRegClass(*LdSt, 0)->getSize() / 2; 238 else { 239 assert(LdSt->mayStore()); 240 int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0); 241 EltSize = getOpRegClass(*LdSt, Data0Idx)->getSize(); 242 } 243 244 if (isStride64(Opc)) 245 EltSize *= 64; 246 247 const MachineOperand *AddrReg = getNamedOperand(*LdSt, 248 AMDGPU::OpName::addr); 249 BaseReg = AddrReg->getReg(); 250 Offset = EltSize * Offset0; 251 return true; 252 } 253 254 return false; 255 } 256 257 if (isMUBUF(Opc) || isMTBUF(Opc)) { 258 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::soffset) != -1) 259 return false; 260 261 const MachineOperand *AddrReg = getNamedOperand(*LdSt, 262 AMDGPU::OpName::vaddr); 263 if (!AddrReg) 264 return false; 265 266 const MachineOperand *OffsetImm = getNamedOperand(*LdSt, 267 AMDGPU::OpName::offset); 268 BaseReg = AddrReg->getReg(); 269 Offset = OffsetImm->getImm(); 270 return true; 271 } 272 273 if (isSMRD(Opc)) { 274 const MachineOperand *OffsetImm = getNamedOperand(*LdSt, 275 AMDGPU::OpName::offset); 276 if (!OffsetImm) 277 return false; 278 279 const MachineOperand *SBaseReg = getNamedOperand(*LdSt, 280 AMDGPU::OpName::sbase); 281 BaseReg = SBaseReg->getReg(); 282 Offset = OffsetImm->getImm(); 283 return true; 284 } 285 286 return false; 287} 288 289bool SIInstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt, 290 MachineInstr *SecondLdSt, 291 unsigned NumLoads) const { 292 unsigned Opc0 = FirstLdSt->getOpcode(); 293 unsigned Opc1 = SecondLdSt->getOpcode(); 294 295 // TODO: This needs finer tuning 296 if (NumLoads > 4) 297 return false; 298 299 if (isDS(Opc0) && isDS(Opc1)) 300 return true; 301 302 if (isSMRD(Opc0) && isSMRD(Opc1)) 303 return true; 304 305 if ((isMUBUF(Opc0) || isMTBUF(Opc0)) && 
(isMUBUF(Opc1) || isMTBUF(Opc1))) 306 return true; 307 308 return false; 309} 310 311void 312SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, 313 MachineBasicBlock::iterator MI, DebugLoc DL, 314 unsigned DestReg, unsigned SrcReg, 315 bool KillSrc) const { 316 317 // If we are trying to copy to or from SCC, there is a bug somewhere else in 318 // the backend. While it may be theoretically possible to do this, it should 319 // never be necessary. 320 assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC); 321 322 static const int16_t Sub0_15[] = { 323 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 324 AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, 325 AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11, 326 AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, 0 327 }; 328 329 static const int16_t Sub0_7[] = { 330 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 331 AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, 0 332 }; 333 334 static const int16_t Sub0_3[] = { 335 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 0 336 }; 337 338 static const int16_t Sub0_2[] = { 339 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, 0 340 }; 341 342 static const int16_t Sub0_1[] = { 343 AMDGPU::sub0, AMDGPU::sub1, 0 344 }; 345 346 unsigned Opcode; 347 const int16_t *SubIndices; 348 349 if (AMDGPU::SReg_32RegClass.contains(DestReg)) { 350 assert(AMDGPU::SReg_32RegClass.contains(SrcReg)); 351 BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg) 352 .addReg(SrcReg, getKillRegState(KillSrc)); 353 return; 354 355 } else if (AMDGPU::SReg_64RegClass.contains(DestReg)) { 356 if (DestReg == AMDGPU::VCC) { 357 if (AMDGPU::SReg_64RegClass.contains(SrcReg)) { 358 BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC) 359 .addReg(SrcReg, getKillRegState(KillSrc)); 360 } else { 361 // FIXME: Hack until VReg_1 removed. 
362 assert(AMDGPU::VGPR_32RegClass.contains(SrcReg)); 363 BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_I32_e32), AMDGPU::VCC) 364 .addImm(0) 365 .addReg(SrcReg, getKillRegState(KillSrc)); 366 } 367 368 return; 369 } 370 371 assert(AMDGPU::SReg_64RegClass.contains(SrcReg)); 372 BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg) 373 .addReg(SrcReg, getKillRegState(KillSrc)); 374 return; 375 376 } else if (AMDGPU::SReg_128RegClass.contains(DestReg)) { 377 assert(AMDGPU::SReg_128RegClass.contains(SrcReg)); 378 Opcode = AMDGPU::S_MOV_B32; 379 SubIndices = Sub0_3; 380 381 } else if (AMDGPU::SReg_256RegClass.contains(DestReg)) { 382 assert(AMDGPU::SReg_256RegClass.contains(SrcReg)); 383 Opcode = AMDGPU::S_MOV_B32; 384 SubIndices = Sub0_7; 385 386 } else if (AMDGPU::SReg_512RegClass.contains(DestReg)) { 387 assert(AMDGPU::SReg_512RegClass.contains(SrcReg)); 388 Opcode = AMDGPU::S_MOV_B32; 389 SubIndices = Sub0_15; 390 391 } else if (AMDGPU::VGPR_32RegClass.contains(DestReg)) { 392 assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) || 393 AMDGPU::SReg_32RegClass.contains(SrcReg)); 394 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg) 395 .addReg(SrcReg, getKillRegState(KillSrc)); 396 return; 397 398 } else if (AMDGPU::VReg_64RegClass.contains(DestReg)) { 399 assert(AMDGPU::VReg_64RegClass.contains(SrcReg) || 400 AMDGPU::SReg_64RegClass.contains(SrcReg)); 401 Opcode = AMDGPU::V_MOV_B32_e32; 402 SubIndices = Sub0_1; 403 404 } else if (AMDGPU::VReg_96RegClass.contains(DestReg)) { 405 assert(AMDGPU::VReg_96RegClass.contains(SrcReg)); 406 Opcode = AMDGPU::V_MOV_B32_e32; 407 SubIndices = Sub0_2; 408 409 } else if (AMDGPU::VReg_128RegClass.contains(DestReg)) { 410 assert(AMDGPU::VReg_128RegClass.contains(SrcReg) || 411 AMDGPU::SReg_128RegClass.contains(SrcReg)); 412 Opcode = AMDGPU::V_MOV_B32_e32; 413 SubIndices = Sub0_3; 414 415 } else if (AMDGPU::VReg_256RegClass.contains(DestReg)) { 416 assert(AMDGPU::VReg_256RegClass.contains(SrcReg) || 417 AMDGPU::SReg_256RegClass.contains(SrcReg)); 418 Opcode = AMDGPU::V_MOV_B32_e32; 419 SubIndices = Sub0_7; 420 421 } else if (AMDGPU::VReg_512RegClass.contains(DestReg)) { 422 assert(AMDGPU::VReg_512RegClass.contains(SrcReg) || 423 AMDGPU::SReg_512RegClass.contains(SrcReg)); 424 Opcode = AMDGPU::V_MOV_B32_e32; 425 SubIndices = Sub0_15; 426 427 } else { 428 llvm_unreachable("Can't copy register!"); 429 } 430 431 while (unsigned SubIdx = *SubIndices++) { 432 MachineInstrBuilder Builder = BuildMI(MBB, MI, DL, 433 get(Opcode), RI.getSubReg(DestReg, SubIdx)); 434 435 Builder.addReg(RI.getSubReg(SrcReg, SubIdx), getKillRegState(KillSrc)); 436 437 if (*SubIndices) 438 Builder.addReg(DestReg, RegState::Define | RegState::Implicit); 439 } 440} 441 442int SIInstrInfo::commuteOpcode(const MachineInstr &MI) const { 443 const unsigned Opcode = MI.getOpcode(); 444 445 int NewOpc; 446 447 // Try to map original to commuted opcode 448 NewOpc = AMDGPU::getCommuteRev(Opcode); 449 if (NewOpc != -1) 450 // Check if the commuted (REV) opcode exists on the target. 451 return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1; 452 453 // Try to map commuted to original opcode 454 NewOpc = AMDGPU::getCommuteOrig(Opcode); 455 if (NewOpc != -1) 456 // Check if the original (non-REV) opcode exists on the target. 457 return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1; 458 459 return Opcode; 460} 461 462unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const { 463 464 if (DstRC->getSize() == 4) { 465 return RI.isSGPRClass(DstRC) ? 
AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32; 466 } else if (DstRC->getSize() == 8 && RI.isSGPRClass(DstRC)) { 467 return AMDGPU::S_MOV_B64; 468 } else if (DstRC->getSize() == 8 && !RI.isSGPRClass(DstRC)) { 469 return AMDGPU::V_MOV_B64_PSEUDO; 470 } 471 return AMDGPU::COPY; 472} 473 474void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, 475 MachineBasicBlock::iterator MI, 476 unsigned SrcReg, bool isKill, 477 int FrameIndex, 478 const TargetRegisterClass *RC, 479 const TargetRegisterInfo *TRI) const { 480 MachineFunction *MF = MBB.getParent(); 481 SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); 482 MachineFrameInfo *FrameInfo = MF->getFrameInfo(); 483 DebugLoc DL = MBB.findDebugLoc(MI); 484 int Opcode = -1; 485 486 if (RI.isSGPRClass(RC)) { 487 // We are only allowed to create one new instruction when spilling 488 // registers, so we need to use pseudo instruction for spilling 489 // SGPRs. 490 switch (RC->getSize() * 8) { 491 case 32: Opcode = AMDGPU::SI_SPILL_S32_SAVE; break; 492 case 64: Opcode = AMDGPU::SI_SPILL_S64_SAVE; break; 493 case 128: Opcode = AMDGPU::SI_SPILL_S128_SAVE; break; 494 case 256: Opcode = AMDGPU::SI_SPILL_S256_SAVE; break; 495 case 512: Opcode = AMDGPU::SI_SPILL_S512_SAVE; break; 496 } 497 } else if(RI.hasVGPRs(RC) && ST.isVGPRSpillingEnabled(MFI)) { 498 MFI->setHasSpilledVGPRs(); 499 500 switch(RC->getSize() * 8) { 501 case 32: Opcode = AMDGPU::SI_SPILL_V32_SAVE; break; 502 case 64: Opcode = AMDGPU::SI_SPILL_V64_SAVE; break; 503 case 96: Opcode = AMDGPU::SI_SPILL_V96_SAVE; break; 504 case 128: Opcode = AMDGPU::SI_SPILL_V128_SAVE; break; 505 case 256: Opcode = AMDGPU::SI_SPILL_V256_SAVE; break; 506 case 512: Opcode = AMDGPU::SI_SPILL_V512_SAVE; break; 507 } 508 } 509 510 if (Opcode != -1) { 511 FrameInfo->setObjectAlignment(FrameIndex, 4); 512 BuildMI(MBB, MI, DL, get(Opcode)) 513 .addReg(SrcReg) 514 .addFrameIndex(FrameIndex) 515 // Place-holder registers, these will be filled in by 516 // SIPrepareScratchRegs. 
517 .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef) 518 .addReg(AMDGPU::SGPR0, RegState::Undef); 519 } else { 520 LLVMContext &Ctx = MF->getFunction()->getContext(); 521 Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Do not know how to" 522 " spill register"); 523 BuildMI(MBB, MI, DL, get(AMDGPU::KILL)) 524 .addReg(SrcReg); 525 } 526} 527 528void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, 529 MachineBasicBlock::iterator MI, 530 unsigned DestReg, int FrameIndex, 531 const TargetRegisterClass *RC, 532 const TargetRegisterInfo *TRI) const { 533 MachineFunction *MF = MBB.getParent(); 534 const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); 535 MachineFrameInfo *FrameInfo = MF->getFrameInfo(); 536 DebugLoc DL = MBB.findDebugLoc(MI); 537 int Opcode = -1; 538 539 if (RI.isSGPRClass(RC)){ 540 switch(RC->getSize() * 8) { 541 case 32: Opcode = AMDGPU::SI_SPILL_S32_RESTORE; break; 542 case 64: Opcode = AMDGPU::SI_SPILL_S64_RESTORE; break; 543 case 128: Opcode = AMDGPU::SI_SPILL_S128_RESTORE; break; 544 case 256: Opcode = AMDGPU::SI_SPILL_S256_RESTORE; break; 545 case 512: Opcode = AMDGPU::SI_SPILL_S512_RESTORE; break; 546 } 547 } else if(RI.hasVGPRs(RC) && ST.isVGPRSpillingEnabled(MFI)) { 548 switch(RC->getSize() * 8) { 549 case 32: Opcode = AMDGPU::SI_SPILL_V32_RESTORE; break; 550 case 64: Opcode = AMDGPU::SI_SPILL_V64_RESTORE; break; 551 case 96: Opcode = AMDGPU::SI_SPILL_V96_RESTORE; break; 552 case 128: Opcode = AMDGPU::SI_SPILL_V128_RESTORE; break; 553 case 256: Opcode = AMDGPU::SI_SPILL_V256_RESTORE; break; 554 case 512: Opcode = AMDGPU::SI_SPILL_V512_RESTORE; break; 555 } 556 } 557 558 if (Opcode != -1) { 559 FrameInfo->setObjectAlignment(FrameIndex, 4); 560 BuildMI(MBB, MI, DL, get(Opcode), DestReg) 561 .addFrameIndex(FrameIndex) 562 // Place-holder registers, these will be filled in by 563 // SIPrepareScratchRegs. 
      .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
      .addReg(AMDGPU::SGPR0, RegState::Undef);

  } else {
    LLVMContext &Ctx = MF->getFunction()->getContext();
    Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Do not know how to"
                  " restore register");
    BuildMI(MBB, MI, DL, get(AMDGPU::IMPLICIT_DEF), DestReg);
  }
}

/// \param @Offset Offset in bytes of the FrameIndex being spilled
unsigned SIInstrInfo::calculateLDSSpillAddress(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MI,
                                               RegScavenger *RS, unsigned TmpReg,
                                               unsigned FrameOffset,
                                               unsigned Size) const {
  MachineFunction *MF = MBB.getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  const AMDGPUSubtarget &ST = MF->getSubtarget<AMDGPUSubtarget>();
  const SIRegisterInfo *TRI =
      static_cast<const SIRegisterInfo*>(ST.getRegisterInfo());
  DebugLoc DL = MBB.findDebugLoc(MI);
  unsigned WorkGroupSize = MFI->getMaximumWorkGroupSize(*MF);
  unsigned WavefrontSize = ST.getWavefrontSize();

  unsigned TIDReg = MFI->getTIDReg();
  if (!MFI->hasCalculatedTID()) {
    MachineBasicBlock &Entry = MBB.getParent()->front();
    MachineBasicBlock::iterator Insert = Entry.front();
    DebugLoc DL = Insert->getDebugLoc();

    TIDReg = RI.findUnusedRegister(MF->getRegInfo(), &AMDGPU::VGPR_32RegClass);
    if (TIDReg == AMDGPU::NoRegister)
      return TIDReg;


    if (MFI->getShaderType() == ShaderType::COMPUTE &&
        WorkGroupSize > WavefrontSize) {

      unsigned TIDIGXReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_X);
      unsigned TIDIGYReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_Y);
      unsigned TIDIGZReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_Z);
      unsigned InputPtrReg =
          TRI->getPreloadedValue(*MF, SIRegisterInfo::INPUT_PTR);
      for (unsigned Reg : {TIDIGXReg, TIDIGYReg, TIDIGZReg}) {
        if (!Entry.isLiveIn(Reg))
          Entry.addLiveIn(Reg);
      }

      RS->enterBasicBlock(&Entry);
      unsigned STmp0 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
      unsigned STmp1 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
      BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp0)
          .addReg(InputPtrReg)
          .addImm(SI::KernelInputOffsets::NGROUPS_Z);
      BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp1)
          .addReg(InputPtrReg)
          .addImm(SI::KernelInputOffsets::NGROUPS_Y);

      // NGROUPS.X * NGROUPS.Y
      BuildMI(Entry, Insert, DL, get(AMDGPU::S_MUL_I32), STmp1)
          .addReg(STmp1)
          .addReg(STmp0);
      // (NGROUPS.X * NGROUPS.Y) * TIDIG.X
      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MUL_U32_U24_e32), TIDReg)
          .addReg(STmp1)
          .addReg(TIDIGXReg);
      // NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROUPS.Y * TIDIG.X)
      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MAD_U32_U24), TIDReg)
          .addReg(STmp0)
          .addReg(TIDIGYReg)
          .addReg(TIDReg);
      // (NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROUPS.Y * TIDIG.X)) + TIDIG.Z
      BuildMI(Entry, Insert, DL, get(AMDGPU::V_ADD_I32_e32), TIDReg)
          .addReg(TIDReg)
          .addReg(TIDIGZReg);
    } else {
      // Get the wave id
      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_LO_U32_B32_e64),
              TIDReg)
          .addImm(-1)
          .addImm(0);

      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_HI_U32_B32_e64),
              TIDReg)
          .addImm(-1)
          .addReg(TIDReg);
    }

    BuildMI(Entry, Insert, DL, get(AMDGPU::V_LSHLREV_B32_e32),
            TIDReg)
        .addImm(2)
        .addReg(TIDReg);
    MFI->setTIDReg(TIDReg);
  }

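  // Added illustrative note (not in the original source): combined with the
  // TID value computed above (already converted to a byte offset by the
  // V_LSHLREV_B32 by 2), the code below makes each lane address
  //   addr = LDSSize + FrameOffset * WorkGroupSize + TID * 4
  // For example, with the assumed values LDSSize = 0, FrameOffset = 8,
  // WorkGroupSize = 256 and flat thread id 5, the lane would use byte offset
  // 0 + 8 * 256 + 5 * 4 = 2068 into LDS.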
// Add FrameIndex to LDS offset 662 unsigned LDSOffset = MFI->LDSSize + (FrameOffset * WorkGroupSize); 663 BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), TmpReg) 664 .addImm(LDSOffset) 665 .addReg(TIDReg); 666 667 return TmpReg; 668} 669 670void SIInstrInfo::insertNOPs(MachineBasicBlock::iterator MI, 671 int Count) const { 672 while (Count > 0) { 673 int Arg; 674 if (Count >= 8) 675 Arg = 7; 676 else 677 Arg = Count - 1; 678 Count -= 8; 679 BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(AMDGPU::S_NOP)) 680 .addImm(Arg); 681 } 682} 683 684bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { 685 MachineBasicBlock &MBB = *MI->getParent(); 686 DebugLoc DL = MBB.findDebugLoc(MI); 687 switch (MI->getOpcode()) { 688 default: return AMDGPUInstrInfo::expandPostRAPseudo(MI); 689 690 case AMDGPU::SI_CONSTDATA_PTR: { 691 unsigned Reg = MI->getOperand(0).getReg(); 692 unsigned RegLo = RI.getSubReg(Reg, AMDGPU::sub0); 693 unsigned RegHi = RI.getSubReg(Reg, AMDGPU::sub1); 694 695 BuildMI(MBB, MI, DL, get(AMDGPU::S_GETPC_B64), Reg); 696 697 // Add 32-bit offset from this instruction to the start of the constant data. 698 BuildMI(MBB, MI, DL, get(AMDGPU::S_ADD_U32), RegLo) 699 .addReg(RegLo) 700 .addTargetIndex(AMDGPU::TI_CONSTDATA_START) 701 .addReg(AMDGPU::SCC, RegState::Define | RegState::Implicit); 702 BuildMI(MBB, MI, DL, get(AMDGPU::S_ADDC_U32), RegHi) 703 .addReg(RegHi) 704 .addImm(0) 705 .addReg(AMDGPU::SCC, RegState::Define | RegState::Implicit) 706 .addReg(AMDGPU::SCC, RegState::Implicit); 707 MI->eraseFromParent(); 708 break; 709 } 710 case AMDGPU::SGPR_USE: 711 // This is just a placeholder for register allocation. 712 MI->eraseFromParent(); 713 break; 714 715 case AMDGPU::V_MOV_B64_PSEUDO: { 716 unsigned Dst = MI->getOperand(0).getReg(); 717 unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0); 718 unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1); 719 720 const MachineOperand &SrcOp = MI->getOperand(1); 721 // FIXME: Will this work for 64-bit floating point immediates? 
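    // Added illustrative note: the expansion below moves a 64-bit immediate by
    // writing each 32-bit half of the destination separately, e.g. for the
    // (hypothetical) immediate 0x3FF0000000000001 it would emit
    //   v_mov_b32 dst.sub0, 0x00000001   ; low half
    //   v_mov_b32 dst.sub1, 0x3FF00000   ; high half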
722 assert(!SrcOp.isFPImm()); 723 if (SrcOp.isImm()) { 724 APInt Imm(64, SrcOp.getImm()); 725 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo) 726 .addImm(Imm.getLoBits(32).getZExtValue()) 727 .addReg(Dst, RegState::Implicit); 728 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi) 729 .addImm(Imm.getHiBits(32).getZExtValue()) 730 .addReg(Dst, RegState::Implicit); 731 } else { 732 assert(SrcOp.isReg()); 733 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo) 734 .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub0)) 735 .addReg(Dst, RegState::Implicit); 736 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi) 737 .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub1)) 738 .addReg(Dst, RegState::Implicit); 739 } 740 MI->eraseFromParent(); 741 break; 742 } 743 744 case AMDGPU::V_CNDMASK_B64_PSEUDO: { 745 unsigned Dst = MI->getOperand(0).getReg(); 746 unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0); 747 unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1); 748 unsigned Src0 = MI->getOperand(1).getReg(); 749 unsigned Src1 = MI->getOperand(2).getReg(); 750 const MachineOperand &SrcCond = MI->getOperand(3); 751 752 BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstLo) 753 .addReg(RI.getSubReg(Src0, AMDGPU::sub0)) 754 .addReg(RI.getSubReg(Src1, AMDGPU::sub0)) 755 .addOperand(SrcCond); 756 BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstHi) 757 .addReg(RI.getSubReg(Src0, AMDGPU::sub1)) 758 .addReg(RI.getSubReg(Src1, AMDGPU::sub1)) 759 .addOperand(SrcCond); 760 MI->eraseFromParent(); 761 break; 762 } 763 } 764 return true; 765} 766 767MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI, 768 bool NewMI) const { 769 770 if (MI->getNumOperands() < 3) 771 return nullptr; 772 773 int CommutedOpcode = commuteOpcode(*MI); 774 if (CommutedOpcode == -1) 775 return nullptr; 776 777 int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), 778 AMDGPU::OpName::src0); 779 assert(Src0Idx != -1 && "Should always have src0 operand"); 780 781 MachineOperand &Src0 = MI->getOperand(Src0Idx); 782 if (!Src0.isReg()) 783 return nullptr; 784 785 int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), 786 AMDGPU::OpName::src1); 787 if (Src1Idx == -1) 788 return nullptr; 789 790 MachineOperand &Src1 = MI->getOperand(Src1Idx); 791 792 // Make sure it's legal to commute operands for VOP2. 793 if (isVOP2(MI->getOpcode()) && 794 (!isOperandLegal(MI, Src0Idx, &Src1) || 795 !isOperandLegal(MI, Src1Idx, &Src0))) { 796 return nullptr; 797 } 798 799 if (!Src1.isReg()) { 800 // Allow commuting instructions with Imm operands. 801 if (NewMI || !Src1.isImm() || 802 (!isVOP2(MI->getOpcode()) && !isVOP3(MI->getOpcode()))) { 803 return nullptr; 804 } 805 806 // Be sure to copy the source modifiers to the right place. 807 if (MachineOperand *Src0Mods 808 = getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers)) { 809 MachineOperand *Src1Mods 810 = getNamedOperand(*MI, AMDGPU::OpName::src1_modifiers); 811 812 int Src0ModsVal = Src0Mods->getImm(); 813 if (!Src1Mods && Src0ModsVal != 0) 814 return nullptr; 815 816 // XXX - This assert might be a lie. It might be useful to have a neg 817 // modifier with 0.0. 
818 int Src1ModsVal = Src1Mods->getImm(); 819 assert((Src1ModsVal == 0) && "Not expecting modifiers with immediates"); 820 821 Src1Mods->setImm(Src0ModsVal); 822 Src0Mods->setImm(Src1ModsVal); 823 } 824 825 unsigned Reg = Src0.getReg(); 826 unsigned SubReg = Src0.getSubReg(); 827 if (Src1.isImm()) 828 Src0.ChangeToImmediate(Src1.getImm()); 829 else 830 llvm_unreachable("Should only have immediates"); 831 832 Src1.ChangeToRegister(Reg, false); 833 Src1.setSubReg(SubReg); 834 } else { 835 MI = TargetInstrInfo::commuteInstruction(MI, NewMI); 836 } 837 838 if (MI) 839 MI->setDesc(get(CommutedOpcode)); 840 841 return MI; 842} 843 844// This needs to be implemented because the source modifiers may be inserted 845// between the true commutable operands, and the base 846// TargetInstrInfo::commuteInstruction uses it. 847bool SIInstrInfo::findCommutedOpIndices(MachineInstr *MI, 848 unsigned &SrcOpIdx1, 849 unsigned &SrcOpIdx2) const { 850 const MCInstrDesc &MCID = MI->getDesc(); 851 if (!MCID.isCommutable()) 852 return false; 853 854 unsigned Opc = MI->getOpcode(); 855 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 856 if (Src0Idx == -1) 857 return false; 858 859 // FIXME: Workaround TargetInstrInfo::commuteInstruction asserting on 860 // immediate. 861 if (!MI->getOperand(Src0Idx).isReg()) 862 return false; 863 864 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); 865 if (Src1Idx == -1) 866 return false; 867 868 if (!MI->getOperand(Src1Idx).isReg()) 869 return false; 870 871 // If any source modifiers are set, the generic instruction commuting won't 872 // understand how to copy the source modifiers. 873 if (hasModifiersSet(*MI, AMDGPU::OpName::src0_modifiers) || 874 hasModifiersSet(*MI, AMDGPU::OpName::src1_modifiers)) 875 return false; 876 877 SrcOpIdx1 = Src0Idx; 878 SrcOpIdx2 = Src1Idx; 879 return true; 880} 881 882MachineInstr *SIInstrInfo::buildMovInstr(MachineBasicBlock *MBB, 883 MachineBasicBlock::iterator I, 884 unsigned DstReg, 885 unsigned SrcReg) const { 886 return BuildMI(*MBB, I, MBB->findDebugLoc(I), get(AMDGPU::V_MOV_B32_e32), 887 DstReg) .addReg(SrcReg); 888} 889 890bool SIInstrInfo::isMov(unsigned Opcode) const { 891 switch(Opcode) { 892 default: return false; 893 case AMDGPU::S_MOV_B32: 894 case AMDGPU::S_MOV_B64: 895 case AMDGPU::V_MOV_B32_e32: 896 case AMDGPU::V_MOV_B32_e64: 897 return true; 898 } 899} 900 901bool 902SIInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const { 903 return RC != &AMDGPU::EXECRegRegClass; 904} 905 906static void removeModOperands(MachineInstr &MI) { 907 unsigned Opc = MI.getOpcode(); 908 int Src0ModIdx = AMDGPU::getNamedOperandIdx(Opc, 909 AMDGPU::OpName::src0_modifiers); 910 int Src1ModIdx = AMDGPU::getNamedOperandIdx(Opc, 911 AMDGPU::OpName::src1_modifiers); 912 int Src2ModIdx = AMDGPU::getNamedOperandIdx(Opc, 913 AMDGPU::OpName::src2_modifiers); 914 915 MI.RemoveOperand(Src2ModIdx); 916 MI.RemoveOperand(Src1ModIdx); 917 MI.RemoveOperand(Src0ModIdx); 918} 919 920bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, 921 unsigned Reg, MachineRegisterInfo *MRI) const { 922 if (!MRI->hasOneNonDBGUse(Reg)) 923 return false; 924 925 unsigned Opc = UseMI->getOpcode(); 926 if (Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64) { 927 // Don't fold if we are using source modifiers. The new VOP2 instructions 928 // don't have them. 
929 if (hasModifiersSet(*UseMI, AMDGPU::OpName::src0_modifiers) || 930 hasModifiersSet(*UseMI, AMDGPU::OpName::src1_modifiers) || 931 hasModifiersSet(*UseMI, AMDGPU::OpName::src2_modifiers)) { 932 return false; 933 } 934 935 MachineOperand *Src0 = getNamedOperand(*UseMI, AMDGPU::OpName::src0); 936 MachineOperand *Src1 = getNamedOperand(*UseMI, AMDGPU::OpName::src1); 937 MachineOperand *Src2 = getNamedOperand(*UseMI, AMDGPU::OpName::src2); 938 939 // Multiplied part is the constant: Use v_madmk_f32 940 // We should only expect these to be on src0 due to canonicalizations. 941 if (Src0->isReg() && Src0->getReg() == Reg) { 942 if (!Src1->isReg() || 943 (Src1->isReg() && RI.isSGPRClass(MRI->getRegClass(Src1->getReg())))) 944 return false; 945 946 if (!Src2->isReg() || 947 (Src2->isReg() && RI.isSGPRClass(MRI->getRegClass(Src2->getReg())))) 948 return false; 949 950 // We need to do some weird looking operand shuffling since the madmk 951 // operands are out of the normal expected order with the multiplied 952 // constant as the last operand. 953 // 954 // v_mad_f32 src0, src1, src2 -> v_madmk_f32 src0 * src2K + src1 955 // src0 -> src2 K 956 // src1 -> src0 957 // src2 -> src1 958 959 const int64_t Imm = DefMI->getOperand(1).getImm(); 960 961 // FIXME: This would be a lot easier if we could return a new instruction 962 // instead of having to modify in place. 963 964 // Remove these first since they are at the end. 965 UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, 966 AMDGPU::OpName::omod)); 967 UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, 968 AMDGPU::OpName::clamp)); 969 970 unsigned Src1Reg = Src1->getReg(); 971 unsigned Src1SubReg = Src1->getSubReg(); 972 unsigned Src2Reg = Src2->getReg(); 973 unsigned Src2SubReg = Src2->getSubReg(); 974 Src0->setReg(Src1Reg); 975 Src0->setSubReg(Src1SubReg); 976 Src0->setIsKill(Src1->isKill()); 977 978 Src1->setReg(Src2Reg); 979 Src1->setSubReg(Src2SubReg); 980 Src1->setIsKill(Src2->isKill()); 981 982 if (Opc == AMDGPU::V_MAC_F32_e64) { 983 UseMI->untieRegOperand( 984 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); 985 } 986 987 UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, 988 AMDGPU::OpName::src2)); 989 // ChangingToImmediate adds Src2 back to the instruction. 990 Src2->ChangeToImmediate(Imm); 991 992 removeModOperands(*UseMI); 993 UseMI->setDesc(get(AMDGPU::V_MADMK_F32)); 994 995 bool DeleteDef = MRI->hasOneNonDBGUse(Reg); 996 if (DeleteDef) 997 DefMI->eraseFromParent(); 998 999 return true; 1000 } 1001 1002 // Added part is the constant: Use v_madak_f32 1003 if (Src2->isReg() && Src2->getReg() == Reg) { 1004 // Not allowed to use constant bus for another operand. 1005 // We can however allow an inline immediate as src0. 1006 if (!Src0->isImm() && 1007 (Src0->isReg() && RI.isSGPRClass(MRI->getRegClass(Src0->getReg())))) 1008 return false; 1009 1010 if (!Src1->isReg() || 1011 (Src1->isReg() && RI.isSGPRClass(MRI->getRegClass(Src1->getReg())))) 1012 return false; 1013 1014 const int64_t Imm = DefMI->getOperand(1).getImm(); 1015 1016 // FIXME: This would be a lot easier if we could return a new instruction 1017 // instead of having to modify in place. 1018 1019 // Remove these first since they are at the end. 
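      // (Added illustrative summary, mirroring the madmk mapping spelled out
      // above; not part of the original comments.) The overall rewrite in this
      // branch is:
      //   v_mad_f32 dst, src0, src1, src2  ->  v_madak_f32 dst, src0, src1, K
      // where K is the folded constant: src0 and src1 keep their positions and
      // K replaces src2 as the trailing 32-bit literal of the madak encoding.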
1020 UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, 1021 AMDGPU::OpName::omod)); 1022 UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, 1023 AMDGPU::OpName::clamp)); 1024 1025 if (Opc == AMDGPU::V_MAC_F32_e64) { 1026 UseMI->untieRegOperand( 1027 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); 1028 } 1029 1030 // ChangingToImmediate adds Src2 back to the instruction. 1031 Src2->ChangeToImmediate(Imm); 1032 1033 // These come before src2. 1034 removeModOperands(*UseMI); 1035 UseMI->setDesc(get(AMDGPU::V_MADAK_F32)); 1036 1037 bool DeleteDef = MRI->hasOneNonDBGUse(Reg); 1038 if (DeleteDef) 1039 DefMI->eraseFromParent(); 1040 1041 return true; 1042 } 1043 } 1044 1045 return false; 1046} 1047 1048bool 1049SIInstrInfo::isTriviallyReMaterializable(const MachineInstr *MI, 1050 AliasAnalysis *AA) const { 1051 switch(MI->getOpcode()) { 1052 default: return AMDGPUInstrInfo::isTriviallyReMaterializable(MI, AA); 1053 case AMDGPU::S_MOV_B32: 1054 case AMDGPU::S_MOV_B64: 1055 case AMDGPU::V_MOV_B32_e32: 1056 return MI->getOperand(1).isImm(); 1057 } 1058} 1059 1060static bool offsetsDoNotOverlap(int WidthA, int OffsetA, 1061 int WidthB, int OffsetB) { 1062 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB; 1063 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA; 1064 int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB; 1065 return LowOffset + LowWidth <= HighOffset; 1066} 1067 1068bool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr *MIa, 1069 MachineInstr *MIb) const { 1070 unsigned BaseReg0, Offset0; 1071 unsigned BaseReg1, Offset1; 1072 1073 if (getMemOpBaseRegImmOfs(MIa, BaseReg0, Offset0, &RI) && 1074 getMemOpBaseRegImmOfs(MIb, BaseReg1, Offset1, &RI)) { 1075 assert(MIa->hasOneMemOperand() && MIb->hasOneMemOperand() && 1076 "read2 / write2 not expected here yet"); 1077 unsigned Width0 = (*MIa->memoperands_begin())->getSize(); 1078 unsigned Width1 = (*MIb->memoperands_begin())->getSize(); 1079 if (BaseReg0 == BaseReg1 && 1080 offsetsDoNotOverlap(Width0, Offset0, Width1, Offset1)) { 1081 return true; 1082 } 1083 } 1084 1085 return false; 1086} 1087 1088bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa, 1089 MachineInstr *MIb, 1090 AliasAnalysis *AA) const { 1091 unsigned Opc0 = MIa->getOpcode(); 1092 unsigned Opc1 = MIb->getOpcode(); 1093 1094 assert(MIa && (MIa->mayLoad() || MIa->mayStore()) && 1095 "MIa must load from or modify a memory location"); 1096 assert(MIb && (MIb->mayLoad() || MIb->mayStore()) && 1097 "MIb must load from or modify a memory location"); 1098 1099 if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects()) 1100 return false; 1101 1102 // XXX - Can we relax this between address spaces? 1103 if (MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef()) 1104 return false; 1105 1106 // TODO: Should we check the address space from the MachineMemOperand? That 1107 // would allow us to distinguish objects we know don't alias based on the 1108 // underlying addres space, even if it was lowered to a different one, 1109 // e.g. private accesses lowered to use MUBUF instructions on a scratch 1110 // buffer. 
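  // Added summary (derived from the checks below rather than the original
  // comments): same-kind accesses (DS/DS, buffer/buffer, SMRD/SMRD, FLAT/FLAT)
  // fall back to a base-register + offset comparison; DS is treated as
  // disjoint from non-FLAT memory accesses because LDS and global/scratch
  // memory are separate address spaces; and mixed accesses involving FLAT are
  // never assumed disjoint, since FLAT may address either space.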
1111 if (isDS(Opc0)) { 1112 if (isDS(Opc1)) 1113 return checkInstOffsetsDoNotOverlap(MIa, MIb); 1114 1115 return !isFLAT(Opc1); 1116 } 1117 1118 if (isMUBUF(Opc0) || isMTBUF(Opc0)) { 1119 if (isMUBUF(Opc1) || isMTBUF(Opc1)) 1120 return checkInstOffsetsDoNotOverlap(MIa, MIb); 1121 1122 return !isFLAT(Opc1) && !isSMRD(Opc1); 1123 } 1124 1125 if (isSMRD(Opc0)) { 1126 if (isSMRD(Opc1)) 1127 return checkInstOffsetsDoNotOverlap(MIa, MIb); 1128 1129 return !isFLAT(Opc1) && !isMUBUF(Opc0) && !isMTBUF(Opc0); 1130 } 1131 1132 if (isFLAT(Opc0)) { 1133 if (isFLAT(Opc1)) 1134 return checkInstOffsetsDoNotOverlap(MIa, MIb); 1135 1136 return false; 1137 } 1138 1139 return false; 1140} 1141 1142MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB, 1143 MachineBasicBlock::iterator &MI, 1144 LiveVariables *LV) const { 1145 1146 switch (MI->getOpcode()) { 1147 default: return nullptr; 1148 case AMDGPU::V_MAC_F32_e64: break; 1149 case AMDGPU::V_MAC_F32_e32: { 1150 const MachineOperand *Src0 = getNamedOperand(*MI, AMDGPU::OpName::src0); 1151 if (Src0->isImm() && !isInlineConstant(*Src0, 4)) 1152 return nullptr; 1153 break; 1154 } 1155 } 1156 1157 const MachineOperand *Dst = getNamedOperand(*MI, AMDGPU::OpName::dst); 1158 const MachineOperand *Src0 = getNamedOperand(*MI, AMDGPU::OpName::src0); 1159 const MachineOperand *Src1 = getNamedOperand(*MI, AMDGPU::OpName::src1); 1160 const MachineOperand *Src2 = getNamedOperand(*MI, AMDGPU::OpName::src2); 1161 1162 return BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_MAD_F32)) 1163 .addOperand(*Dst) 1164 .addImm(0) // Src0 mods 1165 .addOperand(*Src0) 1166 .addImm(0) // Src1 mods 1167 .addOperand(*Src1) 1168 .addImm(0) // Src mods 1169 .addOperand(*Src2) 1170 .addImm(0) // clamp 1171 .addImm(0); // omod 1172} 1173 1174bool SIInstrInfo::isInlineConstant(const APInt &Imm) const { 1175 int64_t SVal = Imm.getSExtValue(); 1176 if (SVal >= -16 && SVal <= 64) 1177 return true; 1178 1179 if (Imm.getBitWidth() == 64) { 1180 uint64_t Val = Imm.getZExtValue(); 1181 return (DoubleToBits(0.0) == Val) || 1182 (DoubleToBits(1.0) == Val) || 1183 (DoubleToBits(-1.0) == Val) || 1184 (DoubleToBits(0.5) == Val) || 1185 (DoubleToBits(-0.5) == Val) || 1186 (DoubleToBits(2.0) == Val) || 1187 (DoubleToBits(-2.0) == Val) || 1188 (DoubleToBits(4.0) == Val) || 1189 (DoubleToBits(-4.0) == Val); 1190 } 1191 1192 // The actual type of the operand does not seem to matter as long 1193 // as the bits match one of the inline immediate values. For example: 1194 // 1195 // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal, 1196 // so it is a legal inline immediate. 1197 // 1198 // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in 1199 // floating-point, so it is a legal inline immediate. 1200 uint32_t Val = Imm.getZExtValue(); 1201 1202 return (FloatToBits(0.0f) == Val) || 1203 (FloatToBits(1.0f) == Val) || 1204 (FloatToBits(-1.0f) == Val) || 1205 (FloatToBits(0.5f) == Val) || 1206 (FloatToBits(-0.5f) == Val) || 1207 (FloatToBits(2.0f) == Val) || 1208 (FloatToBits(-2.0f) == Val) || 1209 (FloatToBits(4.0f) == Val) || 1210 (FloatToBits(-4.0f) == Val); 1211} 1212 1213bool SIInstrInfo::isInlineConstant(const MachineOperand &MO, 1214 unsigned OpSize) const { 1215 if (MO.isImm()) { 1216 // MachineOperand provides no way to tell the true operand size, since it 1217 // only records a 64-bit value. We need to know the size to determine if a 1218 // 32-bit floating point immediate bit pattern is legal for an integer 1219 // immediate. 
It would be for any 32-bit integer operand, but would not be 1220 // for a 64-bit one. 1221 1222 unsigned BitSize = 8 * OpSize; 1223 return isInlineConstant(APInt(BitSize, MO.getImm(), true)); 1224 } 1225 1226 return false; 1227} 1228 1229bool SIInstrInfo::isLiteralConstant(const MachineOperand &MO, 1230 unsigned OpSize) const { 1231 return MO.isImm() && !isInlineConstant(MO, OpSize); 1232} 1233 1234static bool compareMachineOp(const MachineOperand &Op0, 1235 const MachineOperand &Op1) { 1236 if (Op0.getType() != Op1.getType()) 1237 return false; 1238 1239 switch (Op0.getType()) { 1240 case MachineOperand::MO_Register: 1241 return Op0.getReg() == Op1.getReg(); 1242 case MachineOperand::MO_Immediate: 1243 return Op0.getImm() == Op1.getImm(); 1244 default: 1245 llvm_unreachable("Didn't expect to be comparing these operand types"); 1246 } 1247} 1248 1249bool SIInstrInfo::isImmOperandLegal(const MachineInstr *MI, unsigned OpNo, 1250 const MachineOperand &MO) const { 1251 const MCOperandInfo &OpInfo = get(MI->getOpcode()).OpInfo[OpNo]; 1252 1253 assert(MO.isImm() || MO.isTargetIndex() || MO.isFI()); 1254 1255 if (OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE) 1256 return true; 1257 1258 if (OpInfo.RegClass < 0) 1259 return false; 1260 1261 unsigned OpSize = RI.getRegClass(OpInfo.RegClass)->getSize(); 1262 if (isLiteralConstant(MO, OpSize)) 1263 return RI.opCanUseLiteralConstant(OpInfo.OperandType); 1264 1265 return RI.opCanUseInlineConstant(OpInfo.OperandType); 1266} 1267 1268bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const { 1269 int Op32 = AMDGPU::getVOPe32(Opcode); 1270 if (Op32 == -1) 1271 return false; 1272 1273 return pseudoToMCOpcode(Op32) != -1; 1274} 1275 1276bool SIInstrInfo::hasModifiers(unsigned Opcode) const { 1277 // The src0_modifier operand is present on all instructions 1278 // that have modifiers. 1279 1280 return AMDGPU::getNamedOperandIdx(Opcode, 1281 AMDGPU::OpName::src0_modifiers) != -1; 1282} 1283 1284bool SIInstrInfo::hasModifiersSet(const MachineInstr &MI, 1285 unsigned OpName) const { 1286 const MachineOperand *Mods = getNamedOperand(MI, OpName); 1287 return Mods && Mods->getImm(); 1288} 1289 1290bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI, 1291 const MachineOperand &MO, 1292 unsigned OpSize) const { 1293 // Literal constants use the constant bus. 1294 if (isLiteralConstant(MO, OpSize)) 1295 return true; 1296 1297 if (!MO.isReg() || !MO.isUse()) 1298 return false; 1299 1300 if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) 1301 return RI.isSGPRClass(MRI.getRegClass(MO.getReg())); 1302 1303 // FLAT_SCR is just an SGPR pair. 1304 if (!MO.isImplicit() && (MO.getReg() == AMDGPU::FLAT_SCR)) 1305 return true; 1306 1307 // EXEC register uses the constant bus. 
1308 if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC) 1309 return true; 1310 1311 // SGPRs use the constant bus 1312 if (MO.getReg() == AMDGPU::M0 || MO.getReg() == AMDGPU::VCC || 1313 (!MO.isImplicit() && 1314 (AMDGPU::SGPR_32RegClass.contains(MO.getReg()) || 1315 AMDGPU::SGPR_64RegClass.contains(MO.getReg())))) { 1316 return true; 1317 } 1318 1319 return false; 1320} 1321 1322bool SIInstrInfo::verifyInstruction(const MachineInstr *MI, 1323 StringRef &ErrInfo) const { 1324 uint16_t Opcode = MI->getOpcode(); 1325 const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); 1326 int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 1327 int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 1328 int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 1329 1330 // Make sure the number of operands is correct. 1331 const MCInstrDesc &Desc = get(Opcode); 1332 if (!Desc.isVariadic() && 1333 Desc.getNumOperands() != MI->getNumExplicitOperands()) { 1334 ErrInfo = "Instruction has wrong number of operands."; 1335 return false; 1336 } 1337 1338 // Make sure the register classes are correct 1339 for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) { 1340 if (MI->getOperand(i).isFPImm()) { 1341 ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast " 1342 "all fp values to integers."; 1343 return false; 1344 } 1345 1346 int RegClass = Desc.OpInfo[i].RegClass; 1347 1348 switch (Desc.OpInfo[i].OperandType) { 1349 case MCOI::OPERAND_REGISTER: 1350 if (MI->getOperand(i).isImm()) { 1351 ErrInfo = "Illegal immediate value for operand."; 1352 return false; 1353 } 1354 break; 1355 case AMDGPU::OPERAND_REG_IMM32: 1356 break; 1357 case AMDGPU::OPERAND_REG_INLINE_C: 1358 if (isLiteralConstant(MI->getOperand(i), 1359 RI.getRegClass(RegClass)->getSize())) { 1360 ErrInfo = "Illegal immediate value for operand."; 1361 return false; 1362 } 1363 break; 1364 case MCOI::OPERAND_IMMEDIATE: 1365 // Check if this operand is an immediate. 1366 // FrameIndex operands will be replaced by immediates, so they are 1367 // allowed. 1368 if (!MI->getOperand(i).isImm() && !MI->getOperand(i).isFI()) { 1369 ErrInfo = "Expected immediate, but got non-immediate"; 1370 return false; 1371 } 1372 // Fall-through 1373 default: 1374 continue; 1375 } 1376 1377 if (!MI->getOperand(i).isReg()) 1378 continue; 1379 1380 if (RegClass != -1) { 1381 unsigned Reg = MI->getOperand(i).getReg(); 1382 if (TargetRegisterInfo::isVirtualRegister(Reg)) 1383 continue; 1384 1385 const TargetRegisterClass *RC = RI.getRegClass(RegClass); 1386 if (!RC->contains(Reg)) { 1387 ErrInfo = "Operand has incorrect register class."; 1388 return false; 1389 } 1390 } 1391 } 1392 1393 1394 // Verify VOP* 1395 if (isVOP1(Opcode) || isVOP2(Opcode) || isVOP3(Opcode) || isVOPC(Opcode)) { 1396 // Only look at the true operands. Only a real operand can use the constant 1397 // bus, and we don't want to check pseudo-operands like the source modifier 1398 // flags. 
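    // Added illustrative example of the rule enforced below (the register
    // names are hypothetical): at most one SGPR / M0 / VCC / literal value may
    // be read by a single VOP* instruction, so e.g.
    //   v_add_f32 v0, s0, v1   ; one constant bus read      -> accepted
    //   v_add_f32 v0, s0, s0   ; same SGPR only counts once -> accepted
    //   v_add_f32 v0, s0, s1   ; two constant bus reads     -> rejected here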
1399 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 1400 1401 unsigned ConstantBusCount = 0; 1402 unsigned SGPRUsed = AMDGPU::NoRegister; 1403 for (int OpIdx : OpIndices) { 1404 if (OpIdx == -1) 1405 break; 1406 const MachineOperand &MO = MI->getOperand(OpIdx); 1407 if (usesConstantBus(MRI, MO, getOpSize(Opcode, OpIdx))) { 1408 if (MO.isReg()) { 1409 if (MO.getReg() != SGPRUsed) 1410 ++ConstantBusCount; 1411 SGPRUsed = MO.getReg(); 1412 } else { 1413 ++ConstantBusCount; 1414 } 1415 } 1416 } 1417 if (ConstantBusCount > 1) { 1418 ErrInfo = "VOP* instruction uses the constant bus more than once"; 1419 return false; 1420 } 1421 } 1422 1423 // Verify misc. restrictions on specific instructions. 1424 if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 || 1425 Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) { 1426 const MachineOperand &Src0 = MI->getOperand(Src0Idx); 1427 const MachineOperand &Src1 = MI->getOperand(Src1Idx); 1428 const MachineOperand &Src2 = MI->getOperand(Src2Idx); 1429 if (Src0.isReg() && Src1.isReg() && Src2.isReg()) { 1430 if (!compareMachineOp(Src0, Src1) && 1431 !compareMachineOp(Src0, Src2)) { 1432 ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2"; 1433 return false; 1434 } 1435 } 1436 } 1437 1438 return true; 1439} 1440 1441unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) { 1442 switch (MI.getOpcode()) { 1443 default: return AMDGPU::INSTRUCTION_LIST_END; 1444 case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE; 1445 case AMDGPU::COPY: return AMDGPU::COPY; 1446 case AMDGPU::PHI: return AMDGPU::PHI; 1447 case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG; 1448 case AMDGPU::S_MOV_B32: 1449 return MI.getOperand(1).isReg() ? 1450 AMDGPU::COPY : AMDGPU::V_MOV_B32_e32; 1451 case AMDGPU::S_ADD_I32: 1452 case AMDGPU::S_ADD_U32: return AMDGPU::V_ADD_I32_e32; 1453 case AMDGPU::S_ADDC_U32: return AMDGPU::V_ADDC_U32_e32; 1454 case AMDGPU::S_SUB_I32: 1455 case AMDGPU::S_SUB_U32: return AMDGPU::V_SUB_I32_e32; 1456 case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32; 1457 case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_I32; 1458 case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e32; 1459 case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e32; 1460 case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e32; 1461 case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e32; 1462 case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e32; 1463 case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e32; 1464 case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e32; 1465 case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32; 1466 case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64; 1467 case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32; 1468 case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64; 1469 case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32; 1470 case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64; 1471 case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32; 1472 case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32; 1473 case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32; 1474 case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32; 1475 case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64; 1476 case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32; 1477 case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32; 1478 case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32; 1479 case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32; 1480 case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e32; 1481 case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e32; 1482 case AMDGPU::S_CMP_GE_I32: 
return AMDGPU::V_CMP_GE_I32_e32; 1483 case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e32; 1484 case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e32; 1485 case AMDGPU::S_LOAD_DWORD_IMM: 1486 case AMDGPU::S_LOAD_DWORD_SGPR: return AMDGPU::BUFFER_LOAD_DWORD_ADDR64; 1487 case AMDGPU::S_LOAD_DWORDX2_IMM: 1488 case AMDGPU::S_LOAD_DWORDX2_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX2_ADDR64; 1489 case AMDGPU::S_LOAD_DWORDX4_IMM: 1490 case AMDGPU::S_LOAD_DWORDX4_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64; 1491 case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64; 1492 case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32; 1493 case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32; 1494 case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64; 1495 } 1496} 1497 1498bool SIInstrInfo::isSALUOpSupportedOnVALU(const MachineInstr &MI) const { 1499 return getVALUOp(MI) != AMDGPU::INSTRUCTION_LIST_END; 1500} 1501 1502const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI, 1503 unsigned OpNo) const { 1504 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 1505 const MCInstrDesc &Desc = get(MI.getOpcode()); 1506 if (MI.isVariadic() || OpNo >= Desc.getNumOperands() || 1507 Desc.OpInfo[OpNo].RegClass == -1) { 1508 unsigned Reg = MI.getOperand(OpNo).getReg(); 1509 1510 if (TargetRegisterInfo::isVirtualRegister(Reg)) 1511 return MRI.getRegClass(Reg); 1512 return RI.getPhysRegClass(Reg); 1513 } 1514 1515 unsigned RCID = Desc.OpInfo[OpNo].RegClass; 1516 return RI.getRegClass(RCID); 1517} 1518 1519bool SIInstrInfo::canReadVGPR(const MachineInstr &MI, unsigned OpNo) const { 1520 switch (MI.getOpcode()) { 1521 case AMDGPU::COPY: 1522 case AMDGPU::REG_SEQUENCE: 1523 case AMDGPU::PHI: 1524 case AMDGPU::INSERT_SUBREG: 1525 return RI.hasVGPRs(getOpRegClass(MI, 0)); 1526 default: 1527 return RI.hasVGPRs(getOpRegClass(MI, OpNo)); 1528 } 1529} 1530 1531void SIInstrInfo::legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const { 1532 MachineBasicBlock::iterator I = MI; 1533 MachineBasicBlock *MBB = MI->getParent(); 1534 MachineOperand &MO = MI->getOperand(OpIdx); 1535 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 1536 unsigned RCID = get(MI->getOpcode()).OpInfo[OpIdx].RegClass; 1537 const TargetRegisterClass *RC = RI.getRegClass(RCID); 1538 unsigned Opcode = AMDGPU::V_MOV_B32_e32; 1539 if (MO.isReg()) 1540 Opcode = AMDGPU::COPY; 1541 else if (RI.isSGPRClass(RC)) 1542 Opcode = AMDGPU::S_MOV_B32; 1543 1544 1545 const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC); 1546 if (RI.getCommonSubClass(&AMDGPU::VReg_64RegClass, VRC)) 1547 VRC = &AMDGPU::VReg_64RegClass; 1548 else 1549 VRC = &AMDGPU::VGPR_32RegClass; 1550 1551 unsigned Reg = MRI.createVirtualRegister(VRC); 1552 DebugLoc DL = MBB->findDebugLoc(I); 1553 BuildMI(*MI->getParent(), I, DL, get(Opcode), Reg) 1554 .addOperand(MO); 1555 MO.ChangeToRegister(Reg, false); 1556} 1557 1558unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI, 1559 MachineRegisterInfo &MRI, 1560 MachineOperand &SuperReg, 1561 const TargetRegisterClass *SuperRC, 1562 unsigned SubIdx, 1563 const TargetRegisterClass *SubRC) 1564 const { 1565 assert(SuperReg.isReg()); 1566 1567 unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC); 1568 unsigned SubReg = MRI.createVirtualRegister(SubRC); 1569 1570 // Just in case the super register is itself a sub-register, copy it to a new 1571 // value so we don't need to worry about merging its subreg index with the 1572 // SubIdx passed to 
this function. The register coalescer should be able to 1573 // eliminate this extra copy. 1574 MachineBasicBlock *MBB = MI->getParent(); 1575 DebugLoc DL = MI->getDebugLoc(); 1576 1577 BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), NewSuperReg) 1578 .addReg(SuperReg.getReg(), 0, SuperReg.getSubReg()); 1579 1580 BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg) 1581 .addReg(NewSuperReg, 0, SubIdx); 1582 1583 return SubReg; 1584} 1585 1586MachineOperand SIInstrInfo::buildExtractSubRegOrImm( 1587 MachineBasicBlock::iterator MII, 1588 MachineRegisterInfo &MRI, 1589 MachineOperand &Op, 1590 const TargetRegisterClass *SuperRC, 1591 unsigned SubIdx, 1592 const TargetRegisterClass *SubRC) const { 1593 if (Op.isImm()) { 1594 // XXX - Is there a better way to do this? 1595 if (SubIdx == AMDGPU::sub0) 1596 return MachineOperand::CreateImm(Op.getImm() & 0xFFFFFFFF); 1597 if (SubIdx == AMDGPU::sub1) 1598 return MachineOperand::CreateImm(Op.getImm() >> 32); 1599 1600 llvm_unreachable("Unhandled register index for immediate"); 1601 } 1602 1603 unsigned SubReg = buildExtractSubReg(MII, MRI, Op, SuperRC, 1604 SubIdx, SubRC); 1605 return MachineOperand::CreateReg(SubReg, false); 1606} 1607 1608unsigned SIInstrInfo::split64BitImm(SmallVectorImpl<MachineInstr *> &Worklist, 1609 MachineBasicBlock::iterator MI, 1610 MachineRegisterInfo &MRI, 1611 const TargetRegisterClass *RC, 1612 const MachineOperand &Op) const { 1613 MachineBasicBlock *MBB = MI->getParent(); 1614 DebugLoc DL = MI->getDebugLoc(); 1615 unsigned LoDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); 1616 unsigned HiDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); 1617 unsigned Dst = MRI.createVirtualRegister(RC); 1618 1619 MachineInstr *Lo = BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32), 1620 LoDst) 1621 .addImm(Op.getImm() & 0xFFFFFFFF); 1622 MachineInstr *Hi = BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32), 1623 HiDst) 1624 .addImm(Op.getImm() >> 32); 1625 1626 BuildMI(*MBB, MI, DL, get(TargetOpcode::REG_SEQUENCE), Dst) 1627 .addReg(LoDst) 1628 .addImm(AMDGPU::sub0) 1629 .addReg(HiDst) 1630 .addImm(AMDGPU::sub1); 1631 1632 Worklist.push_back(Lo); 1633 Worklist.push_back(Hi); 1634 1635 return Dst; 1636} 1637 1638// Change the order of operands from (0, 1, 2) to (0, 2, 1) 1639void SIInstrInfo::swapOperands(MachineBasicBlock::iterator Inst) const { 1640 assert(Inst->getNumExplicitOperands() == 3); 1641 MachineOperand Op1 = Inst->getOperand(1); 1642 Inst->RemoveOperand(1); 1643 Inst->addOperand(Op1); 1644} 1645 1646bool SIInstrInfo::isOperandLegal(const MachineInstr *MI, unsigned OpIdx, 1647 const MachineOperand *MO) const { 1648 const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); 1649 const MCInstrDesc &InstDesc = get(MI->getOpcode()); 1650 const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx]; 1651 const TargetRegisterClass *DefinedRC = 1652 OpInfo.RegClass != -1 ? RI.getRegClass(OpInfo.RegClass) : nullptr; 1653 if (!MO) 1654 MO = &MI->getOperand(OpIdx); 1655 1656 if (isVALU(InstDesc.Opcode) && 1657 usesConstantBus(MRI, *MO, DefinedRC->getSize())) { 1658 unsigned SGPRUsed = 1659 MO->isReg() ? 
MO->getReg() : (unsigned)AMDGPU::NoRegister; 1660 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 1661 if (i == OpIdx) 1662 continue; 1663 const MachineOperand &Op = MI->getOperand(i); 1664 if (Op.isReg() && Op.getReg() != SGPRUsed && 1665 usesConstantBus(MRI, Op, getOpSize(*MI, i))) { 1666 return false; 1667 } 1668 } 1669 } 1670 1671 if (MO->isReg()) { 1672 assert(DefinedRC); 1673 const TargetRegisterClass *RC = 1674 TargetRegisterInfo::isVirtualRegister(MO->getReg()) ? 1675 MRI.getRegClass(MO->getReg()) : 1676 RI.getPhysRegClass(MO->getReg()); 1677 1678 // In order to be legal, the common sub-class must be equal to the 1679 // class of the current operand. For example: 1680 // 1681 // v_mov_b32 s0 ; Operand defined as vsrc_32 1682 // ; RI.getCommonSubClass(s0,vsrc_32) = sgpr ; LEGAL 1683 // 1684 // s_sendmsg 0, s0 ; Operand defined as m0reg 1685 // ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL 1686 1687 return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC; 1688 } 1689 1690 1691 // Handle non-register types that are treated like immediates. 1692 assert(MO->isImm() || MO->isTargetIndex() || MO->isFI()); 1693 1694 if (!DefinedRC) { 1695 // This operand expects an immediate. 1696 return true; 1697 } 1698 1699 return isImmOperandLegal(MI, OpIdx, *MO); 1700} 1701 1702void SIInstrInfo::legalizeOperands(MachineInstr *MI) const { 1703 MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); 1704 1705 int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), 1706 AMDGPU::OpName::src0); 1707 int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), 1708 AMDGPU::OpName::src1); 1709 int Src2Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), 1710 AMDGPU::OpName::src2); 1711 1712 // Legalize VOP2 1713 if (isVOP2(MI->getOpcode()) && Src1Idx != -1) { 1714 // Legalize src0 1715 if (!isOperandLegal(MI, Src0Idx)) 1716 legalizeOpWithMove(MI, Src0Idx); 1717 1718 // Legalize src1 1719 if (isOperandLegal(MI, Src1Idx)) 1720 return; 1721 1722 // Usually src0 of VOP2 instructions allow more types of inputs 1723 // than src1, so try to commute the instruction to decrease our 1724 // chances of having to insert a MOV instruction to legalize src1. 1725 if (MI->isCommutable()) { 1726 if (commuteInstruction(MI)) 1727 // If we are successful in commuting, then we know MI is legal, so 1728 // we are done. 1729 return; 1730 } 1731 1732 legalizeOpWithMove(MI, Src1Idx); 1733 return; 1734 } 1735 1736 // XXX - Do any VOP3 instructions read VCC? 1737 // Legalize VOP3 1738 if (isVOP3(MI->getOpcode())) { 1739 int VOP3Idx[3] = { Src0Idx, Src1Idx, Src2Idx }; 1740 1741 // Find the one SGPR operand we are allowed to use. 1742 unsigned SGPRReg = findUsedSGPR(MI, VOP3Idx); 1743 1744 for (unsigned i = 0; i < 3; ++i) { 1745 int Idx = VOP3Idx[i]; 1746 if (Idx == -1) 1747 break; 1748 MachineOperand &MO = MI->getOperand(Idx); 1749 1750 if (MO.isReg()) { 1751 if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg()))) 1752 continue; // VGPRs are legal 1753 1754 assert(MO.getReg() != AMDGPU::SCC && "SCC operand to VOP3 instruction"); 1755 1756 if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) { 1757 SGPRReg = MO.getReg(); 1758 // We can use one SGPR in each VOP3 instruction. 1759 continue; 1760 } 1761 } else if (!isLiteralConstant(MO, getOpSize(MI->getOpcode(), Idx))) { 1762 // If it is not a register and not a literal constant, then it must be 1763 // an inline constant which is always legal. 
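      // (On SI the inline constants are a small fixed set -- integers -16..64
      // and a few floats such as 0.5, 1.0, 2.0, 4.0 and their negations --
      // that encode directly in the src field, so no extra move is needed.
      // Informal summary only; see isInlineConstant for the exact rules.)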
1764 continue;
1765 }
1766 // If we make it this far, then the operand is not legal and we must
1767 // legalize it.
1768 legalizeOpWithMove(MI, Idx);
1769 }
1770 }
1771
1772 // Legalize REG_SEQUENCE and PHI
1773 // The register class of the operands must be the same type as the register
1774 // class of the output.
1775 if (MI->getOpcode() == AMDGPU::REG_SEQUENCE ||
1776 MI->getOpcode() == AMDGPU::PHI) {
1777 const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr;
1778 for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) {
1779 if (!MI->getOperand(i).isReg() ||
1780 !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
1781 continue;
1782 const TargetRegisterClass *OpRC =
1783 MRI.getRegClass(MI->getOperand(i).getReg());
1784 if (RI.hasVGPRs(OpRC)) {
1785 VRC = OpRC;
1786 } else {
1787 SRC = OpRC;
1788 }
1789 }
1790
1791 // If any of the operands are VGPR registers, then they must all be VGPRs,
1792 // otherwise we will create illegal VGPR->SGPR copies when legalizing
1793 // them.
1794 if (VRC || !RI.isSGPRClass(getOpRegClass(*MI, 0))) {
1795 if (!VRC) {
1796 assert(SRC);
1797 VRC = RI.getEquivalentVGPRClass(SRC);
1798 }
1799 RC = VRC;
1800 } else {
1801 RC = SRC;
1802 }
1803
1804 // Update all the operands so they have the same type.
1805 for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) {
1806 if (!MI->getOperand(i).isReg() ||
1807 !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
1808 continue;
1809 unsigned DstReg = MRI.createVirtualRegister(RC);
1810 MachineBasicBlock *InsertBB;
1811 MachineBasicBlock::iterator Insert;
1812 if (MI->getOpcode() == AMDGPU::REG_SEQUENCE) {
1813 InsertBB = MI->getParent();
1814 Insert = MI;
1815 } else {
1816 // MI is a PHI instruction.
1817 InsertBB = MI->getOperand(i + 1).getMBB();
1818 Insert = InsertBB->getFirstTerminator();
1819 }
1820 BuildMI(*InsertBB, Insert, MI->getDebugLoc(),
1821 get(AMDGPU::COPY), DstReg)
1822 .addOperand(MI->getOperand(i));
1823 MI->getOperand(i).setReg(DstReg);
1824 }
1825 }
1826
1827 // Legalize INSERT_SUBREG
1828 // src0 must have the same register class as dst
1829 if (MI->getOpcode() == AMDGPU::INSERT_SUBREG) {
1830 unsigned Dst = MI->getOperand(0).getReg();
1831 unsigned Src0 = MI->getOperand(1).getReg();
1832 const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
1833 const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0);
1834 if (DstRC != Src0RC) {
1835 MachineBasicBlock &MBB = *MI->getParent();
1836 unsigned NewSrc0 = MRI.createVirtualRegister(DstRC);
1837 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::COPY), NewSrc0)
1838 .addReg(Src0);
1839 MI->getOperand(1).setReg(NewSrc0);
1840 }
1841 return;
1842 }
1843
1844 // Legalize MUBUF* instructions
1845 // FIXME: If we start using the non-addr64 instructions for compute, we
1846 // may need to legalize them here.
1847 int SRsrcIdx =
1848 AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::srsrc);
1849 if (SRsrcIdx != -1) {
1850 // We have an MUBUF instruction
1851 MachineOperand *SRsrc = &MI->getOperand(SRsrcIdx);
1852 unsigned SRsrcRC = get(MI->getOpcode()).OpInfo[SRsrcIdx].RegClass;
1853 if (RI.getCommonSubClass(MRI.getRegClass(SRsrc->getReg()),
1854 RI.getRegClass(SRsrcRC))) {
1855 // The operands are legal.
1856 // FIXME: We may need to legalize operands besides srsrc.
1857 return;
1858 }
1859
1860 MachineBasicBlock &MBB = *MI->getParent();
1861 // Extract the ptr from the resource descriptor.
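    // Informal sketch of the rewrite done below (descriptor fields simplified):
    //   SRsrcPtr  = old srsrc[63:0]                       (the base pointer)
    //   new srsrc = { 0, 0, RSRC_DATA_FORMAT[31:0], RSRC_DATA_FORMAT[63:32] }
    //   new vaddr = SRsrcPtr (+ old vaddr, if this was already an ADDR64 op)
    // i.e. the base pointer is carried in the 64-bit VADDR of the _ADDR64 form
    // instead of in the descriptor.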
1862 1863 // SRsrcPtrLo = srsrc:sub0 1864 unsigned SRsrcPtrLo = buildExtractSubReg(MI, MRI, *SRsrc, 1865 &AMDGPU::VReg_128RegClass, AMDGPU::sub0, &AMDGPU::VGPR_32RegClass); 1866 1867 // SRsrcPtrHi = srsrc:sub1 1868 unsigned SRsrcPtrHi = buildExtractSubReg(MI, MRI, *SRsrc, 1869 &AMDGPU::VReg_128RegClass, AMDGPU::sub1, &AMDGPU::VGPR_32RegClass); 1870 1871 // Create an empty resource descriptor 1872 unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); 1873 unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); 1874 unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); 1875 unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass); 1876 uint64_t RsrcDataFormat = getDefaultRsrcDataFormat(); 1877 1878 // Zero64 = 0 1879 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64), 1880 Zero64) 1881 .addImm(0); 1882 1883 // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0} 1884 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), 1885 SRsrcFormatLo) 1886 .addImm(RsrcDataFormat & 0xFFFFFFFF); 1887 1888 // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32} 1889 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), 1890 SRsrcFormatHi) 1891 .addImm(RsrcDataFormat >> 32); 1892 1893 // NewSRsrc = {Zero64, SRsrcFormat} 1894 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), 1895 NewSRsrc) 1896 .addReg(Zero64) 1897 .addImm(AMDGPU::sub0_sub1) 1898 .addReg(SRsrcFormatLo) 1899 .addImm(AMDGPU::sub2) 1900 .addReg(SRsrcFormatHi) 1901 .addImm(AMDGPU::sub3); 1902 1903 MachineOperand *VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr); 1904 unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); 1905 unsigned NewVAddrLo; 1906 unsigned NewVAddrHi; 1907 if (VAddr) { 1908 // This is already an ADDR64 instruction so we need to add the pointer 1909 // extracted from the resource descriptor to the current value of VAddr. 1910 NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 1911 NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 1912 1913 // NewVaddrLo = SRsrcPtrLo + VAddr:sub0 1914 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADD_I32_e32), 1915 NewVAddrLo) 1916 .addReg(SRsrcPtrLo) 1917 .addReg(VAddr->getReg(), 0, AMDGPU::sub0) 1918 .addReg(AMDGPU::VCC, RegState::ImplicitDefine); 1919 1920 // NewVaddrHi = SRsrcPtrHi + VAddr:sub1 1921 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADDC_U32_e32), 1922 NewVAddrHi) 1923 .addReg(SRsrcPtrHi) 1924 .addReg(VAddr->getReg(), 0, AMDGPU::sub1) 1925 .addReg(AMDGPU::VCC, RegState::ImplicitDefine) 1926 .addReg(AMDGPU::VCC, RegState::Implicit); 1927 1928 } else { 1929 // This instructions is the _OFFSET variant, so we need to convert it to 1930 // ADDR64. 1931 MachineOperand *VData = getNamedOperand(*MI, AMDGPU::OpName::vdata); 1932 MachineOperand *Offset = getNamedOperand(*MI, AMDGPU::OpName::offset); 1933 MachineOperand *SOffset = getNamedOperand(*MI, AMDGPU::OpName::soffset); 1934 1935 // Create the new instruction. 1936 unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI->getOpcode()); 1937 MachineInstr *Addr64 = 1938 BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode)) 1939 .addOperand(*VData) 1940 .addReg(AMDGPU::NoRegister) // Dummy value for vaddr. 1941 // This will be replaced later 1942 // with the new value of vaddr. 
1943 .addOperand(*SRsrc) 1944 .addOperand(*SOffset) 1945 .addOperand(*Offset) 1946 .addImm(0) // glc 1947 .addImm(0) // slc 1948 .addImm(0); // tfe 1949 1950 MI->removeFromParent(); 1951 MI = Addr64; 1952 1953 NewVAddrLo = SRsrcPtrLo; 1954 NewVAddrHi = SRsrcPtrHi; 1955 VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr); 1956 SRsrc = getNamedOperand(*MI, AMDGPU::OpName::srsrc); 1957 } 1958 1959 // NewVaddr = {NewVaddrHi, NewVaddrLo} 1960 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), 1961 NewVAddr) 1962 .addReg(NewVAddrLo) 1963 .addImm(AMDGPU::sub0) 1964 .addReg(NewVAddrHi) 1965 .addImm(AMDGPU::sub1); 1966 1967 1968 // Update the instruction to use NewVaddr 1969 VAddr->setReg(NewVAddr); 1970 // Update the instruction to use NewSRsrc 1971 SRsrc->setReg(NewSRsrc); 1972 } 1973} 1974 1975void SIInstrInfo::splitSMRD(MachineInstr *MI, 1976 const TargetRegisterClass *HalfRC, 1977 unsigned HalfImmOp, unsigned HalfSGPROp, 1978 MachineInstr *&Lo, MachineInstr *&Hi) const { 1979 1980 DebugLoc DL = MI->getDebugLoc(); 1981 MachineBasicBlock *MBB = MI->getParent(); 1982 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 1983 unsigned RegLo = MRI.createVirtualRegister(HalfRC); 1984 unsigned RegHi = MRI.createVirtualRegister(HalfRC); 1985 unsigned HalfSize = HalfRC->getSize(); 1986 const MachineOperand *OffOp = 1987 getNamedOperand(*MI, AMDGPU::OpName::offset); 1988 const MachineOperand *SBase = getNamedOperand(*MI, AMDGPU::OpName::sbase); 1989 1990 // The SMRD has an 8-bit offset in dwords on SI and a 20-bit offset in bytes 1991 // on VI. 1992 1993 bool IsKill = SBase->isKill(); 1994 if (OffOp) { 1995 bool isVI = 1996 MBB->getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() >= 1997 AMDGPUSubtarget::VOLCANIC_ISLANDS; 1998 unsigned OffScale = isVI ? 1 : 4; 1999 // Handle the _IMM variant 2000 unsigned LoOffset = OffOp->getImm() * OffScale; 2001 unsigned HiOffset = LoOffset + HalfSize; 2002 Lo = BuildMI(*MBB, MI, DL, get(HalfImmOp), RegLo) 2003 // Use addReg instead of addOperand 2004 // to make sure kill flag is cleared. 2005 .addReg(SBase->getReg(), 0, SBase->getSubReg()) 2006 .addImm(LoOffset / OffScale); 2007 2008 if (!isUInt<20>(HiOffset) || (!isVI && !isUInt<8>(HiOffset / OffScale))) { 2009 unsigned OffsetSGPR = 2010 MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); 2011 BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32), OffsetSGPR) 2012 .addImm(HiOffset); // The offset in register is in bytes. 
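      // The second half's offset no longer fits the immediate encoding, so it
      // was materialized into OffsetSGPR above and the _SGPR form of the load
      // is used for that half.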
2013 Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegHi)
2014 .addReg(SBase->getReg(), getKillRegState(IsKill),
2015 SBase->getSubReg())
2016 .addReg(OffsetSGPR);
2017 } else {
2018 Hi = BuildMI(*MBB, MI, DL, get(HalfImmOp), RegHi)
2019 .addReg(SBase->getReg(), getKillRegState(IsKill),
2020 SBase->getSubReg())
2021 .addImm(HiOffset / OffScale);
2022 }
2023 } else {
2024 // Handle the _SGPR variant
2025 MachineOperand *SOff = getNamedOperand(*MI, AMDGPU::OpName::soff);
2026 Lo = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegLo)
2027 .addReg(SBase->getReg(), 0, SBase->getSubReg())
2028 .addOperand(*SOff);
2029 unsigned OffsetSGPR = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
2030 BuildMI(*MBB, MI, DL, get(AMDGPU::S_ADD_I32), OffsetSGPR)
2031 .addOperand(*SOff)
2032 .addImm(HalfSize);
2033 Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp))
2034 .addReg(SBase->getReg(), getKillRegState(IsKill),
2035 SBase->getSubReg())
2036 .addReg(OffsetSGPR);
2037 }
2038
2039 unsigned SubLo, SubHi;
2040 switch (HalfSize) {
2041 case 4:
2042 SubLo = AMDGPU::sub0;
2043 SubHi = AMDGPU::sub1;
2044 break;
2045 case 8:
2046 SubLo = AMDGPU::sub0_sub1;
2047 SubHi = AMDGPU::sub2_sub3;
2048 break;
2049 case 16:
2050 SubLo = AMDGPU::sub0_sub1_sub2_sub3;
2051 SubHi = AMDGPU::sub4_sub5_sub6_sub7;
2052 break;
2053 case 32:
2054 SubLo = AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
2055 SubHi = AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15;
2056 break;
2057 default:
2058 llvm_unreachable("Unhandled HalfSize");
2059 }
2060
2061 BuildMI(*MBB, MI, DL, get(AMDGPU::REG_SEQUENCE))
2062 .addOperand(MI->getOperand(0))
2063 .addReg(RegLo)
2064 .addImm(SubLo)
2065 .addReg(RegHi)
2066 .addImm(SubHi);
2067 }
2068
2069 void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) const {
2070 MachineBasicBlock *MBB = MI->getParent();
2071 switch (MI->getOpcode()) {
2072 case AMDGPU::S_LOAD_DWORD_IMM:
2073 case AMDGPU::S_LOAD_DWORD_SGPR:
2074 case AMDGPU::S_LOAD_DWORDX2_IMM:
2075 case AMDGPU::S_LOAD_DWORDX2_SGPR:
2076 case AMDGPU::S_LOAD_DWORDX4_IMM:
2077 case AMDGPU::S_LOAD_DWORDX4_SGPR: {
2078 unsigned NewOpcode = getVALUOp(*MI);
2079 unsigned RegOffset;
2080 unsigned ImmOffset;
2081
2082 if (MI->getOperand(2).isReg()) {
2083 RegOffset = MI->getOperand(2).getReg();
2084 ImmOffset = 0;
2085 } else {
2086 assert(MI->getOperand(2).isImm());
2087 // SMRD instructions take a dword offset on SI and a byte offset on VI,
2088 // and MUBUF instructions always take a byte offset.
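        // Illustrative example: an S_LOAD_DWORD_IMM offset of 4 on SI means
        // 4 dwords = 16 bytes, so ImmOffset becomes 16 for the MUBUF form;
        // on VI the offset is already in bytes and is used unchanged.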
2089 ImmOffset = MI->getOperand(2).getImm(); 2090 if (MBB->getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() <= 2091 AMDGPUSubtarget::SEA_ISLANDS) 2092 ImmOffset <<= 2; 2093 RegOffset = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); 2094 2095 if (isUInt<12>(ImmOffset)) { 2096 BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), 2097 RegOffset) 2098 .addImm(0); 2099 } else { 2100 BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), 2101 RegOffset) 2102 .addImm(ImmOffset); 2103 ImmOffset = 0; 2104 } 2105 } 2106 2107 unsigned SRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass); 2108 unsigned DWord0 = RegOffset; 2109 unsigned DWord1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); 2110 unsigned DWord2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); 2111 unsigned DWord3 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); 2112 uint64_t RsrcDataFormat = getDefaultRsrcDataFormat(); 2113 2114 BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord1) 2115 .addImm(0); 2116 BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord2) 2117 .addImm(RsrcDataFormat & 0xFFFFFFFF); 2118 BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord3) 2119 .addImm(RsrcDataFormat >> 32); 2120 BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), SRsrc) 2121 .addReg(DWord0) 2122 .addImm(AMDGPU::sub0) 2123 .addReg(DWord1) 2124 .addImm(AMDGPU::sub1) 2125 .addReg(DWord2) 2126 .addImm(AMDGPU::sub2) 2127 .addReg(DWord3) 2128 .addImm(AMDGPU::sub3); 2129 MI->setDesc(get(NewOpcode)); 2130 if (MI->getOperand(2).isReg()) { 2131 MI->getOperand(2).setReg(SRsrc); 2132 } else { 2133 MI->getOperand(2).ChangeToRegister(SRsrc, false); 2134 } 2135 MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); 2136 MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(ImmOffset)); 2137 MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // glc 2138 MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // slc 2139 MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // tfe 2140 2141 const TargetRegisterClass *NewDstRC = 2142 RI.getRegClass(get(NewOpcode).OpInfo[0].RegClass); 2143 2144 unsigned DstReg = MI->getOperand(0).getReg(); 2145 unsigned NewDstReg = MRI.createVirtualRegister(NewDstRC); 2146 MRI.replaceRegWith(DstReg, NewDstReg); 2147 break; 2148 } 2149 case AMDGPU::S_LOAD_DWORDX8_IMM: 2150 case AMDGPU::S_LOAD_DWORDX8_SGPR: { 2151 MachineInstr *Lo, *Hi; 2152 splitSMRD(MI, &AMDGPU::SReg_128RegClass, AMDGPU::S_LOAD_DWORDX4_IMM, 2153 AMDGPU::S_LOAD_DWORDX4_SGPR, Lo, Hi); 2154 MI->eraseFromParent(); 2155 moveSMRDToVALU(Lo, MRI); 2156 moveSMRDToVALU(Hi, MRI); 2157 break; 2158 } 2159 2160 case AMDGPU::S_LOAD_DWORDX16_IMM: 2161 case AMDGPU::S_LOAD_DWORDX16_SGPR: { 2162 MachineInstr *Lo, *Hi; 2163 splitSMRD(MI, &AMDGPU::SReg_256RegClass, AMDGPU::S_LOAD_DWORDX8_IMM, 2164 AMDGPU::S_LOAD_DWORDX8_SGPR, Lo, Hi); 2165 MI->eraseFromParent(); 2166 moveSMRDToVALU(Lo, MRI); 2167 moveSMRDToVALU(Hi, MRI); 2168 break; 2169 } 2170 } 2171} 2172 2173void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { 2174 SmallVector<MachineInstr *, 128> Worklist; 2175 Worklist.push_back(&TopInst); 2176 2177 while (!Worklist.empty()) { 2178 MachineInstr *Inst = Worklist.pop_back_val(); 2179 MachineBasicBlock *MBB = Inst->getParent(); 2180 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 2181 2182 unsigned Opcode = Inst->getOpcode(); 2183 unsigned NewOpcode = getVALUOp(*Inst); 2184 2185 // Handle some special cases 
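    // Illustrative example (virtual register numbers made up): for
    //   %5 = S_AND_B64 %3, %4
    // there is no 64-bit VALU AND, so one of the cases below splits it into
    // two 32-bit halves; the new instructions are pushed onto the worklist so
    // they are themselves moved to the VALU and legalized on a later iteration.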
2186 switch (Opcode) { 2187 default: 2188 if (isSMRD(Inst->getOpcode())) { 2189 moveSMRDToVALU(Inst, MRI); 2190 } 2191 break; 2192 case AMDGPU::S_MOV_B64: { 2193 DebugLoc DL = Inst->getDebugLoc(); 2194 2195 // If the source operand is a register we can replace this with a 2196 // copy. 2197 if (Inst->getOperand(1).isReg()) { 2198 MachineInstr *Copy = BuildMI(*MBB, Inst, DL, get(TargetOpcode::COPY)) 2199 .addOperand(Inst->getOperand(0)) 2200 .addOperand(Inst->getOperand(1)); 2201 Worklist.push_back(Copy); 2202 } else { 2203 // Otherwise, we need to split this into two movs, because there is 2204 // no 64-bit VALU move instruction. 2205 unsigned Reg = Inst->getOperand(0).getReg(); 2206 unsigned Dst = split64BitImm(Worklist, 2207 Inst, 2208 MRI, 2209 MRI.getRegClass(Reg), 2210 Inst->getOperand(1)); 2211 MRI.replaceRegWith(Reg, Dst); 2212 } 2213 Inst->eraseFromParent(); 2214 continue; 2215 } 2216 case AMDGPU::S_AND_B64: 2217 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32); 2218 Inst->eraseFromParent(); 2219 continue; 2220 2221 case AMDGPU::S_OR_B64: 2222 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32); 2223 Inst->eraseFromParent(); 2224 continue; 2225 2226 case AMDGPU::S_XOR_B64: 2227 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32); 2228 Inst->eraseFromParent(); 2229 continue; 2230 2231 case AMDGPU::S_NOT_B64: 2232 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32); 2233 Inst->eraseFromParent(); 2234 continue; 2235 2236 case AMDGPU::S_BCNT1_I32_B64: 2237 splitScalar64BitBCNT(Worklist, Inst); 2238 Inst->eraseFromParent(); 2239 continue; 2240 2241 case AMDGPU::S_BFE_I64: { 2242 splitScalar64BitBFE(Worklist, Inst); 2243 Inst->eraseFromParent(); 2244 continue; 2245 } 2246 2247 case AMDGPU::S_LSHL_B32: 2248 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { 2249 NewOpcode = AMDGPU::V_LSHLREV_B32_e64; 2250 swapOperands(Inst); 2251 } 2252 break; 2253 case AMDGPU::S_ASHR_I32: 2254 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { 2255 NewOpcode = AMDGPU::V_ASHRREV_I32_e64; 2256 swapOperands(Inst); 2257 } 2258 break; 2259 case AMDGPU::S_LSHR_B32: 2260 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { 2261 NewOpcode = AMDGPU::V_LSHRREV_B32_e64; 2262 swapOperands(Inst); 2263 } 2264 break; 2265 case AMDGPU::S_LSHL_B64: 2266 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { 2267 NewOpcode = AMDGPU::V_LSHLREV_B64; 2268 swapOperands(Inst); 2269 } 2270 break; 2271 case AMDGPU::S_ASHR_I64: 2272 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { 2273 NewOpcode = AMDGPU::V_ASHRREV_I64; 2274 swapOperands(Inst); 2275 } 2276 break; 2277 case AMDGPU::S_LSHR_B64: 2278 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { 2279 NewOpcode = AMDGPU::V_LSHRREV_B64; 2280 swapOperands(Inst); 2281 } 2282 break; 2283 2284 case AMDGPU::S_BFE_U64: 2285 case AMDGPU::S_BFM_B64: 2286 llvm_unreachable("Moving this op to VALU not implemented"); 2287 } 2288 2289 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) { 2290 // We cannot move this instruction to the VALU, so we should try to 2291 // legalize its operands instead. 2292 legalizeOperands(Inst); 2293 continue; 2294 } 2295 2296 // Use the new VALU Opcode. 2297 const MCInstrDesc &NewDesc = get(NewOpcode); 2298 Inst->setDesc(NewDesc); 2299 2300 // Remove any references to SCC. Vector instructions can't read from it, and 2301 // We're just about to add the implicit use / defs of VCC, and we don't want 2302 // both. 
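    // (SCC is the scalar condition code; the VALU forms use VCC instead, whose
    // implicit operands are added later by addDescImplicitUseDef from the new
    // opcode's descriptor.)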
2303 for (unsigned i = Inst->getNumOperands() - 1; i > 0; --i) { 2304 MachineOperand &Op = Inst->getOperand(i); 2305 if (Op.isReg() && Op.getReg() == AMDGPU::SCC) 2306 Inst->RemoveOperand(i); 2307 } 2308 2309 if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) { 2310 // We are converting these to a BFE, so we need to add the missing 2311 // operands for the size and offset. 2312 unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16; 2313 Inst->addOperand(MachineOperand::CreateImm(0)); 2314 Inst->addOperand(MachineOperand::CreateImm(Size)); 2315 2316 } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) { 2317 // The VALU version adds the second operand to the result, so insert an 2318 // extra 0 operand. 2319 Inst->addOperand(MachineOperand::CreateImm(0)); 2320 } 2321 2322 addDescImplicitUseDef(NewDesc, Inst); 2323 2324 if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) { 2325 const MachineOperand &OffsetWidthOp = Inst->getOperand(2); 2326 // If we need to move this to VGPRs, we need to unpack the second operand 2327 // back into the 2 separate ones for bit offset and width. 2328 assert(OffsetWidthOp.isImm() && 2329 "Scalar BFE is only implemented for constant width and offset"); 2330 uint32_t Imm = OffsetWidthOp.getImm(); 2331 2332 uint32_t Offset = Imm & 0x3f; // Extract bits [5:0]. 2333 uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16]. 2334 Inst->RemoveOperand(2); // Remove old immediate. 2335 Inst->addOperand(MachineOperand::CreateImm(Offset)); 2336 Inst->addOperand(MachineOperand::CreateImm(BitWidth)); 2337 } 2338 2339 // Update the destination register class. 2340 2341 const TargetRegisterClass *NewDstRC = getOpRegClass(*Inst, 0); 2342 2343 switch (Opcode) { 2344 // For target instructions, getOpRegClass just returns the virtual 2345 // register class associated with the operand, so we need to find an 2346 // equivalent VGPR register class in order to move the instruction to the 2347 // VALU. 
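    // e.g. a COPY or PHI that produced a value in SReg_64 is switched to
    // VReg_64 here via getEquivalentVGPRClass, so its users see a VGPR result.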
2348 case AMDGPU::COPY: 2349 case AMDGPU::PHI: 2350 case AMDGPU::REG_SEQUENCE: 2351 case AMDGPU::INSERT_SUBREG: 2352 if (RI.hasVGPRs(NewDstRC)) 2353 continue; 2354 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC); 2355 if (!NewDstRC) 2356 continue; 2357 break; 2358 default: 2359 break; 2360 } 2361 2362 unsigned DstReg = Inst->getOperand(0).getReg(); 2363 unsigned NewDstReg = MRI.createVirtualRegister(NewDstRC); 2364 MRI.replaceRegWith(DstReg, NewDstReg); 2365 2366 // Legalize the operands 2367 legalizeOperands(Inst); 2368 2369 for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg), 2370 E = MRI.use_end(); I != E; ++I) { 2371 MachineInstr &UseMI = *I->getParent(); 2372 if (!canReadVGPR(UseMI, I.getOperandNo())) { 2373 Worklist.push_back(&UseMI); 2374 } 2375 } 2376 } 2377} 2378 2379//===----------------------------------------------------------------------===// 2380// Indirect addressing callbacks 2381//===----------------------------------------------------------------------===// 2382 2383unsigned SIInstrInfo::calculateIndirectAddress(unsigned RegIndex, 2384 unsigned Channel) const { 2385 assert(Channel == 0); 2386 return RegIndex; 2387} 2388 2389const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const { 2390 return &AMDGPU::VGPR_32RegClass; 2391} 2392 2393void SIInstrInfo::splitScalar64BitUnaryOp( 2394 SmallVectorImpl<MachineInstr *> &Worklist, 2395 MachineInstr *Inst, 2396 unsigned Opcode) const { 2397 MachineBasicBlock &MBB = *Inst->getParent(); 2398 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 2399 2400 MachineOperand &Dest = Inst->getOperand(0); 2401 MachineOperand &Src0 = Inst->getOperand(1); 2402 DebugLoc DL = Inst->getDebugLoc(); 2403 2404 MachineBasicBlock::iterator MII = Inst; 2405 2406 const MCInstrDesc &InstDesc = get(Opcode); 2407 const TargetRegisterClass *Src0RC = Src0.isReg() ? 2408 MRI.getRegClass(Src0.getReg()) : 2409 &AMDGPU::SGPR_32RegClass; 2410 2411 const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0); 2412 2413 MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, 2414 AMDGPU::sub0, Src0SubRC); 2415 2416 const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg()); 2417 const TargetRegisterClass *DestSubRC = RI.getSubRegClass(DestRC, AMDGPU::sub0); 2418 2419 unsigned DestSub0 = MRI.createVirtualRegister(DestRC); 2420 MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0) 2421 .addOperand(SrcReg0Sub0); 2422 2423 MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, 2424 AMDGPU::sub1, Src0SubRC); 2425 2426 unsigned DestSub1 = MRI.createVirtualRegister(DestSubRC); 2427 MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1) 2428 .addOperand(SrcReg0Sub1); 2429 2430 unsigned FullDestReg = MRI.createVirtualRegister(DestRC); 2431 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg) 2432 .addReg(DestSub0) 2433 .addImm(AMDGPU::sub0) 2434 .addReg(DestSub1) 2435 .addImm(AMDGPU::sub1); 2436 2437 MRI.replaceRegWith(Dest.getReg(), FullDestReg); 2438 2439 // Try to legalize the operands in case we need to swap the order to keep it 2440 // valid. 
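  // (The halves still use the scalar opcode, e.g. S_NOT_B32, at this point;
  // re-queueing them lets moveToVALU turn each into its VALU form and
  // legalize its operands.)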
2441 Worklist.push_back(LoHalf); 2442 Worklist.push_back(HiHalf); 2443} 2444 2445void SIInstrInfo::splitScalar64BitBinaryOp( 2446 SmallVectorImpl<MachineInstr *> &Worklist, 2447 MachineInstr *Inst, 2448 unsigned Opcode) const { 2449 MachineBasicBlock &MBB = *Inst->getParent(); 2450 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 2451 2452 MachineOperand &Dest = Inst->getOperand(0); 2453 MachineOperand &Src0 = Inst->getOperand(1); 2454 MachineOperand &Src1 = Inst->getOperand(2); 2455 DebugLoc DL = Inst->getDebugLoc(); 2456 2457 MachineBasicBlock::iterator MII = Inst; 2458 2459 const MCInstrDesc &InstDesc = get(Opcode); 2460 const TargetRegisterClass *Src0RC = Src0.isReg() ? 2461 MRI.getRegClass(Src0.getReg()) : 2462 &AMDGPU::SGPR_32RegClass; 2463 2464 const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0); 2465 const TargetRegisterClass *Src1RC = Src1.isReg() ? 2466 MRI.getRegClass(Src1.getReg()) : 2467 &AMDGPU::SGPR_32RegClass; 2468 2469 const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0); 2470 2471 MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, 2472 AMDGPU::sub0, Src0SubRC); 2473 MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, 2474 AMDGPU::sub0, Src1SubRC); 2475 2476 const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg()); 2477 const TargetRegisterClass *DestSubRC = RI.getSubRegClass(DestRC, AMDGPU::sub0); 2478 2479 unsigned DestSub0 = MRI.createVirtualRegister(DestRC); 2480 MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0) 2481 .addOperand(SrcReg0Sub0) 2482 .addOperand(SrcReg1Sub0); 2483 2484 MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, 2485 AMDGPU::sub1, Src0SubRC); 2486 MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, 2487 AMDGPU::sub1, Src1SubRC); 2488 2489 unsigned DestSub1 = MRI.createVirtualRegister(DestSubRC); 2490 MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1) 2491 .addOperand(SrcReg0Sub1) 2492 .addOperand(SrcReg1Sub1); 2493 2494 unsigned FullDestReg = MRI.createVirtualRegister(DestRC); 2495 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg) 2496 .addReg(DestSub0) 2497 .addImm(AMDGPU::sub0) 2498 .addReg(DestSub1) 2499 .addImm(AMDGPU::sub1); 2500 2501 MRI.replaceRegWith(Dest.getReg(), FullDestReg); 2502 2503 // Try to legalize the operands in case we need to swap the order to keep it 2504 // valid. 2505 Worklist.push_back(LoHalf); 2506 Worklist.push_back(HiHalf); 2507} 2508 2509void SIInstrInfo::splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist, 2510 MachineInstr *Inst) const { 2511 MachineBasicBlock &MBB = *Inst->getParent(); 2512 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 2513 2514 MachineBasicBlock::iterator MII = Inst; 2515 DebugLoc DL = Inst->getDebugLoc(); 2516 2517 MachineOperand &Dest = Inst->getOperand(0); 2518 MachineOperand &Src = Inst->getOperand(1); 2519 2520 const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64); 2521 const TargetRegisterClass *SrcRC = Src.isReg() ? 
2522 MRI.getRegClass(Src.getReg()) :
2523 &AMDGPU::SGPR_32RegClass;
2524
2525 unsigned MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2526 unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2527
2528 const TargetRegisterClass *SrcSubRC = RI.getSubRegClass(SrcRC, AMDGPU::sub0);
2529
2530 MachineOperand SrcRegSub0 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
2531 AMDGPU::sub0, SrcSubRC);
2532 MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
2533 AMDGPU::sub1, SrcSubRC);
2534
2535 MachineInstr *First = BuildMI(MBB, MII, DL, InstDesc, MidReg)
2536 .addOperand(SrcRegSub0)
2537 .addImm(0);
2538
2539 MachineInstr *Second = BuildMI(MBB, MII, DL, InstDesc, ResultReg)
2540 .addOperand(SrcRegSub1)
2541 .addReg(MidReg);
2542
2543 MRI.replaceRegWith(Dest.getReg(), ResultReg);
2544
2545 Worklist.push_back(First);
2546 Worklist.push_back(Second);
2547 }
2548
2549 void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist,
2550 MachineInstr *Inst) const {
2551 MachineBasicBlock &MBB = *Inst->getParent();
2552 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
2553 MachineBasicBlock::iterator MII = Inst;
2554 DebugLoc DL = Inst->getDebugLoc();
2555
2556 MachineOperand &Dest = Inst->getOperand(0);
2557 uint32_t Imm = Inst->getOperand(2).getImm();
2558 uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
2559 uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
2560
2561 (void) Offset;
2562
2563 // Only sext_inreg cases handled.
2564 assert(Inst->getOpcode() == AMDGPU::S_BFE_I64 &&
2565 BitWidth <= 32 &&
2566 Offset == 0 &&
2567 "Not implemented");
2568
2569 if (BitWidth < 32) {
2570 unsigned MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2571 unsigned MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2572 unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
2573
2574 BuildMI(MBB, MII, DL, get(AMDGPU::V_BFE_I32), MidRegLo)
2575 .addReg(Inst->getOperand(1).getReg(), 0, AMDGPU::sub0)
2576 .addImm(0)
2577 .addImm(BitWidth);
2578
2579 BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e32), MidRegHi)
2580 .addImm(31)
2581 .addReg(MidRegLo);
2582
2583 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
2584 .addReg(MidRegLo)
2585 .addImm(AMDGPU::sub0)
2586 .addReg(MidRegHi)
2587 .addImm(AMDGPU::sub1);
2588
2589 MRI.replaceRegWith(Dest.getReg(), ResultReg);
2590 return;
2591 }
2592
2593 MachineOperand &Src = Inst->getOperand(1);
2594 unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2595 unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
2596
2597 BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e64), TmpReg)
2598 .addImm(31)
2599 .addReg(Src.getReg(), 0, AMDGPU::sub0);
2600
2601 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
2602 .addReg(Src.getReg(), 0, AMDGPU::sub0)
2603 .addImm(AMDGPU::sub0)
2604 .addReg(TmpReg)
2605 .addImm(AMDGPU::sub1);
2606
2607 MRI.replaceRegWith(Dest.getReg(), ResultReg);
2608 }
2609
2610 void SIInstrInfo::addDescImplicitUseDef(const MCInstrDesc &NewDesc,
2611 MachineInstr *Inst) const {
2612 // Add the implicit register uses and definitions from the new descriptor.
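  // For example, an instruction rewritten to V_ADD_I32_e32 picks up an
  // implicit def of VCC (and VALU ops implicitly use EXEC) from the new
  // descriptor; those operands must be added by hand here because setDesc()
  // alone does not create them.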
2613 if (NewDesc.ImplicitUses) { 2614 for (unsigned i = 0; NewDesc.ImplicitUses[i]; ++i) { 2615 unsigned Reg = NewDesc.ImplicitUses[i]; 2616 Inst->addOperand(MachineOperand::CreateReg(Reg, false, true)); 2617 } 2618 } 2619 2620 if (NewDesc.ImplicitDefs) { 2621 for (unsigned i = 0; NewDesc.ImplicitDefs[i]; ++i) { 2622 unsigned Reg = NewDesc.ImplicitDefs[i]; 2623 Inst->addOperand(MachineOperand::CreateReg(Reg, true, true)); 2624 } 2625 } 2626} 2627 2628unsigned SIInstrInfo::findUsedSGPR(const MachineInstr *MI, 2629 int OpIndices[3]) const { 2630 const MCInstrDesc &Desc = get(MI->getOpcode()); 2631 2632 // Find the one SGPR operand we are allowed to use. 2633 unsigned SGPRReg = AMDGPU::NoRegister; 2634 2635 // First we need to consider the instruction's operand requirements before 2636 // legalizing. Some operands are required to be SGPRs, such as implicit uses 2637 // of VCC, but we are still bound by the constant bus requirement to only use 2638 // one. 2639 // 2640 // If the operand's class is an SGPR, we can never move it. 2641 2642 for (const MachineOperand &MO : MI->implicit_operands()) { 2643 // We only care about reads. 2644 if (MO.isDef()) 2645 continue; 2646 2647 if (MO.getReg() == AMDGPU::VCC) 2648 return AMDGPU::VCC; 2649 2650 if (MO.getReg() == AMDGPU::FLAT_SCR) 2651 return AMDGPU::FLAT_SCR; 2652 } 2653 2654 unsigned UsedSGPRs[3] = { AMDGPU::NoRegister }; 2655 const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); 2656 2657 for (unsigned i = 0; i < 3; ++i) { 2658 int Idx = OpIndices[i]; 2659 if (Idx == -1) 2660 break; 2661 2662 const MachineOperand &MO = MI->getOperand(Idx); 2663 if (RI.isSGPRClassID(Desc.OpInfo[Idx].RegClass)) 2664 SGPRReg = MO.getReg(); 2665 2666 if (MO.isReg() && RI.isSGPRClass(MRI.getRegClass(MO.getReg()))) 2667 UsedSGPRs[i] = MO.getReg(); 2668 } 2669 2670 if (SGPRReg != AMDGPU::NoRegister) 2671 return SGPRReg; 2672 2673 // We don't have a required SGPR operand, so we have a bit more freedom in 2674 // selecting operands to move. 2675 2676 // Try to select the most used SGPR. If an SGPR is equal to one of the 2677 // others, we choose that. 2678 // 2679 // e.g. 
2680 // V_FMA_F32 v0, s0, s0, s0 -> No moves 2681 // V_FMA_F32 v0, s0, s1, s0 -> Move s1 2682 2683 if (UsedSGPRs[0] != AMDGPU::NoRegister) { 2684 if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2]) 2685 SGPRReg = UsedSGPRs[0]; 2686 } 2687 2688 if (SGPRReg == AMDGPU::NoRegister && UsedSGPRs[1] != AMDGPU::NoRegister) { 2689 if (UsedSGPRs[1] == UsedSGPRs[2]) 2690 SGPRReg = UsedSGPRs[1]; 2691 } 2692 2693 return SGPRReg; 2694} 2695 2696MachineInstrBuilder SIInstrInfo::buildIndirectWrite( 2697 MachineBasicBlock *MBB, 2698 MachineBasicBlock::iterator I, 2699 unsigned ValueReg, 2700 unsigned Address, unsigned OffsetReg) const { 2701 const DebugLoc &DL = MBB->findDebugLoc(I); 2702 unsigned IndirectBaseReg = AMDGPU::VGPR_32RegClass.getRegister( 2703 getIndirectIndexBegin(*MBB->getParent())); 2704 2705 return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_DST_V1)) 2706 .addReg(IndirectBaseReg, RegState::Define) 2707 .addOperand(I->getOperand(0)) 2708 .addReg(IndirectBaseReg) 2709 .addReg(OffsetReg) 2710 .addImm(0) 2711 .addReg(ValueReg); 2712} 2713 2714MachineInstrBuilder SIInstrInfo::buildIndirectRead( 2715 MachineBasicBlock *MBB, 2716 MachineBasicBlock::iterator I, 2717 unsigned ValueReg, 2718 unsigned Address, unsigned OffsetReg) const { 2719 const DebugLoc &DL = MBB->findDebugLoc(I); 2720 unsigned IndirectBaseReg = AMDGPU::VGPR_32RegClass.getRegister( 2721 getIndirectIndexBegin(*MBB->getParent())); 2722 2723 return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_SRC)) 2724 .addOperand(I->getOperand(0)) 2725 .addOperand(I->getOperand(1)) 2726 .addReg(IndirectBaseReg) 2727 .addReg(OffsetReg) 2728 .addImm(0); 2729 2730} 2731 2732void SIInstrInfo::reserveIndirectRegisters(BitVector &Reserved, 2733 const MachineFunction &MF) const { 2734 int End = getIndirectIndexEnd(MF); 2735 int Begin = getIndirectIndexBegin(MF); 2736 2737 if (End == -1) 2738 return; 2739 2740 2741 for (int Index = Begin; Index <= End; ++Index) 2742 Reserved.set(AMDGPU::VGPR_32RegClass.getRegister(Index)); 2743 2744 for (int Index = std::max(0, Begin - 1); Index <= End; ++Index) 2745 Reserved.set(AMDGPU::VReg_64RegClass.getRegister(Index)); 2746 2747 for (int Index = std::max(0, Begin - 2); Index <= End; ++Index) 2748 Reserved.set(AMDGPU::VReg_96RegClass.getRegister(Index)); 2749 2750 for (int Index = std::max(0, Begin - 3); Index <= End; ++Index) 2751 Reserved.set(AMDGPU::VReg_128RegClass.getRegister(Index)); 2752 2753 for (int Index = std::max(0, Begin - 7); Index <= End; ++Index) 2754 Reserved.set(AMDGPU::VReg_256RegClass.getRegister(Index)); 2755 2756 for (int Index = std::max(0, Begin - 15); Index <= End; ++Index) 2757 Reserved.set(AMDGPU::VReg_512RegClass.getRegister(Index)); 2758} 2759 2760MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI, 2761 unsigned OperandName) const { 2762 int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName); 2763 if (Idx == -1) 2764 return nullptr; 2765 2766 return &MI.getOperand(Idx); 2767} 2768 2769uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const { 2770 uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT; 2771 if (ST.isAmdHsaOS()) { 2772 RsrcDataFormat |= (1ULL << 56); 2773 2774 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) 2775 // Set MTYPE = 2 2776 RsrcDataFormat |= (2ULL << 59); 2777 } 2778 2779 return RsrcDataFormat; 2780} 2781