R600InstrInfo.cpp revision 321369
//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief R600 Implementation of TargetInstrInfo.
//
//===----------------------------------------------------------------------===//

#include "R600InstrInfo.h"
#include "AMDGPU.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600FrameLowering.h"
#include "R600RegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <utility>
#include <vector>

using namespace llvm;

#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenDFAPacketizer.inc"

R600InstrInfo::R600InstrInfo(const R600Subtarget &ST)
  : AMDGPUInstrInfo(ST), RI(), ST(ST) {}

bool R600InstrInfo::isVector(const MachineInstr &MI) const {
  return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
}

void R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MI,
                                const DebugLoc &DL, unsigned DestReg,
                                unsigned SrcReg, bool KillSrc) const {
  unsigned VectorComponents = 0;
  if ((AMDGPU::R600_Reg128RegClass.contains(DestReg) ||
       AMDGPU::R600_Reg128VerticalRegClass.contains(DestReg)) &&
      (AMDGPU::R600_Reg128RegClass.contains(SrcReg) ||
       AMDGPU::R600_Reg128VerticalRegClass.contains(SrcReg))) {
    VectorComponents = 4;
  } else if((AMDGPU::R600_Reg64RegClass.contains(DestReg) ||
             AMDGPU::R600_Reg64VerticalRegClass.contains(DestReg)) &&
            (AMDGPU::R600_Reg64RegClass.contains(SrcReg) ||
             AMDGPU::R600_Reg64VerticalRegClass.contains(SrcReg))) {
    VectorComponents = 2;
  }

  if (VectorComponents > 0) {
    for (unsigned I = 0; I < VectorComponents; I++) {
      unsigned SubRegIndex = RI.getSubRegFromChannel(I);
      buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
                              RI.getSubReg(DestReg, SubRegIndex),
                              RI.getSubReg(SrcReg, SubRegIndex))
          .addReg(DestReg, RegState::Define | RegState::Implicit);
    }
  } else {
    MachineInstr *NewMI = buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
                                                  DestReg, SrcReg);
    NewMI->getOperand(getOperandIdx(*NewMI, AMDGPU::OpName::src0))
        .setIsKill(KillSrc);
  }
}

/// \returns true if \p MBBI can be moved into a new basic block.
bool R600InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI) const {
  for (MachineInstr::const_mop_iterator I = MBBI->operands_begin(),
                                        E = MBBI->operands_end(); I != E; ++I) {
    if (I->isReg() && !TargetRegisterInfo::isVirtualRegister(I->getReg()) &&
        I->isUse() && RI.isPhysRegLiveAcrossClauses(I->getReg()))
      return false;
  }
  return true;
}

bool R600InstrInfo::isMov(unsigned Opcode) const {
  switch(Opcode) {
  default:
    return false;
  case AMDGPU::MOV:
  case AMDGPU::MOV_IMM_F32:
  case AMDGPU::MOV_IMM_I32:
    return true;
  }
}

bool R600InstrInfo::isReductionOp(unsigned Opcode) const {
  return false;
}

bool R600InstrInfo::isCubeOp(unsigned Opcode) const {
  switch(Opcode) {
  default: return false;
  case AMDGPU::CUBE_r600_pseudo:
  case AMDGPU::CUBE_r600_real:
  case AMDGPU::CUBE_eg_pseudo:
  case AMDGPU::CUBE_eg_real:
    return true;
  }
}

bool R600InstrInfo::isALUInstr(unsigned Opcode) const {
  unsigned TargetFlags = get(Opcode).TSFlags;

  return (TargetFlags & R600_InstFlag::ALU_INST);
}

bool R600InstrInfo::hasInstrModifiers(unsigned Opcode) const {
  unsigned TargetFlags = get(Opcode).TSFlags;

  return ((TargetFlags & R600_InstFlag::OP1) |
          (TargetFlags & R600_InstFlag::OP2) |
          (TargetFlags & R600_InstFlag::OP3));
}

bool R600InstrInfo::isLDSInstr(unsigned Opcode) const {
  unsigned TargetFlags = get(Opcode).TSFlags;

  return ((TargetFlags & R600_InstFlag::LDS_1A) |
          (TargetFlags & R600_InstFlag::LDS_1A1D) |
          (TargetFlags & R600_InstFlag::LDS_1A2D));
}

bool R600InstrInfo::isLDSRetInstr(unsigned Opcode) const {
  return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) != -1;
}

bool R600InstrInfo::canBeConsideredALU(const MachineInstr &MI) const {
  if (isALUInstr(MI.getOpcode()))
    return true;
  if (isVector(MI) || isCubeOp(MI.getOpcode()))
    return true;
  switch (MI.getOpcode()) {
  case AMDGPU::PRED_X:
  case AMDGPU::INTERP_PAIR_XY:
  case AMDGPU::INTERP_PAIR_ZW:
  case AMDGPU::INTERP_VEC_LOAD:
  case AMDGPU::COPY:
  case AMDGPU::DOT_4:
    return true;
  default:
    return false;
  }
}

bool R600InstrInfo::isTransOnly(unsigned Opcode) const {
  if (ST.hasCaymanISA())
    return false;
  return (get(Opcode).getSchedClass() == AMDGPU::Sched::TransALU);
}

bool R600InstrInfo::isTransOnly(const MachineInstr &MI) const {
  return isTransOnly(MI.getOpcode());
}

bool R600InstrInfo::isVectorOnly(unsigned Opcode) const {
  return (get(Opcode).getSchedClass() == AMDGPU::Sched::VecALU);
}

bool R600InstrInfo::isVectorOnly(const MachineInstr &MI) const {
  return isVectorOnly(MI.getOpcode());
}

bool R600InstrInfo::isExport(unsigned Opcode) const {
  return (get(Opcode).TSFlags & R600_InstFlag::IS_EXPORT);
}

bool R600InstrInfo::usesVertexCache(unsigned Opcode) const {
  return ST.hasVertexCache() && IS_VTX(get(Opcode));
}

bool R600InstrInfo::usesVertexCache(const MachineInstr &MI) const {
  const MachineFunction *MF = MI.getParent()->getParent();
  return !AMDGPU::isCompute(MF->getFunction()->getCallingConv()) &&
         usesVertexCache(MI.getOpcode());
}

bool R600InstrInfo::usesTextureCache(unsigned Opcode) const {
  return (!ST.hasVertexCache() && IS_VTX(get(Opcode))) || IS_TEX(get(Opcode));
}

bool R600InstrInfo::usesTextureCache(const MachineInstr &MI) const {
  const MachineFunction *MF = MI.getParent()->getParent();
  return (AMDGPU::isCompute(MF->getFunction()->getCallingConv()) &&
          usesVertexCache(MI.getOpcode())) ||
          usesTextureCache(MI.getOpcode());
}

bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const {
  switch (Opcode) {
  case AMDGPU::KILLGT:
  case AMDGPU::GROUP_BARRIER:
    return true;
  default:
    return false;
  }
}

bool R600InstrInfo::usesAddressRegister(MachineInstr &MI) const {
  return MI.findRegisterUseOperandIdx(AMDGPU::AR_X) != -1;
}

bool R600InstrInfo::definesAddressRegister(MachineInstr &MI) const {
  return MI.findRegisterDefOperandIdx(AMDGPU::AR_X) != -1;
}

bool R600InstrInfo::readsLDSSrcReg(const MachineInstr &MI) const {
  if (!isALUInstr(MI.getOpcode())) {
    return false;
  }
  for (MachineInstr::const_mop_iterator I = MI.operands_begin(),
                                        E = MI.operands_end();
       I != E; ++I) {
    if (!I->isReg() || !I->isUse() ||
        TargetRegisterInfo::isVirtualRegister(I->getReg()))
      continue;

    if (AMDGPU::R600_LDS_SRC_REGRegClass.contains(I->getReg()))
      return true;
  }
  return false;
}

int R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) const {
  static const unsigned SrcSelTable[][2] = {
    {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
    {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
    {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
    {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
    {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
    {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
    {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
    {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
    {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
    {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
    {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W}
  };

  for (const auto &Row : SrcSelTable) {
    if (getOperandIdx(Opcode, Row[0]) == (int)SrcIdx) {
      return getOperandIdx(Opcode, Row[1]);
    }
  }
  return -1;
}

SmallVector<std::pair<MachineOperand *, int64_t>, 3>
R600InstrInfo::getSrcs(MachineInstr &MI) const {
  SmallVector<std::pair<MachineOperand *, int64_t>, 3> Result;

  if (MI.getOpcode() == AMDGPU::DOT_4) {
    static const unsigned OpTable[8][2] = {
      {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
      {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
      {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
      {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
      {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
      {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
      {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
      {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W},
    };

    for (unsigned j = 0; j < 8; j++) {
      MachineOperand &MO =
          MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][0]));
      unsigned Reg = MO.getReg();
      if (Reg == AMDGPU::ALU_CONST) {
        MachineOperand &Sel =
            MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][1]));
        Result.push_back(std::make_pair(&MO, Sel.getImm()));
        continue;
      }
    }
    return Result;
  }

  static const unsigned OpTable[3][2] = {
    {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
    {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
    {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
  };

  for (unsigned j = 0; j < 3; j++) {
    int SrcIdx = getOperandIdx(MI.getOpcode(), OpTable[j][0]);
    if (SrcIdx < 0)
      break;
    MachineOperand &MO = MI.getOperand(SrcIdx);
    unsigned Reg = MO.getReg();
    if (Reg == AMDGPU::ALU_CONST) {
      MachineOperand &Sel =
          MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][1]));
      Result.push_back(std::make_pair(&MO, Sel.getImm()));
      continue;
    }
    if (Reg == AMDGPU::ALU_LITERAL_X) {
      MachineOperand &Operand =
          MI.getOperand(getOperandIdx(MI.getOpcode(), AMDGPU::OpName::literal));
      if (Operand.isImm()) {
        Result.push_back(std::make_pair(&MO, Operand.getImm()));
        continue;
      }
      assert(Operand.isGlobal());
    }
    Result.push_back(std::make_pair(&MO, 0));
  }
  return Result;
}

std::vector<std::pair<int, unsigned>>
R600InstrInfo::ExtractSrcs(MachineInstr &MI,
                           const DenseMap<unsigned, unsigned> &PV,
                           unsigned &ConstCount) const {
  ConstCount = 0;
  const std::pair<int, unsigned> DummyPair(-1, 0);
  std::vector<std::pair<int, unsigned>> Result;
  unsigned i = 0;
  for (const auto &Src : getSrcs(MI)) {
    ++i;
    unsigned Reg = Src.first->getReg();
    int Index = RI.getEncodingValue(Reg) & 0xff;
    if (Reg == AMDGPU::OQAP) {
      Result.push_back(std::make_pair(Index, 0U));
    }
    if (PV.find(Reg) != PV.end()) {
      // 255 is used to mark the source as a PS/PV reg.
      Result.push_back(std::make_pair(255, 0U));
      continue;
    }
    if (Index > 127) {
      ConstCount++;
      Result.push_back(DummyPair);
      continue;
    }
    unsigned Chan = RI.getHWRegChan(Reg);
    Result.push_back(std::make_pair(Index, Chan));
  }
  for (; i < 3; ++i)
    Result.push_back(DummyPair);
  return Result;
}

static std::vector<std::pair<int, unsigned>>
Swizzle(std::vector<std::pair<int, unsigned>> Src,
        R600InstrInfo::BankSwizzle Swz) {
  if (Src[0] == Src[1])
    Src[1].first = -1;
  switch (Swz) {
  case R600InstrInfo::ALU_VEC_012_SCL_210:
    break;
  case R600InstrInfo::ALU_VEC_021_SCL_122:
    std::swap(Src[1], Src[2]);
    break;
  case R600InstrInfo::ALU_VEC_102_SCL_221:
    std::swap(Src[0], Src[1]);
    break;
  case R600InstrInfo::ALU_VEC_120_SCL_212:
    std::swap(Src[0], Src[1]);
    std::swap(Src[0], Src[2]);
    break;
  case R600InstrInfo::ALU_VEC_201:
    std::swap(Src[0], Src[2]);
    std::swap(Src[0], Src[1]);
    break;
  case R600InstrInfo::ALU_VEC_210:
    std::swap(Src[0], Src[2]);
    break;
  }
  return Src;
}

static unsigned getTransSwizzle(R600InstrInfo::BankSwizzle Swz, unsigned Op) {
  switch (Swz) {
  case R600InstrInfo::ALU_VEC_012_SCL_210: {
    unsigned Cycles[3] = { 2, 1, 0};
    return Cycles[Op];
  }
  case R600InstrInfo::ALU_VEC_021_SCL_122: {
    unsigned Cycles[3] = { 1, 2, 2};
    return Cycles[Op];
  }
  case R600InstrInfo::ALU_VEC_120_SCL_212: {
    unsigned Cycles[3] = { 2, 1, 2};
    return Cycles[Op];
  }
  case R600InstrInfo::ALU_VEC_102_SCL_221: {
    unsigned Cycles[3] = { 2, 2, 1};
    return Cycles[Op];
  }
  default:
    llvm_unreachable("Wrong Swizzle for Trans Slot");
  }
}
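
// A note on the encodings handled above: for the vector slots, the three
// digits in ALU_VEC_xyz appear to give, for src0/src1/src2 in that order,
// the read-port cycle each operand is fetched in. ALU_VEC_120_SCL_212, for
// instance, reads src0 at cycle 1, src1 at cycle 2 and src2 at cycle 0,
// which is exactly the permutation Swizzle() applies. The SCL_xyz suffix
// encodes the same information for the trans slot and is mirrored by the
// Cycles tables in getTransSwizzle().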

/// returns how many MIs (whose inputs are represented by IGSrcs) can be packed
/// in the same Instruction Group while meeting read port limitations given a
/// Swz swizzle sequence.
unsigned R600InstrInfo::isLegalUpTo(
    const std::vector<std::vector<std::pair<int, unsigned>>> &IGSrcs,
    const std::vector<R600InstrInfo::BankSwizzle> &Swz,
    const std::vector<std::pair<int, unsigned>> &TransSrcs,
    R600InstrInfo::BankSwizzle TransSwz) const {
  int Vector[4][3];
  memset(Vector, -1, sizeof(Vector));
  for (unsigned i = 0, e = IGSrcs.size(); i < e; i++) {
    const std::vector<std::pair<int, unsigned>> &Srcs =
        Swizzle(IGSrcs[i], Swz[i]);
    for (unsigned j = 0; j < 3; j++) {
      const std::pair<int, unsigned> &Src = Srcs[j];
      if (Src.first < 0 || Src.first == 255)
        continue;
      if (Src.first == GET_REG_INDEX(RI.getEncodingValue(AMDGPU::OQAP))) {
        if (Swz[i] != R600InstrInfo::ALU_VEC_012_SCL_210 &&
            Swz[i] != R600InstrInfo::ALU_VEC_021_SCL_122) {
          // The value from output queue A (denoted by register OQAP) can
          // only be fetched during the first cycle.
          return false;
        }
        // OQAP does not count towards the normal read port restrictions.
        continue;
      }
      if (Vector[Src.second][j] < 0)
        Vector[Src.second][j] = Src.first;
      if (Vector[Src.second][j] != Src.first)
        return i;
    }
  }
  // Now check the Trans ALU.
  for (unsigned i = 0, e = TransSrcs.size(); i < e; ++i) {
    const std::pair<int, unsigned> &Src = TransSrcs[i];
    unsigned Cycle = getTransSwizzle(TransSwz, i);
    if (Src.first < 0)
      continue;
    if (Src.first == 255)
      continue;
    if (Vector[Src.second][Cycle] < 0)
      Vector[Src.second][Cycle] = Src.first;
    if (Vector[Src.second][Cycle] != Src.first)
      return IGSrcs.size() - 1;
  }
  return IGSrcs.size();
}

/// Given a swizzle sequence SwzCandidate and an index Idx, returns the next
/// (in lexicographic terms) swizzle sequence, assuming that all swizzles
/// after Idx can be skipped.
static bool
NextPossibleSolution(
    std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
    unsigned Idx) {
  assert(Idx < SwzCandidate.size());
  int ResetIdx = Idx;
  while (ResetIdx > -1 && SwzCandidate[ResetIdx] == R600InstrInfo::ALU_VEC_210)
    ResetIdx --;
  for (unsigned i = ResetIdx + 1, e = SwzCandidate.size(); i < e; i++) {
    SwzCandidate[i] = R600InstrInfo::ALU_VEC_012_SCL_210;
  }
  if (ResetIdx == -1)
    return false;
  int NextSwizzle = SwzCandidate[ResetIdx] + 1;
  SwzCandidate[ResetIdx] = (R600InstrInfo::BankSwizzle)NextSwizzle;
  return true;
}

/// Enumerate all possible swizzle sequences to find one that meets all
/// read port requirements.
bool R600InstrInfo::FindSwizzleForVectorSlot(
    const std::vector<std::vector<std::pair<int, unsigned>>> &IGSrcs,
    std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
    const std::vector<std::pair<int, unsigned>> &TransSrcs,
    R600InstrInfo::BankSwizzle TransSwz) const {
  unsigned ValidUpTo = 0;
  do {
    ValidUpTo = isLegalUpTo(IGSrcs, SwzCandidate, TransSrcs, TransSwz);
    if (ValidUpTo == IGSrcs.size())
      return true;
  } while (NextPossibleSolution(SwzCandidate, ValidUpTo));
  return false;
}
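
// The search above is effectively an odometer over swizzle sequences:
// isLegalUpTo() returns the index of the first instruction whose reads
// conflict, and NextPossibleSolution() advances the candidate at that index
// while resetting everything after it, so known-bad suffixes are skipped
// rather than enumerated one by one.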

/// Instructions in the Trans slot can't read a gpr at cycle 0 if they also
/// read a const, and can't read a gpr at cycle 1 if they read 2 consts.
static bool
isConstCompatible(R600InstrInfo::BankSwizzle TransSwz,
                  const std::vector<std::pair<int, unsigned>> &TransOps,
                  unsigned ConstCount) {
  // TransALU can't read 3 constants.
  if (ConstCount > 2)
    return false;
  for (unsigned i = 0, e = TransOps.size(); i < e; ++i) {
    const std::pair<int, unsigned> &Src = TransOps[i];
    unsigned Cycle = getTransSwizzle(TransSwz, i);
    if (Src.first < 0)
      continue;
    if (ConstCount > 0 && Cycle == 0)
      return false;
    if (ConstCount > 1 && Cycle == 1)
      return false;
  }
  return true;
}

bool
R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
                                       const DenseMap<unsigned, unsigned> &PV,
                                       std::vector<BankSwizzle> &ValidSwizzle,
                                       bool isLastAluTrans)
    const {
  // TODO: support shared src0/src1 operands.

  std::vector<std::vector<std::pair<int, unsigned>>> IGSrcs;
  ValidSwizzle.clear();
  unsigned ConstCount;
  BankSwizzle TransBS = ALU_VEC_012_SCL_210;
  for (unsigned i = 0, e = IG.size(); i < e; ++i) {
    IGSrcs.push_back(ExtractSrcs(*IG[i], PV, ConstCount));
    unsigned Op = getOperandIdx(IG[i]->getOpcode(),
                                AMDGPU::OpName::bank_swizzle);
    ValidSwizzle.push_back( (R600InstrInfo::BankSwizzle)
                            IG[i]->getOperand(Op).getImm());
  }
  std::vector<std::pair<int, unsigned>> TransOps;
  if (!isLastAluTrans)
    return FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, TransBS);

  TransOps = std::move(IGSrcs.back());
  IGSrcs.pop_back();
  ValidSwizzle.pop_back();

  static const R600InstrInfo::BankSwizzle TransSwz[] = {
    ALU_VEC_012_SCL_210,
    ALU_VEC_021_SCL_122,
    ALU_VEC_120_SCL_212,
    ALU_VEC_102_SCL_221
  };
  for (unsigned i = 0; i < 4; i++) {
    TransBS = TransSwz[i];
    if (!isConstCompatible(TransBS, TransOps, ConstCount))
      continue;
    bool Result = FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps,
                                           TransBS);
    if (Result) {
      ValidSwizzle.push_back(TransBS);
      return true;
    }
  }

  return false;
}

bool
R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
    const {
  assert(Consts.size() <= 12 && "Too many operands in instructions group");
  unsigned Pair1 = 0, Pair2 = 0;
  for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
    unsigned ReadConstHalf = Consts[i] & 2;
    unsigned ReadConstIndex = Consts[i] & (~3);
    unsigned ReadHalfConst = ReadConstIndex | ReadConstHalf;
    if (!Pair1) {
      Pair1 = ReadHalfConst;
      continue;
    }
    if (Pair1 == ReadHalfConst)
      continue;
    if (!Pair2) {
      Pair2 = ReadHalfConst;
      continue;
    }
    if (Pair2 != ReadHalfConst)
      return false;
  }
  return true;
}

bool
R600InstrInfo::fitsConstReadLimitations(const std::vector<MachineInstr *> &MIs)
    const {
  std::vector<unsigned> Consts;
  SmallSet<int64_t, 4> Literals;
  for (unsigned i = 0, n = MIs.size(); i < n; i++) {
    MachineInstr &MI = *MIs[i];
    if (!isALUInstr(MI.getOpcode()))
      continue;

    for (const auto &Src : getSrcs(MI)) {
      if (Src.first->getReg() == AMDGPU::ALU_LITERAL_X)
        Literals.insert(Src.second);
      if (Literals.size() > 4)
        return false;
      if (Src.first->getReg() == AMDGPU::ALU_CONST)
        Consts.push_back(Src.second);
      if (AMDGPU::R600_KC0RegClass.contains(Src.first->getReg()) ||
          AMDGPU::R600_KC1RegClass.contains(Src.first->getReg())) {
        unsigned Index = RI.getEncodingValue(Src.first->getReg()) & 0xff;
        unsigned Chan = RI.getHWRegChan(Src.first->getReg());
        Consts.push_back((Index << 2) | Chan);
      }
    }
  }
  return fitsConstReadLimitations(Consts);
}

DFAPacketizer *
R600InstrInfo::CreateTargetScheduleState(const TargetSubtargetInfo &STI) const {
  const InstrItineraryData *II = STI.getInstrItineraryData();
  return static_cast<const R600Subtarget &>(STI).createDFAPacketizer(II);
}

static bool
isPredicateSetter(unsigned Opcode) {
  switch (Opcode) {
  case AMDGPU::PRED_X:
    return true;
  default:
    return false;
  }
}

static MachineInstr *
findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator I) {
  while (I != MBB.begin()) {
    --I;
    MachineInstr &MI = *I;
    if (isPredicateSetter(MI.getOpcode()))
      return &MI;
  }

  return nullptr;
}

static
bool isJump(unsigned Opcode) {
  return Opcode == AMDGPU::JUMP || Opcode == AMDGPU::JUMP_COND;
}

static bool isBranch(unsigned Opcode) {
  return Opcode == AMDGPU::BRANCH || Opcode == AMDGPU::BRANCH_COND_i32 ||
         Opcode == AMDGPU::BRANCH_COND_f32;
}

bool R600InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                  MachineBasicBlock *&TBB,
                                  MachineBasicBlock *&FBB,
                                  SmallVectorImpl<MachineOperand> &Cond,
                                  bool AllowModify) const {
  // Most of the following comes from the ARM implementation of AnalyzeBranch.

  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return false;

  // AMDGPU::BRANCH* instructions are only available after isel and are not
  // handled.
  if (isBranch(I->getOpcode()))
    return true;
  if (!isJump(I->getOpcode())) {
    return false;
  }

  // Remove successive JUMPs.
  while (I != MBB.begin() && std::prev(I)->getOpcode() == AMDGPU::JUMP) {
    MachineBasicBlock::iterator PriorI = std::prev(I);
    if (AllowModify)
      I->removeFromParent();
    I = PriorI;
  }
  MachineInstr &LastInst = *I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst.getOpcode();
  if (I == MBB.begin() || !isJump((--I)->getOpcode())) {
    if (LastOpc == AMDGPU::JUMP) {
      TBB = LastInst.getOperand(0).getMBB();
      return false;
    } else if (LastOpc == AMDGPU::JUMP_COND) {
      auto predSet = I;
      while (!isPredicateSetter(predSet->getOpcode())) {
        predSet = --I;
      }
      TBB = LastInst.getOperand(0).getMBB();
      Cond.push_back(predSet->getOperand(1));
      Cond.push_back(predSet->getOperand(2));
      Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
      return false;
    }
    return true; // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr &SecondLastInst = *I;
  unsigned SecondLastOpc = SecondLastInst.getOpcode();

  // If the block ends with a B and a Bcc, handle it.
  if (SecondLastOpc == AMDGPU::JUMP_COND && LastOpc == AMDGPU::JUMP) {
    auto predSet = --I;
    while (!isPredicateSetter(predSet->getOpcode())) {
      predSet = --I;
    }
    TBB = SecondLastInst.getOperand(0).getMBB();
    FBB = LastInst.getOperand(0).getMBB();
    Cond.push_back(predSet->getOperand(1));
    Cond.push_back(predSet->getOperand(2));
    Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
    return false;
  }

  // Otherwise, can't handle this.
  return true;
}
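
// insertBranch and removeBranch below also maintain the block's clause
// marker: materializing a conditional jump retroactively turns the last
// CF_ALU into CF_ALU_PUSH_BEFORE (presumably so the predicate stack is
// pushed before the clause executes), and removing the jump reverts it.
// That is why both helpers locate the marker with FindLastAluClause().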

static
MachineBasicBlock::iterator FindLastAluClause(MachineBasicBlock &MBB) {
  for (MachineBasicBlock::reverse_iterator It = MBB.rbegin(), E = MBB.rend();
       It != E; ++It) {
    if (It->getOpcode() == AMDGPU::CF_ALU ||
        It->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
      return It.getReverse();
  }
  return MBB.end();
}

unsigned R600InstrInfo::insertBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *TBB,
                                     MachineBasicBlock *FBB,
                                     ArrayRef<MachineOperand> Cond,
                                     const DebugLoc &DL,
                                     int *BytesAdded) const {
  assert(TBB && "insertBranch must not be told to insert a fallthrough");
  assert(!BytesAdded && "code size not handled");

  if (!FBB) {
    if (Cond.empty()) {
      BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB);
      return 1;
    } else {
      MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
      assert(PredSet && "No previous predicate !");
      addFlag(*PredSet, 0, MO_FLAG_PUSH);
      PredSet->getOperand(2).setImm(Cond[1].getImm());

      BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
          .addMBB(TBB)
          .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
      MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
      if (CfAlu == MBB.end())
        return 1;
      assert (CfAlu->getOpcode() == AMDGPU::CF_ALU);
      CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
      return 1;
    }
  } else {
    MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
    assert(PredSet && "No previous predicate !");
    addFlag(*PredSet, 0, MO_FLAG_PUSH);
    PredSet->getOperand(2).setImm(Cond[1].getImm());
    BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
        .addMBB(TBB)
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB);
    MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
    if (CfAlu == MBB.end())
      return 2;
    assert (CfAlu->getOpcode() == AMDGPU::CF_ALU);
    CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
    return 2;
  }
}

unsigned R600InstrInfo::removeBranch(MachineBasicBlock &MBB,
                                     int *BytesRemoved) const {
  assert(!BytesRemoved && "code size not handled");

  // Note : we leave PRED* instructions there.
  // They may be needed when predicating instructions.

  MachineBasicBlock::iterator I = MBB.end();

  if (I == MBB.begin()) {
    return 0;
  }
  --I;
  switch (I->getOpcode()) {
  default:
    return 0;
  case AMDGPU::JUMP_COND: {
    MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
    clearFlag(*predSet, 0, MO_FLAG_PUSH);
    I->eraseFromParent();
    MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
    if (CfAlu == MBB.end())
      break;
    assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
    CfAlu->setDesc(get(AMDGPU::CF_ALU));
    break;
  }
  case AMDGPU::JUMP:
    I->eraseFromParent();
    break;
  }
  I = MBB.end();

  if (I == MBB.begin()) {
    return 1;
  }
  --I;
  switch (I->getOpcode()) {
    // FIXME: only one case??
  default:
    return 1;
  case AMDGPU::JUMP_COND: {
    MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
    clearFlag(*predSet, 0, MO_FLAG_PUSH);
    I->eraseFromParent();
    MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
    if (CfAlu == MBB.end())
      break;
    assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
    CfAlu->setDesc(get(AMDGPU::CF_ALU));
    break;
  }
  case AMDGPU::JUMP:
    I->eraseFromParent();
    break;
  }
  return 2;
}

bool R600InstrInfo::isPredicated(const MachineInstr &MI) const {
  int idx = MI.findFirstPredOperandIdx();
  if (idx < 0)
    return false;

  unsigned Reg = MI.getOperand(idx).getReg();
  switch (Reg) {
  default: return false;
  case AMDGPU::PRED_SEL_ONE:
  case AMDGPU::PRED_SEL_ZERO:
  case AMDGPU::PREDICATE_BIT:
    return true;
  }
}

bool R600InstrInfo::isPredicable(const MachineInstr &MI) const {
  // XXX: KILL* instructions can be predicated, but they must be the last
  // instruction in a clause, so this means any instructions after them cannot
  // be predicated. Until we have proper support for instruction clauses in the
  // backend, we will mark KILL* instructions as unpredicable.

  if (MI.getOpcode() == AMDGPU::KILLGT) {
    return false;
  } else if (MI.getOpcode() == AMDGPU::CF_ALU) {
    // If the clause starts in the middle of the MBB, the MBB contains more
    // than one clause, and we are unable to predicate several clauses.
    if (MI.getParent()->begin() != MachineBasicBlock::const_iterator(MI))
      return false;
    // TODO: We don't support KC merging atm.
    return MI.getOperand(3).getImm() == 0 && MI.getOperand(4).getImm() == 0;
  } else if (isVector(MI)) {
    return false;
  } else {
    return AMDGPUInstrInfo::isPredicable(MI);
  }
}

bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
                                   unsigned NumCycles,
                                   unsigned ExtraPredCycles,
                                   BranchProbability Probability) const {
  return true;
}

bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
                                   unsigned NumTCycles,
                                   unsigned ExtraTCycles,
                                   MachineBasicBlock &FMBB,
                                   unsigned NumFCycles,
                                   unsigned ExtraFCycles,
                                   BranchProbability Probability) const {
  return true;
}

bool
R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
                                         unsigned NumCycles,
                                         BranchProbability Probability)
    const {
  return true;
}

bool
R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
                                         MachineBasicBlock &FMBB) const {
  return false;
}

bool
R600InstrInfo::reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
  MachineOperand &MO = Cond[1];
  switch (MO.getImm()) {
  case AMDGPU::PRED_SETE_INT:
    MO.setImm(AMDGPU::PRED_SETNE_INT);
    break;
  case AMDGPU::PRED_SETNE_INT:
    MO.setImm(AMDGPU::PRED_SETE_INT);
    break;
  case AMDGPU::PRED_SETE:
    MO.setImm(AMDGPU::PRED_SETNE);
    break;
  case AMDGPU::PRED_SETNE:
    MO.setImm(AMDGPU::PRED_SETE);
    break;
  default:
    return true;
  }

  MachineOperand &MO2 = Cond[2];
  switch (MO2.getReg()) {
  case AMDGPU::PRED_SEL_ZERO:
    MO2.setReg(AMDGPU::PRED_SEL_ONE);
    break;
  case AMDGPU::PRED_SEL_ONE:
    MO2.setReg(AMDGPU::PRED_SEL_ZERO);
    break;
  default:
    return true;
  }
  return false;
}

bool R600InstrInfo::DefinesPredicate(MachineInstr &MI,
                                     std::vector<MachineOperand> &Pred) const {
  return isPredicateSetter(MI.getOpcode());
}
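
// PredicateInstruction below has three shapes: CF_ALU clauses are handled by
// clearing one of their immediate control operands (operand 8), DOT_4 needs
// the predicate select copied into all four per-slot pred_sel operands, and
// any other instruction with a predicate operand gets that single operand
// rewritten plus an implicit use of PREDICATE_BIT.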

bool R600InstrInfo::PredicateInstruction(MachineInstr &MI,
                                         ArrayRef<MachineOperand> Pred) const {
  int PIdx = MI.findFirstPredOperandIdx();

  if (MI.getOpcode() == AMDGPU::CF_ALU) {
    MI.getOperand(8).setImm(0);
    return true;
  }

  if (MI.getOpcode() == AMDGPU::DOT_4) {
    MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_X))
        .setReg(Pred[2].getReg());
    MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_Y))
        .setReg(Pred[2].getReg());
    MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_Z))
        .setReg(Pred[2].getReg());
    MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_W))
        .setReg(Pred[2].getReg());
    MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
    MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
    return true;
  }

  if (PIdx != -1) {
    MachineOperand &PMO = MI.getOperand(PIdx);
    PMO.setReg(Pred[2].getReg());
    MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
    MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
    return true;
  }

  return false;
}

unsigned int R600InstrInfo::getPredicationCost(const MachineInstr &) const {
  return 2;
}

unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
                                            const MachineInstr &,
                                            unsigned *PredCost) const {
  if (PredCost)
    *PredCost = 2;
  return 2;
}

unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex,
                                                 unsigned Channel) const {
  assert(Channel == 0);
  return RegIndex;
}

bool R600InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default: {
    MachineBasicBlock *MBB = MI.getParent();
    int OffsetOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::addr);
    // addr is a custom operand with multiple MI operands, and only the
    // first MI operand is given a name.
    int RegOpIdx = OffsetOpIdx + 1;
    int ChanOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::chan);
    if (isRegisterLoad(MI)) {
      int DstOpIdx =
          AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst);
      unsigned RegIndex = MI.getOperand(RegOpIdx).getImm();
      unsigned Channel = MI.getOperand(ChanOpIdx).getImm();
      unsigned Address = calculateIndirectAddress(RegIndex, Channel);
      unsigned OffsetReg = MI.getOperand(OffsetOpIdx).getReg();
      if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) {
        buildMovInstr(MBB, MI, MI.getOperand(DstOpIdx).getReg(),
                      getIndirectAddrRegClass()->getRegister(Address));
      } else {
        buildIndirectRead(MBB, MI, MI.getOperand(DstOpIdx).getReg(), Address,
                          OffsetReg);
      }
    } else if (isRegisterStore(MI)) {
      int ValOpIdx =
          AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::val);
      unsigned RegIndex = MI.getOperand(RegOpIdx).getImm();
      unsigned Channel = MI.getOperand(ChanOpIdx).getImm();
      unsigned Address = calculateIndirectAddress(RegIndex, Channel);
      unsigned OffsetReg = MI.getOperand(OffsetOpIdx).getReg();
      if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) {
        buildMovInstr(MBB, MI, getIndirectAddrRegClass()->getRegister(Address),
                      MI.getOperand(ValOpIdx).getReg());
      } else {
        buildIndirectWrite(MBB, MI, MI.getOperand(ValOpIdx).getReg(),
                           calculateIndirectAddress(RegIndex, Channel),
                           OffsetReg);
      }
    } else {
      return false;
    }

    MBB->erase(MI);
    return true;
  }
  case AMDGPU::R600_EXTRACT_ELT_V2:
  case AMDGPU::R600_EXTRACT_ELT_V4:
    buildIndirectRead(MI.getParent(), MI, MI.getOperand(0).getReg(),
                      RI.getHWRegIndex(MI.getOperand(1).getReg()), // Address
                      MI.getOperand(2).getReg(),
                      RI.getHWRegChan(MI.getOperand(1).getReg()));
    break;
  case AMDGPU::R600_INSERT_ELT_V2:
  case AMDGPU::R600_INSERT_ELT_V4:
    buildIndirectWrite(MI.getParent(), MI, MI.getOperand(2).getReg(), // Value
                       RI.getHWRegIndex(MI.getOperand(1).getReg()),   // Address
                       MI.getOperand(3).getReg(),                     // Offset
                       RI.getHWRegChan(MI.getOperand(1).getReg()));   // Channel
    break;
  }
  MI.eraseFromParent();
  return true;
}

void R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved,
                                             const MachineFunction &MF) const {
  const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
  const R600FrameLowering *TFL = ST.getFrameLowering();

  unsigned StackWidth = TFL->getStackWidth(MF);
  int End = getIndirectIndexEnd(MF);

  if (End == -1)
    return;

  for (int Index = getIndirectIndexBegin(MF); Index <= End; ++Index) {
    unsigned SuperReg = AMDGPU::R600_Reg128RegClass.getRegister(Index);
    Reserved.set(SuperReg);
    for (unsigned Chan = 0; Chan < StackWidth; ++Chan) {
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan);
      Reserved.set(Reg);
    }
  }
}

const TargetRegisterClass *R600InstrInfo::getIndirectAddrRegClass() const {
  return &AMDGPU::R600_TReg32_XRegClass;
}

MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned ValueReg, unsigned Address,
                                       unsigned OffsetReg) const {
  return buildIndirectWrite(MBB, I, ValueReg, Address, OffsetReg, 0);
}

MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned ValueReg, unsigned Address,
                                       unsigned OffsetReg,
                                       unsigned AddrChan) const {
  unsigned AddrReg;
  switch (AddrChan) {
  default: llvm_unreachable("Invalid Channel");
  case 0: AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); break;
  case 1: AddrReg = AMDGPU::R600_Addr_YRegClass.getRegister(Address); break;
  case 2: AddrReg = AMDGPU::R600_Addr_ZRegClass.getRegister(Address); break;
  case 3: AddrReg = AMDGPU::R600_Addr_WRegClass.getRegister(Address); break;
  }
  MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
                                               AMDGPU::AR_X, OffsetReg);
  setImmOperand(*MOVA, AMDGPU::OpName::write, 0);

  MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
                                                    AddrReg, ValueReg)
                                .addReg(AMDGPU::AR_X,
                                        RegState::Implicit | RegState::Kill);
  setImmOperand(*Mov, AMDGPU::OpName::dst_rel, 1);
  return Mov;
}

MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned ValueReg, unsigned Address,
                                       unsigned OffsetReg) const {
  return buildIndirectRead(MBB, I, ValueReg, Address, OffsetReg, 0);
}

MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned ValueReg, unsigned Address,
                                       unsigned OffsetReg,
                                       unsigned AddrChan) const {
  unsigned AddrReg;
  switch (AddrChan) {
  default: llvm_unreachable("Invalid Channel");
  case 0: AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); break;
  case 1: AddrReg = AMDGPU::R600_Addr_YRegClass.getRegister(Address); break;
  case 2: AddrReg = AMDGPU::R600_Addr_ZRegClass.getRegister(Address); break;
  case 3: AddrReg = AMDGPU::R600_Addr_WRegClass.getRegister(Address); break;
  }
  MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
                                               AMDGPU::AR_X,
                                               OffsetReg);
  setImmOperand(*MOVA, AMDGPU::OpName::write, 0);
  MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
                                                    ValueReg,
                                                    AddrReg)
                                .addReg(AMDGPU::AR_X,
                                        RegState::Implicit | RegState::Kill);
  setImmOperand(*Mov, AMDGPU::OpName::src0_rel, 1);

  return Mov;
}
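
// Both indirect helpers above expand to the same two-instruction pattern: a
// MOVA_INT_eg that moves the offset into the address register AR_X (with its
// write flag cleared), followed by a MOV made address-relative through its
// dst_rel bit (writes) or src0_rel bit (reads); AR_X is implicitly used and
// killed by that MOV.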

int R600InstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  int Offset = -1;

  if (MFI.getNumObjects() == 0) {
    return -1;
  }

  if (MRI.livein_empty()) {
    return 0;
  }

  const TargetRegisterClass *IndirectRC = getIndirectAddrRegClass();
  for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
                                            LE = MRI.livein_end();
       LI != LE; ++LI) {
    unsigned Reg = LI->first;
    if (TargetRegisterInfo::isVirtualRegister(Reg) ||
        !IndirectRC->contains(Reg))
      continue;

    unsigned RegIndex;
    unsigned RegEnd;
    for (RegIndex = 0, RegEnd = IndirectRC->getNumRegs(); RegIndex != RegEnd;
         ++RegIndex) {
      if (IndirectRC->getRegister(RegIndex) == Reg)
        break;
    }
    Offset = std::max(Offset, (int)RegIndex);
  }

  return Offset + 1;
}

int R600InstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
  int Offset = 0;
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // Variable sized objects are not supported.
  if (MFI.hasVarSizedObjects()) {
    return -1;
  }

  if (MFI.getNumObjects() == 0) {
    return -1;
  }

  const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
  const R600FrameLowering *TFL = ST.getFrameLowering();

  unsigned IgnoredFrameReg;
  Offset = TFL->getFrameIndexReference(MF, -1, IgnoredFrameReg);

  return getIndirectIndexBegin(MF) + Offset;
}

unsigned R600InstrInfo::getMaxAlusPerClause() const {
  return 115;
}

MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MBB,
                                                  MachineBasicBlock::iterator I,
                                                  unsigned Opcode,
                                                  unsigned DstReg,
                                                  unsigned Src0Reg,
                                                  unsigned Src1Reg) const {
  MachineInstrBuilder MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opcode),
    DstReg);            // $dst

  if (Src1Reg) {
    MIB.addImm(0)       // $update_exec_mask
       .addImm(0);      // $update_predicate
  }
  MIB.addImm(1)         // $write
     .addImm(0)         // $omod
     .addImm(0)         // $dst_rel
     .addImm(0)         // $dst_clamp
     .addReg(Src0Reg)   // $src0
     .addImm(0)         // $src0_neg
     .addImm(0)         // $src0_rel
     .addImm(0)         // $src0_abs
     .addImm(-1);       // $src0_sel

  if (Src1Reg) {
    MIB.addReg(Src1Reg) // $src1
       .addImm(0)       // $src1_neg
       .addImm(0)       // $src1_rel
       .addImm(0)       // $src1_abs
       .addImm(-1);     // $src1_sel
  }

  // XXX: The r600g finalizer expects this to be 1; once we've moved the
  // scheduling to the backend, we can change the default to 0.
  MIB.addImm(1)                     // $last
     .addReg(AMDGPU::PRED_SEL_OFF)  // $pred_sel
     .addImm(0)                     // $literal
     .addImm(0);                    // $bank_swizzle

  return MIB;
}
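
// Example: buildDefaultInstruction(MBB, I, AMDGPU::MOV, DstReg, SrcReg)
// yields a MOV with write=1, every modifier cleared, src0_sel=-1, $last set,
// $pred_sel = PRED_SEL_OFF and bank_swizzle = 0. The $update_exec_mask and
// $update_predicate operands are prepended only when a second source is
// supplied, since only two-source opcodes carry those fields.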

#define OPERAND_CASE(Label) \
  case Label: { \
    static const unsigned Ops[] = \
    { \
      Label##_X, \
      Label##_Y, \
      Label##_Z, \
      Label##_W \
    }; \
    return Ops[Slot]; \
  }

static unsigned getSlotedOps(unsigned Op, unsigned Slot) {
  switch (Op) {
  OPERAND_CASE(AMDGPU::OpName::update_exec_mask)
  OPERAND_CASE(AMDGPU::OpName::update_pred)
  OPERAND_CASE(AMDGPU::OpName::write)
  OPERAND_CASE(AMDGPU::OpName::omod)
  OPERAND_CASE(AMDGPU::OpName::dst_rel)
  OPERAND_CASE(AMDGPU::OpName::clamp)
  OPERAND_CASE(AMDGPU::OpName::src0)
  OPERAND_CASE(AMDGPU::OpName::src0_neg)
  OPERAND_CASE(AMDGPU::OpName::src0_rel)
  OPERAND_CASE(AMDGPU::OpName::src0_abs)
  OPERAND_CASE(AMDGPU::OpName::src0_sel)
  OPERAND_CASE(AMDGPU::OpName::src1)
  OPERAND_CASE(AMDGPU::OpName::src1_neg)
  OPERAND_CASE(AMDGPU::OpName::src1_rel)
  OPERAND_CASE(AMDGPU::OpName::src1_abs)
  OPERAND_CASE(AMDGPU::OpName::src1_sel)
  OPERAND_CASE(AMDGPU::OpName::pred_sel)
  default:
    llvm_unreachable("Wrong Operand");
  }
}

#undef OPERAND_CASE

MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction(
    MachineBasicBlock &MBB, MachineInstr *MI, unsigned Slot, unsigned DstReg)
    const {
  assert (MI->getOpcode() == AMDGPU::DOT_4 && "Not Implemented");
  unsigned Opcode;
  if (ST.getGeneration() <= R600Subtarget::R700)
    Opcode = AMDGPU::DOT4_r600;
  else
    Opcode = AMDGPU::DOT4_eg;
  MachineBasicBlock::iterator I = MI;
  MachineOperand &Src0 = MI->getOperand(
      getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src0, Slot)));
  MachineOperand &Src1 = MI->getOperand(
      getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src1, Slot)));
  MachineInstr *MIB = buildDefaultInstruction(
      MBB, I, Opcode, DstReg, Src0.getReg(), Src1.getReg());
  static const unsigned Operands[14] = {
    AMDGPU::OpName::update_exec_mask,
    AMDGPU::OpName::update_pred,
    AMDGPU::OpName::write,
    AMDGPU::OpName::omod,
    AMDGPU::OpName::dst_rel,
    AMDGPU::OpName::clamp,
    AMDGPU::OpName::src0_neg,
    AMDGPU::OpName::src0_rel,
    AMDGPU::OpName::src0_abs,
    AMDGPU::OpName::src0_sel,
    AMDGPU::OpName::src1_neg,
    AMDGPU::OpName::src1_rel,
    AMDGPU::OpName::src1_abs,
    AMDGPU::OpName::src1_sel,
  };

  MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(),
      getSlotedOps(AMDGPU::OpName::pred_sel, Slot)));
  MIB->getOperand(getOperandIdx(Opcode, AMDGPU::OpName::pred_sel))
      .setReg(MO.getReg());

  for (unsigned i = 0; i < 14; i++) {
    MachineOperand &MO = MI->getOperand(
        getOperandIdx(MI->getOpcode(), getSlotedOps(Operands[i], Slot)));
    assert (MO.isImm());
    setImmOperand(*MIB, Operands[i], MO.getImm());
  }
  MIB->getOperand(20).setImm(0);
  return MIB;
}

MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB,
                                         MachineBasicBlock::iterator I,
                                         unsigned DstReg,
                                         uint64_t Imm) const {
  MachineInstr *MovImm = buildDefaultInstruction(BB, I, AMDGPU::MOV, DstReg,
                                                 AMDGPU::ALU_LITERAL_X);
  setImmOperand(*MovImm, AMDGPU::OpName::literal, Imm);
  return MovImm;
}

MachineInstr *R600InstrInfo::buildMovInstr(MachineBasicBlock *MBB,
                                           MachineBasicBlock::iterator I,
                                           unsigned DstReg, unsigned SrcReg) const {
  return buildDefaultInstruction(*MBB, I, AMDGPU::MOV, DstReg, SrcReg);
}

int R600InstrInfo::getOperandIdx(const MachineInstr &MI, unsigned Op) const {
  return getOperandIdx(MI.getOpcode(), Op);
}

int R600InstrInfo::getOperandIdx(unsigned Opcode, unsigned Op) const {
  return AMDGPU::getNamedOperandIdx(Opcode, Op);
}

void R600InstrInfo::setImmOperand(MachineInstr &MI, unsigned Op,
                                  int64_t Imm) const {
  int Idx = getOperandIdx(MI, Op);
  assert(Idx != -1 && "Operand not supported for this instruction.");
  assert(MI.getOperand(Idx).isImm());
  MI.getOperand(Idx).setImm(Imm);
}

//===----------------------------------------------------------------------===//
// Instruction flag getters/setters
//===----------------------------------------------------------------------===//

MachineOperand &R600InstrInfo::getFlagOp(MachineInstr &MI, unsigned SrcIdx,
                                         unsigned Flag) const {
  unsigned TargetFlags = get(MI.getOpcode()).TSFlags;
  int FlagIndex = 0;
  if (Flag != 0) {
    // If we pass something other than the default value of Flag to this
    // function, it means we want to set a flag on an instruction
    // that uses native encoding.
    assert(HAS_NATIVE_OPERANDS(TargetFlags));
    bool IsOP3 = (TargetFlags & R600_InstFlag::OP3) == R600_InstFlag::OP3;
    switch (Flag) {
    case MO_FLAG_CLAMP:
      FlagIndex = getOperandIdx(MI, AMDGPU::OpName::clamp);
      break;
    case MO_FLAG_MASK:
      FlagIndex = getOperandIdx(MI, AMDGPU::OpName::write);
      break;
    case MO_FLAG_NOT_LAST:
    case MO_FLAG_LAST:
      FlagIndex = getOperandIdx(MI, AMDGPU::OpName::last);
      break;
    case MO_FLAG_NEG:
      switch (SrcIdx) {
      case 0:
        FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src0_neg);
        break;
      case 1:
        FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src1_neg);
        break;
      case 2:
        FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src2_neg);
        break;
      }
      break;

    case MO_FLAG_ABS:
      assert(!IsOP3 && "Cannot set absolute value modifier for OP3 "
                       "instructions.");
      (void)IsOP3;
      switch (SrcIdx) {
      case 0:
        FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src0_abs);
        break;
      case 1:
        FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src1_abs);
        break;
      }
      break;

    default:
      FlagIndex = -1;
      break;
    }
    assert(FlagIndex != -1 && "Flag not supported for this instruction");
  } else {
    FlagIndex = GET_FLAG_OPERAND_IDX(TargetFlags);
    assert(FlagIndex != 0 &&
           "Instruction flags not supported for this instruction");
  }

  MachineOperand &FlagOp = MI.getOperand(FlagIndex);
  assert(FlagOp.isImm());
  return FlagOp;
}

void R600InstrInfo::addFlag(MachineInstr &MI, unsigned Operand,
                            unsigned Flag) const {
  unsigned TargetFlags = get(MI.getOpcode()).TSFlags;
  if (Flag == 0) {
    return;
  }
  if (HAS_NATIVE_OPERANDS(TargetFlags)) {
    MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
    if (Flag == MO_FLAG_NOT_LAST) {
      clearFlag(MI, Operand, MO_FLAG_LAST);
    } else if (Flag == MO_FLAG_MASK) {
      clearFlag(MI, Operand, Flag);
    } else {
      FlagOp.setImm(1);
    }
  } else {
    MachineOperand &FlagOp = getFlagOp(MI, Operand);
    FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * Operand)));
  }
}

void R600InstrInfo::clearFlag(MachineInstr &MI, unsigned Operand,
                              unsigned Flag) const {
  unsigned TargetFlags = get(MI.getOpcode()).TSFlags;
  if (HAS_NATIVE_OPERANDS(TargetFlags)) {
    MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
    FlagOp.setImm(0);
  } else {
    MachineOperand &FlagOp = getFlagOp(MI);
    unsigned InstFlags = FlagOp.getImm();
    InstFlags &= ~(Flag << (NUM_MO_FLAGS * Operand));
    FlagOp.setImm(InstFlags);
  }
}