R600OptimizeVectorRegisters.cpp revision 296417
//===-------------------- R600OptimizeVectorRegisters.cpp -----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass merges the inputs of swizzlable instructions into vectors that
/// share common data and/or have enough undef subregs, using swizzle
/// abilities.
///
/// For instance, consider the following pseudo code:
/// vreg5<def> = REG_SEQ vreg1, sub0, vreg2, sub1, vreg3, sub2, undef, sub3
/// ...
/// vreg7<def> = REG_SEQ vreg1, sub0, vreg3, sub1, undef, sub2, vreg4, sub3
/// (swizzable Inst) vreg7, SwizzleMask : sub0, sub1, sub2, sub3
///
/// is turned into:
/// vreg5<def> = REG_SEQ vreg1, sub0, vreg2, sub1, vreg3, sub2, undef, sub3
/// ...
/// vreg7<def> = INSERT_SUBREG vreg4, sub3
/// (swizzable Inst) vreg7, SwizzleMask : sub0, sub2, sub1, sub3
///
/// This allows regalloc to reduce register pressure for vector registers and
/// to reduce the MOV count.
28//===----------------------------------------------------------------------===// 29 30#include "AMDGPU.h" 31#include "AMDGPUSubtarget.h" 32#include "R600InstrInfo.h" 33#include "llvm/CodeGen/DFAPacketizer.h" 34#include "llvm/CodeGen/MachineDominators.h" 35#include "llvm/CodeGen/MachineFunctionPass.h" 36#include "llvm/CodeGen/MachineInstrBuilder.h" 37#include "llvm/CodeGen/MachineLoopInfo.h" 38#include "llvm/CodeGen/MachineRegisterInfo.h" 39#include "llvm/CodeGen/Passes.h" 40#include "llvm/Support/Debug.h" 41#include "llvm/Support/raw_ostream.h" 42 43using namespace llvm; 44 45#define DEBUG_TYPE "vec-merger" 46 47namespace { 48 49static bool 50isImplicitlyDef(MachineRegisterInfo &MRI, unsigned Reg) { 51 for (MachineRegisterInfo::def_instr_iterator It = MRI.def_instr_begin(Reg), 52 E = MRI.def_instr_end(); It != E; ++It) { 53 return (*It).isImplicitDef(); 54 } 55 if (MRI.isReserved(Reg)) { 56 return false; 57 } 58 llvm_unreachable("Reg without a def"); 59 return false; 60} 61 62class RegSeqInfo { 63public: 64 MachineInstr *Instr; 65 DenseMap<unsigned, unsigned> RegToChan; 66 std::vector<unsigned> UndefReg; 67 RegSeqInfo(MachineRegisterInfo &MRI, MachineInstr *MI) : Instr(MI) { 68 assert(MI->getOpcode() == AMDGPU::REG_SEQUENCE); 69 for (unsigned i = 1, e = Instr->getNumOperands(); i < e; i+=2) { 70 MachineOperand &MO = Instr->getOperand(i); 71 unsigned Chan = Instr->getOperand(i + 1).getImm(); 72 if (isImplicitlyDef(MRI, MO.getReg())) 73 UndefReg.push_back(Chan); 74 else 75 RegToChan[MO.getReg()] = Chan; 76 } 77 } 78 RegSeqInfo() {} 79 80 bool operator==(const RegSeqInfo &RSI) const { 81 return RSI.Instr == Instr; 82 } 83}; 84 85class R600VectorRegMerger : public MachineFunctionPass { 86private: 87 MachineRegisterInfo *MRI; 88 const R600InstrInfo *TII; 89 bool canSwizzle(const MachineInstr &) const; 90 bool areAllUsesSwizzeable(unsigned Reg) const; 91 void SwizzleInput(MachineInstr &, 92 const std::vector<std::pair<unsigned, unsigned> > &) const; 93 bool 
tryMergeVector(const RegSeqInfo *, RegSeqInfo *, 94 std::vector<std::pair<unsigned, unsigned> > &Remap) const; 95 bool tryMergeUsingCommonSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI, 96 std::vector<std::pair<unsigned, unsigned> > &RemapChan); 97 bool tryMergeUsingFreeSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI, 98 std::vector<std::pair<unsigned, unsigned> > &RemapChan); 99 MachineInstr *RebuildVector(RegSeqInfo *MI, 100 const RegSeqInfo *BaseVec, 101 const std::vector<std::pair<unsigned, unsigned> > &RemapChan) const; 102 void RemoveMI(MachineInstr *); 103 void trackRSI(const RegSeqInfo &RSI); 104 105 typedef DenseMap<unsigned, std::vector<MachineInstr *> > InstructionSetMap; 106 DenseMap<MachineInstr *, RegSeqInfo> PreviousRegSeq; 107 InstructionSetMap PreviousRegSeqByReg; 108 InstructionSetMap PreviousRegSeqByUndefCount; 109public: 110 static char ID; 111 R600VectorRegMerger(TargetMachine &tm) : MachineFunctionPass(ID), 112 TII(nullptr) { } 113 114 void getAnalysisUsage(AnalysisUsage &AU) const override { 115 AU.setPreservesCFG(); 116 AU.addRequired<MachineDominatorTree>(); 117 AU.addPreserved<MachineDominatorTree>(); 118 AU.addRequired<MachineLoopInfo>(); 119 AU.addPreserved<MachineLoopInfo>(); 120 MachineFunctionPass::getAnalysisUsage(AU); 121 } 122 123 const char *getPassName() const override { 124 return "R600 Vector Registers Merge Pass"; 125 } 126 127 bool runOnMachineFunction(MachineFunction &Fn) override; 128}; 129 130char R600VectorRegMerger::ID = 0; 131 132bool R600VectorRegMerger::canSwizzle(const MachineInstr &MI) 133 const { 134 if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) 135 return true; 136 switch (MI.getOpcode()) { 137 case AMDGPU::R600_ExportSwz: 138 case AMDGPU::EG_ExportSwz: 139 return true; 140 default: 141 return false; 142 } 143} 144 145bool R600VectorRegMerger::tryMergeVector(const RegSeqInfo *Untouched, 146 RegSeqInfo *ToMerge, std::vector< std::pair<unsigned, unsigned> > &Remap) 147 const { 148 unsigned 
CurrentUndexIdx = 0; 149 for (DenseMap<unsigned, unsigned>::iterator It = ToMerge->RegToChan.begin(), 150 E = ToMerge->RegToChan.end(); It != E; ++It) { 151 DenseMap<unsigned, unsigned>::const_iterator PosInUntouched = 152 Untouched->RegToChan.find((*It).first); 153 if (PosInUntouched != Untouched->RegToChan.end()) { 154 Remap.push_back(std::pair<unsigned, unsigned> 155 ((*It).second, (*PosInUntouched).second)); 156 continue; 157 } 158 if (CurrentUndexIdx >= Untouched->UndefReg.size()) 159 return false; 160 Remap.push_back(std::pair<unsigned, unsigned> 161 ((*It).second, Untouched->UndefReg[CurrentUndexIdx++])); 162 } 163 164 return true; 165} 166 167static 168unsigned getReassignedChan( 169 const std::vector<std::pair<unsigned, unsigned> > &RemapChan, 170 unsigned Chan) { 171 for (unsigned j = 0, je = RemapChan.size(); j < je; j++) { 172 if (RemapChan[j].first == Chan) 173 return RemapChan[j].second; 174 } 175 llvm_unreachable("Chan wasn't reassigned"); 176} 177 178MachineInstr *R600VectorRegMerger::RebuildVector( 179 RegSeqInfo *RSI, const RegSeqInfo *BaseRSI, 180 const std::vector<std::pair<unsigned, unsigned> > &RemapChan) const { 181 unsigned Reg = RSI->Instr->getOperand(0).getReg(); 182 MachineBasicBlock::iterator Pos = RSI->Instr; 183 MachineBasicBlock &MBB = *Pos->getParent(); 184 DebugLoc DL = Pos->getDebugLoc(); 185 186 unsigned SrcVec = BaseRSI->Instr->getOperand(0).getReg(); 187 DenseMap<unsigned, unsigned> UpdatedRegToChan = BaseRSI->RegToChan; 188 std::vector<unsigned> UpdatedUndef = BaseRSI->UndefReg; 189 for (DenseMap<unsigned, unsigned>::iterator It = RSI->RegToChan.begin(), 190 E = RSI->RegToChan.end(); It != E; ++It) { 191 unsigned DstReg = MRI->createVirtualRegister(&AMDGPU::R600_Reg128RegClass); 192 unsigned SubReg = (*It).first; 193 unsigned Swizzle = (*It).second; 194 unsigned Chan = getReassignedChan(RemapChan, Swizzle); 195 196 MachineInstr *Tmp = BuildMI(MBB, Pos, DL, TII->get(AMDGPU::INSERT_SUBREG), 197 DstReg) 198 .addReg(SrcVec) 199 
.addReg(SubReg) 200 .addImm(Chan); 201 UpdatedRegToChan[SubReg] = Chan; 202 std::vector<unsigned>::iterator ChanPos = 203 std::find(UpdatedUndef.begin(), UpdatedUndef.end(), Chan); 204 if (ChanPos != UpdatedUndef.end()) 205 UpdatedUndef.erase(ChanPos); 206 assert(std::find(UpdatedUndef.begin(), UpdatedUndef.end(), Chan) == 207 UpdatedUndef.end() && 208 "UpdatedUndef shouldn't contain Chan more than once!"); 209 DEBUG(dbgs() << " ->"; Tmp->dump();); 210 (void)Tmp; 211 SrcVec = DstReg; 212 } 213 Pos = BuildMI(MBB, Pos, DL, TII->get(AMDGPU::COPY), Reg) 214 .addReg(SrcVec); 215 DEBUG(dbgs() << " ->"; Pos->dump();); 216 217 DEBUG(dbgs() << " Updating Swizzle:\n"); 218 for (MachineRegisterInfo::use_instr_iterator It = MRI->use_instr_begin(Reg), 219 E = MRI->use_instr_end(); It != E; ++It) { 220 DEBUG(dbgs() << " ";(*It).dump(); dbgs() << " ->"); 221 SwizzleInput(*It, RemapChan); 222 DEBUG((*It).dump()); 223 } 224 RSI->Instr->eraseFromParent(); 225 226 // Update RSI 227 RSI->Instr = Pos; 228 RSI->RegToChan = UpdatedRegToChan; 229 RSI->UndefReg = UpdatedUndef; 230 231 return Pos; 232} 233 234void R600VectorRegMerger::RemoveMI(MachineInstr *MI) { 235 for (InstructionSetMap::iterator It = PreviousRegSeqByReg.begin(), 236 E = PreviousRegSeqByReg.end(); It != E; ++It) { 237 std::vector<MachineInstr *> &MIs = (*It).second; 238 MIs.erase(std::find(MIs.begin(), MIs.end(), MI), MIs.end()); 239 } 240 for (InstructionSetMap::iterator It = PreviousRegSeqByUndefCount.begin(), 241 E = PreviousRegSeqByUndefCount.end(); It != E; ++It) { 242 std::vector<MachineInstr *> &MIs = (*It).second; 243 MIs.erase(std::find(MIs.begin(), MIs.end(), MI), MIs.end()); 244 } 245} 246 247void R600VectorRegMerger::SwizzleInput(MachineInstr &MI, 248 const std::vector<std::pair<unsigned, unsigned> > &RemapChan) const { 249 unsigned Offset; 250 if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) 251 Offset = 2; 252 else 253 Offset = 3; 254 for (unsigned i = 0; i < 4; i++) { 255 unsigned Swizzle = 
MI.getOperand(i + Offset).getImm() + 1; 256 for (unsigned j = 0, e = RemapChan.size(); j < e; j++) { 257 if (RemapChan[j].first == Swizzle) { 258 MI.getOperand(i + Offset).setImm(RemapChan[j].second - 1); 259 break; 260 } 261 } 262 } 263} 264 265bool R600VectorRegMerger::areAllUsesSwizzeable(unsigned Reg) const { 266 for (MachineRegisterInfo::use_instr_iterator It = MRI->use_instr_begin(Reg), 267 E = MRI->use_instr_end(); It != E; ++It) { 268 if (!canSwizzle(*It)) 269 return false; 270 } 271 return true; 272} 273 274bool R600VectorRegMerger::tryMergeUsingCommonSlot(RegSeqInfo &RSI, 275 RegSeqInfo &CompatibleRSI, 276 std::vector<std::pair<unsigned, unsigned> > &RemapChan) { 277 for (MachineInstr::mop_iterator MOp = RSI.Instr->operands_begin(), 278 MOE = RSI.Instr->operands_end(); MOp != MOE; ++MOp) { 279 if (!MOp->isReg()) 280 continue; 281 if (PreviousRegSeqByReg[MOp->getReg()].empty()) 282 continue; 283 for (MachineInstr *MI : PreviousRegSeqByReg[MOp->getReg()]) { 284 CompatibleRSI = PreviousRegSeq[MI]; 285 if (RSI == CompatibleRSI) 286 continue; 287 if (tryMergeVector(&CompatibleRSI, &RSI, RemapChan)) 288 return true; 289 } 290 } 291 return false; 292} 293 294bool R600VectorRegMerger::tryMergeUsingFreeSlot(RegSeqInfo &RSI, 295 RegSeqInfo &CompatibleRSI, 296 std::vector<std::pair<unsigned, unsigned> > &RemapChan) { 297 unsigned NeededUndefs = 4 - RSI.UndefReg.size(); 298 if (PreviousRegSeqByUndefCount[NeededUndefs].empty()) 299 return false; 300 std::vector<MachineInstr *> &MIs = 301 PreviousRegSeqByUndefCount[NeededUndefs]; 302 CompatibleRSI = PreviousRegSeq[MIs.back()]; 303 tryMergeVector(&CompatibleRSI, &RSI, RemapChan); 304 return true; 305} 306 307void R600VectorRegMerger::trackRSI(const RegSeqInfo &RSI) { 308 for (DenseMap<unsigned, unsigned>::const_iterator 309 It = RSI.RegToChan.begin(), E = RSI.RegToChan.end(); It != E; ++It) { 310 PreviousRegSeqByReg[(*It).first].push_back(RSI.Instr); 311 } 312 
PreviousRegSeqByUndefCount[RSI.UndefReg.size()].push_back(RSI.Instr); 313 PreviousRegSeq[RSI.Instr] = RSI; 314} 315 316bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) { 317 TII = static_cast<const R600InstrInfo *>(Fn.getSubtarget().getInstrInfo()); 318 MRI = &(Fn.getRegInfo()); 319 for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); 320 MBB != MBBe; ++MBB) { 321 MachineBasicBlock *MB = &*MBB; 322 PreviousRegSeq.clear(); 323 PreviousRegSeqByReg.clear(); 324 PreviousRegSeqByUndefCount.clear(); 325 326 for (MachineBasicBlock::iterator MII = MB->begin(), MIIE = MB->end(); 327 MII != MIIE; ++MII) { 328 MachineInstr *MI = MII; 329 if (MI->getOpcode() != AMDGPU::REG_SEQUENCE) { 330 if (TII->get(MI->getOpcode()).TSFlags & R600_InstFlag::TEX_INST) { 331 unsigned Reg = MI->getOperand(1).getReg(); 332 for (MachineRegisterInfo::def_instr_iterator 333 It = MRI->def_instr_begin(Reg), E = MRI->def_instr_end(); 334 It != E; ++It) { 335 RemoveMI(&(*It)); 336 } 337 } 338 continue; 339 } 340 341 342 RegSeqInfo RSI(*MRI, MI); 343 344 // All uses of MI are swizzeable ? 
345 unsigned Reg = MI->getOperand(0).getReg(); 346 if (!areAllUsesSwizzeable(Reg)) 347 continue; 348 349 DEBUG (dbgs() << "Trying to optimize "; 350 MI->dump(); 351 ); 352 353 RegSeqInfo CandidateRSI; 354 std::vector<std::pair<unsigned, unsigned> > RemapChan; 355 DEBUG(dbgs() << "Using common slots...\n";); 356 if (tryMergeUsingCommonSlot(RSI, CandidateRSI, RemapChan)) { 357 // Remove CandidateRSI mapping 358 RemoveMI(CandidateRSI.Instr); 359 MII = RebuildVector(&RSI, &CandidateRSI, RemapChan); 360 trackRSI(RSI); 361 continue; 362 } 363 DEBUG(dbgs() << "Using free slots...\n";); 364 RemapChan.clear(); 365 if (tryMergeUsingFreeSlot(RSI, CandidateRSI, RemapChan)) { 366 RemoveMI(CandidateRSI.Instr); 367 MII = RebuildVector(&RSI, &CandidateRSI, RemapChan); 368 trackRSI(RSI); 369 continue; 370 } 371 //Failed to merge 372 trackRSI(RSI); 373 } 374 } 375 return false; 376} 377 378} 379 380llvm::FunctionPass *llvm::createR600VectorRegMerger(TargetMachine &tm) { 381 return new R600VectorRegMerger(tm); 382} 383