//===--------------------- R600MergeVectorRegisters.cpp -------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass merges the inputs of swizzlable instructions into vectors that
/// share common data and/or have enough undef subregisters, relying on the
/// swizzle abilities of those instructions.
///
/// For instance let's consider the following pseudo code:
///   vreg5<def> = REG_SEQ vreg1, sub0, vreg2, sub1, vreg3, sub2, undef, sub3
///   ...
///   vreg7<def> = REG_SEQ vreg1, sub0, vreg3, sub1, undef, sub2, vreg4, sub3
///   (swizzable Inst) vreg7, SwizzleMask : sub0, sub1, sub2, sub3
///
/// is turned into:
///   vreg5<def> = REG_SEQ vreg1, sub0, vreg2, sub1, vreg3, sub2, undef, sub3
///   ...
///   vreg7<def> = INSERT_SUBREG vreg4, sub3
///   (swizzable Inst) vreg7, SwizzleMask : sub0, sub2, sub1, sub3
///
/// This allows regalloc to reduce register pressure for vector registers and
/// to reduce MOV count.
28259698Sdim//===----------------------------------------------------------------------===// 29259698Sdim 30259698Sdim#define DEBUG_TYPE "vec-merger" 31259698Sdim#include "llvm/Support/Debug.h" 32259698Sdim#include "AMDGPU.h" 33259698Sdim#include "R600InstrInfo.h" 34259698Sdim#include "llvm/CodeGen/DFAPacketizer.h" 35259698Sdim#include "llvm/CodeGen/MachineDominators.h" 36259698Sdim#include "llvm/CodeGen/MachineFunctionPass.h" 37259698Sdim#include "llvm/CodeGen/MachineLoopInfo.h" 38259698Sdim#include "llvm/CodeGen/Passes.h" 39259698Sdim#include "llvm/CodeGen/MachineInstrBuilder.h" 40259698Sdim#include "llvm/Support/raw_ostream.h" 41259698Sdim#include "llvm/CodeGen/MachineRegisterInfo.h" 42259698Sdim 43259698Sdimusing namespace llvm; 44259698Sdim 45259698Sdimnamespace { 46259698Sdim 47259698Sdimstatic bool 48259698SdimisImplicitlyDef(MachineRegisterInfo &MRI, unsigned Reg) { 49259698Sdim for (MachineRegisterInfo::def_iterator It = MRI.def_begin(Reg), 50259698Sdim E = MRI.def_end(); It != E; ++It) { 51259698Sdim return (*It).isImplicitDef(); 52259698Sdim } 53259698Sdim if (MRI.isReserved(Reg)) { 54259698Sdim return false; 55259698Sdim } 56259698Sdim llvm_unreachable("Reg without a def"); 57259698Sdim return false; 58259698Sdim} 59259698Sdim 60259698Sdimclass RegSeqInfo { 61259698Sdimpublic: 62259698Sdim MachineInstr *Instr; 63259698Sdim DenseMap<unsigned, unsigned> RegToChan; 64259698Sdim std::vector<unsigned> UndefReg; 65259698Sdim RegSeqInfo(MachineRegisterInfo &MRI, MachineInstr *MI) : Instr(MI) { 66259698Sdim assert (MI->getOpcode() == AMDGPU::REG_SEQUENCE); 67259698Sdim for (unsigned i = 1, e = Instr->getNumOperands(); i < e; i+=2) { 68259698Sdim MachineOperand &MO = Instr->getOperand(i); 69259698Sdim unsigned Chan = Instr->getOperand(i + 1).getImm(); 70259698Sdim if (isImplicitlyDef(MRI, MO.getReg())) 71259698Sdim UndefReg.push_back(Chan); 72259698Sdim else 73259698Sdim RegToChan[MO.getReg()] = Chan; 74259698Sdim } 75259698Sdim } 76259698Sdim RegSeqInfo() {} 
77259698Sdim 78259698Sdim bool operator==(const RegSeqInfo &RSI) const { 79259698Sdim return RSI.Instr == Instr; 80259698Sdim } 81259698Sdim}; 82259698Sdim 83259698Sdimclass R600VectorRegMerger : public MachineFunctionPass { 84259698Sdimprivate: 85259698Sdim MachineRegisterInfo *MRI; 86259698Sdim const R600InstrInfo *TII; 87259698Sdim bool canSwizzle(const MachineInstr &) const; 88259698Sdim bool areAllUsesSwizzeable(unsigned Reg) const; 89259698Sdim void SwizzleInput(MachineInstr &, 90259698Sdim const std::vector<std::pair<unsigned, unsigned> > &) const; 91259698Sdim bool tryMergeVector(const RegSeqInfo *, RegSeqInfo *, 92259698Sdim std::vector<std::pair<unsigned, unsigned> > &Remap) const; 93259698Sdim bool tryMergeUsingCommonSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI, 94259698Sdim std::vector<std::pair<unsigned, unsigned> > &RemapChan); 95259698Sdim bool tryMergeUsingFreeSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI, 96259698Sdim std::vector<std::pair<unsigned, unsigned> > &RemapChan); 97259698Sdim MachineInstr *RebuildVector(RegSeqInfo *MI, 98259698Sdim const RegSeqInfo *BaseVec, 99259698Sdim const std::vector<std::pair<unsigned, unsigned> > &RemapChan) const; 100259698Sdim void RemoveMI(MachineInstr *); 101259698Sdim void trackRSI(const RegSeqInfo &RSI); 102259698Sdim 103259698Sdim typedef DenseMap<unsigned, std::vector<MachineInstr *> > InstructionSetMap; 104259698Sdim DenseMap<MachineInstr *, RegSeqInfo> PreviousRegSeq; 105259698Sdim InstructionSetMap PreviousRegSeqByReg; 106259698Sdim InstructionSetMap PreviousRegSeqByUndefCount; 107259698Sdimpublic: 108259698Sdim static char ID; 109259698Sdim R600VectorRegMerger(TargetMachine &tm) : MachineFunctionPass(ID), 110259698Sdim TII(0) { } 111259698Sdim 112259698Sdim void getAnalysisUsage(AnalysisUsage &AU) const { 113259698Sdim AU.setPreservesCFG(); 114259698Sdim AU.addRequired<MachineDominatorTree>(); 115259698Sdim AU.addPreserved<MachineDominatorTree>(); 116259698Sdim AU.addRequired<MachineLoopInfo>(); 
117259698Sdim AU.addPreserved<MachineLoopInfo>(); 118259698Sdim MachineFunctionPass::getAnalysisUsage(AU); 119259698Sdim } 120259698Sdim 121259698Sdim const char *getPassName() const { 122259698Sdim return "R600 Vector Registers Merge Pass"; 123259698Sdim } 124259698Sdim 125259698Sdim bool runOnMachineFunction(MachineFunction &Fn); 126259698Sdim}; 127259698Sdim 128259698Sdimchar R600VectorRegMerger::ID = 0; 129259698Sdim 130259698Sdimbool R600VectorRegMerger::canSwizzle(const MachineInstr &MI) 131259698Sdim const { 132259698Sdim if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) 133259698Sdim return true; 134259698Sdim switch (MI.getOpcode()) { 135259698Sdim case AMDGPU::R600_ExportSwz: 136259698Sdim case AMDGPU::EG_ExportSwz: 137259698Sdim return true; 138259698Sdim default: 139259698Sdim return false; 140259698Sdim } 141259698Sdim} 142259698Sdim 143259698Sdimbool R600VectorRegMerger::tryMergeVector(const RegSeqInfo *Untouched, 144259698Sdim RegSeqInfo *ToMerge, std::vector< std::pair<unsigned, unsigned> > &Remap) 145259698Sdim const { 146259698Sdim unsigned CurrentUndexIdx = 0; 147259698Sdim for (DenseMap<unsigned, unsigned>::iterator It = ToMerge->RegToChan.begin(), 148259698Sdim E = ToMerge->RegToChan.end(); It != E; ++It) { 149259698Sdim DenseMap<unsigned, unsigned>::const_iterator PosInUntouched = 150259698Sdim Untouched->RegToChan.find((*It).first); 151259698Sdim if (PosInUntouched != Untouched->RegToChan.end()) { 152259698Sdim Remap.push_back(std::pair<unsigned, unsigned> 153259698Sdim ((*It).second, (*PosInUntouched).second)); 154259698Sdim continue; 155259698Sdim } 156259698Sdim if (CurrentUndexIdx >= Untouched->UndefReg.size()) 157259698Sdim return false; 158259698Sdim Remap.push_back(std::pair<unsigned, unsigned> 159259698Sdim ((*It).second, Untouched->UndefReg[CurrentUndexIdx++])); 160259698Sdim } 161259698Sdim 162259698Sdim return true; 163259698Sdim} 164259698Sdim 165259698Sdimstatic 166259698Sdimunsigned getReassignedChan( 167259698Sdim 
const std::vector<std::pair<unsigned, unsigned> > &RemapChan, 168259698Sdim unsigned Chan) { 169259698Sdim for (unsigned j = 0, je = RemapChan.size(); j < je; j++) { 170259698Sdim if (RemapChan[j].first == Chan) 171259698Sdim return RemapChan[j].second; 172259698Sdim } 173259698Sdim llvm_unreachable("Chan wasn't reassigned"); 174259698Sdim} 175259698Sdim 176259698SdimMachineInstr *R600VectorRegMerger::RebuildVector( 177259698Sdim RegSeqInfo *RSI, const RegSeqInfo *BaseRSI, 178259698Sdim const std::vector<std::pair<unsigned, unsigned> > &RemapChan) const { 179259698Sdim unsigned Reg = RSI->Instr->getOperand(0).getReg(); 180259698Sdim MachineBasicBlock::iterator Pos = RSI->Instr; 181259698Sdim MachineBasicBlock &MBB = *Pos->getParent(); 182259698Sdim DebugLoc DL = Pos->getDebugLoc(); 183259698Sdim 184259698Sdim unsigned SrcVec = BaseRSI->Instr->getOperand(0).getReg(); 185259698Sdim DenseMap<unsigned, unsigned> UpdatedRegToChan = BaseRSI->RegToChan; 186259698Sdim std::vector<unsigned> UpdatedUndef = BaseRSI->UndefReg; 187259698Sdim for (DenseMap<unsigned, unsigned>::iterator It = RSI->RegToChan.begin(), 188259698Sdim E = RSI->RegToChan.end(); It != E; ++It) { 189259698Sdim unsigned DstReg = MRI->createVirtualRegister(&AMDGPU::R600_Reg128RegClass); 190259698Sdim unsigned SubReg = (*It).first; 191259698Sdim unsigned Swizzle = (*It).second; 192259698Sdim unsigned Chan = getReassignedChan(RemapChan, Swizzle); 193259698Sdim 194259698Sdim MachineInstr *Tmp = BuildMI(MBB, Pos, DL, TII->get(AMDGPU::INSERT_SUBREG), 195259698Sdim DstReg) 196259698Sdim .addReg(SrcVec) 197259698Sdim .addReg(SubReg) 198259698Sdim .addImm(Chan); 199259698Sdim UpdatedRegToChan[SubReg] = Chan; 200259698Sdim std::vector<unsigned>::iterator ChanPos = 201259698Sdim std::find(UpdatedUndef.begin(), UpdatedUndef.end(), Chan); 202259698Sdim if (ChanPos != UpdatedUndef.end()) 203259698Sdim UpdatedUndef.erase(ChanPos); 204259698Sdim assert(std::find(UpdatedUndef.begin(), UpdatedUndef.end(), Chan) == 
205259698Sdim UpdatedUndef.end() && 206259698Sdim "UpdatedUndef shouldn't contain Chan more than once!"); 207259698Sdim DEBUG(dbgs() << " ->"; Tmp->dump();); 208259698Sdim (void)Tmp; 209259698Sdim SrcVec = DstReg; 210259698Sdim } 211259698Sdim Pos = BuildMI(MBB, Pos, DL, TII->get(AMDGPU::COPY), Reg) 212259698Sdim .addReg(SrcVec); 213259698Sdim DEBUG(dbgs() << " ->"; Pos->dump();); 214259698Sdim 215259698Sdim DEBUG(dbgs() << " Updating Swizzle:\n"); 216259698Sdim for (MachineRegisterInfo::use_iterator It = MRI->use_begin(Reg), 217259698Sdim E = MRI->use_end(); It != E; ++It) { 218259698Sdim DEBUG(dbgs() << " ";(*It).dump(); dbgs() << " ->"); 219259698Sdim SwizzleInput(*It, RemapChan); 220259698Sdim DEBUG((*It).dump()); 221259698Sdim } 222259698Sdim RSI->Instr->eraseFromParent(); 223259698Sdim 224259698Sdim // Update RSI 225259698Sdim RSI->Instr = Pos; 226259698Sdim RSI->RegToChan = UpdatedRegToChan; 227259698Sdim RSI->UndefReg = UpdatedUndef; 228259698Sdim 229259698Sdim return Pos; 230259698Sdim} 231259698Sdim 232259698Sdimvoid R600VectorRegMerger::RemoveMI(MachineInstr *MI) { 233259698Sdim for (InstructionSetMap::iterator It = PreviousRegSeqByReg.begin(), 234259698Sdim E = PreviousRegSeqByReg.end(); It != E; ++It) { 235259698Sdim std::vector<MachineInstr *> &MIs = (*It).second; 236259698Sdim MIs.erase(std::find(MIs.begin(), MIs.end(), MI), MIs.end()); 237259698Sdim } 238259698Sdim for (InstructionSetMap::iterator It = PreviousRegSeqByUndefCount.begin(), 239259698Sdim E = PreviousRegSeqByUndefCount.end(); It != E; ++It) { 240259698Sdim std::vector<MachineInstr *> &MIs = (*It).second; 241259698Sdim MIs.erase(std::find(MIs.begin(), MIs.end(), MI), MIs.end()); 242259698Sdim } 243259698Sdim} 244259698Sdim 245259698Sdimvoid R600VectorRegMerger::SwizzleInput(MachineInstr &MI, 246259698Sdim const std::vector<std::pair<unsigned, unsigned> > &RemapChan) const { 247259698Sdim unsigned Offset; 248259698Sdim if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) 
249259698Sdim Offset = 2; 250259698Sdim else 251259698Sdim Offset = 3; 252259698Sdim for (unsigned i = 0; i < 4; i++) { 253259698Sdim unsigned Swizzle = MI.getOperand(i + Offset).getImm() + 1; 254259698Sdim for (unsigned j = 0, e = RemapChan.size(); j < e; j++) { 255259698Sdim if (RemapChan[j].first == Swizzle) { 256259698Sdim MI.getOperand(i + Offset).setImm(RemapChan[j].second - 1); 257259698Sdim break; 258259698Sdim } 259259698Sdim } 260259698Sdim } 261259698Sdim} 262259698Sdim 263259698Sdimbool R600VectorRegMerger::areAllUsesSwizzeable(unsigned Reg) const { 264259698Sdim for (MachineRegisterInfo::use_iterator It = MRI->use_begin(Reg), 265259698Sdim E = MRI->use_end(); It != E; ++It) { 266259698Sdim if (!canSwizzle(*It)) 267259698Sdim return false; 268259698Sdim } 269259698Sdim return true; 270259698Sdim} 271259698Sdim 272259698Sdimbool R600VectorRegMerger::tryMergeUsingCommonSlot(RegSeqInfo &RSI, 273259698Sdim RegSeqInfo &CompatibleRSI, 274259698Sdim std::vector<std::pair<unsigned, unsigned> > &RemapChan) { 275259698Sdim for (MachineInstr::mop_iterator MOp = RSI.Instr->operands_begin(), 276259698Sdim MOE = RSI.Instr->operands_end(); MOp != MOE; ++MOp) { 277259698Sdim if (!MOp->isReg()) 278259698Sdim continue; 279259698Sdim if (PreviousRegSeqByReg[MOp->getReg()].empty()) 280259698Sdim continue; 281259698Sdim std::vector<MachineInstr *> MIs = PreviousRegSeqByReg[MOp->getReg()]; 282259698Sdim for (unsigned i = 0, e = MIs.size(); i < e; i++) { 283259698Sdim CompatibleRSI = PreviousRegSeq[MIs[i]]; 284259698Sdim if (RSI == CompatibleRSI) 285259698Sdim continue; 286259698Sdim if (tryMergeVector(&CompatibleRSI, &RSI, RemapChan)) 287259698Sdim return true; 288259698Sdim } 289259698Sdim } 290259698Sdim return false; 291259698Sdim} 292259698Sdim 293259698Sdimbool R600VectorRegMerger::tryMergeUsingFreeSlot(RegSeqInfo &RSI, 294259698Sdim RegSeqInfo &CompatibleRSI, 295259698Sdim std::vector<std::pair<unsigned, unsigned> > &RemapChan) { 296259698Sdim unsigned NeededUndefs = 4 
- RSI.UndefReg.size(); 297259698Sdim if (PreviousRegSeqByUndefCount[NeededUndefs].empty()) 298259698Sdim return false; 299259698Sdim std::vector<MachineInstr *> &MIs = 300259698Sdim PreviousRegSeqByUndefCount[NeededUndefs]; 301259698Sdim CompatibleRSI = PreviousRegSeq[MIs.back()]; 302259698Sdim tryMergeVector(&CompatibleRSI, &RSI, RemapChan); 303259698Sdim return true; 304259698Sdim} 305259698Sdim 306259698Sdimvoid R600VectorRegMerger::trackRSI(const RegSeqInfo &RSI) { 307259698Sdim for (DenseMap<unsigned, unsigned>::const_iterator 308259698Sdim It = RSI.RegToChan.begin(), E = RSI.RegToChan.end(); It != E; ++It) { 309259698Sdim PreviousRegSeqByReg[(*It).first].push_back(RSI.Instr); 310259698Sdim } 311259698Sdim PreviousRegSeqByUndefCount[RSI.UndefReg.size()].push_back(RSI.Instr); 312259698Sdim PreviousRegSeq[RSI.Instr] = RSI; 313259698Sdim} 314259698Sdim 315259698Sdimbool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) { 316259698Sdim TII = static_cast<const R600InstrInfo *>(Fn.getTarget().getInstrInfo()); 317259698Sdim MRI = &(Fn.getRegInfo()); 318259698Sdim for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); 319259698Sdim MBB != MBBe; ++MBB) { 320259698Sdim MachineBasicBlock *MB = MBB; 321259698Sdim PreviousRegSeq.clear(); 322259698Sdim PreviousRegSeqByReg.clear(); 323259698Sdim PreviousRegSeqByUndefCount.clear(); 324259698Sdim 325259698Sdim for (MachineBasicBlock::iterator MII = MB->begin(), MIIE = MB->end(); 326259698Sdim MII != MIIE; ++MII) { 327259698Sdim MachineInstr *MI = MII; 328259698Sdim if (MI->getOpcode() != AMDGPU::REG_SEQUENCE) { 329259698Sdim if (TII->get(MI->getOpcode()).TSFlags & R600_InstFlag::TEX_INST) { 330259698Sdim unsigned Reg = MI->getOperand(1).getReg(); 331259698Sdim for (MachineRegisterInfo::def_iterator It = MRI->def_begin(Reg), 332259698Sdim E = MRI->def_end(); It != E; ++It) { 333259698Sdim RemoveMI(&(*It)); 334259698Sdim } 335259698Sdim } 336259698Sdim continue; 337259698Sdim } 338259698Sdim 339259698Sdim 
340259698Sdim RegSeqInfo RSI(*MRI, MI); 341259698Sdim 342259698Sdim // All uses of MI are swizzeable ? 343259698Sdim unsigned Reg = MI->getOperand(0).getReg(); 344259698Sdim if (!areAllUsesSwizzeable(Reg)) 345259698Sdim continue; 346259698Sdim 347259698Sdim DEBUG (dbgs() << "Trying to optimize "; 348259698Sdim MI->dump(); 349259698Sdim ); 350259698Sdim 351259698Sdim RegSeqInfo CandidateRSI; 352259698Sdim std::vector<std::pair<unsigned, unsigned> > RemapChan; 353259698Sdim DEBUG(dbgs() << "Using common slots...\n";); 354259698Sdim if (tryMergeUsingCommonSlot(RSI, CandidateRSI, RemapChan)) { 355259698Sdim // Remove CandidateRSI mapping 356259698Sdim RemoveMI(CandidateRSI.Instr); 357259698Sdim MII = RebuildVector(&RSI, &CandidateRSI, RemapChan); 358259698Sdim trackRSI(RSI); 359259698Sdim continue; 360259698Sdim } 361259698Sdim DEBUG(dbgs() << "Using free slots...\n";); 362259698Sdim RemapChan.clear(); 363259698Sdim if (tryMergeUsingFreeSlot(RSI, CandidateRSI, RemapChan)) { 364259698Sdim RemoveMI(CandidateRSI.Instr); 365259698Sdim MII = RebuildVector(&RSI, &CandidateRSI, RemapChan); 366259698Sdim trackRSI(RSI); 367259698Sdim continue; 368259698Sdim } 369259698Sdim //Failed to merge 370259698Sdim trackRSI(RSI); 371259698Sdim } 372259698Sdim } 373259698Sdim return false; 374259698Sdim} 375259698Sdim 376259698Sdim} 377259698Sdim 378259698Sdimllvm::FunctionPass *llvm::createR600VectorRegMerger(TargetMachine &tm) { 379259698Sdim return new R600VectorRegMerger(tm); 380259698Sdim} 381