//===-- SIFixSGPRCopies.cpp - Remove potential VGPR => SGPR copies --------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Copies from VGPR to SGPR registers are illegal and the register coalescer
/// will sometimes generate these illegal copies in situations like this:
///
/// Register Class <vsrc> is the union of <vgpr> and <sgpr>
///
/// BB0:
///   %vreg0 <sgpr> = SCALAR_INST
///   %vreg1 <vsrc> = COPY %vreg0 <sgpr>
///   ...
///   BRANCH %cond BB1, BB2
/// BB1:
///   %vreg2 <vgpr> = VECTOR_INST
///   %vreg3 <vsrc> = COPY %vreg2 <vgpr>
/// BB2:
///   %vreg4 <vsrc> = PHI %vreg1 <vsrc>, <BB#0>, %vreg3 <vsrc>, <BB#1>
///   %vreg5 <vgpr> = VECTOR_INST %vreg4 <vsrc>
///
///
/// The coalescer will begin at BB0 and eliminate its copy, then the resulting
/// code will look like this:
///
/// BB0:
///   %vreg0 <sgpr> = SCALAR_INST
///   ...
///   BRANCH %cond BB1, BB2
/// BB1:
///   %vreg2 <vgpr> = VECTOR_INST
///   %vreg3 <vsrc> = COPY %vreg2 <vgpr>
/// BB2:
///   %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <vsrc>, <BB#1>
///   %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
///
/// Now that the result of the PHI instruction is an SGPR, the register
/// allocator is forced to constrain the register class of %vreg3 to
/// <sgpr> so we end up with final code like this:
///
/// BB0:
///   %vreg0 <sgpr> = SCALAR_INST
///   ...
///   BRANCH %cond BB1, BB2
/// BB1:
///   %vreg2 <vgpr> = VECTOR_INST
///   %vreg3 <sgpr> = COPY %vreg2 <vgpr>
/// BB2:
///   %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <sgpr>, <BB#1>
///   %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
///
/// Now this code contains an illegal copy from a VGPR to an SGPR.
///
/// In order to avoid this problem, this pass searches for PHI instructions
/// which define a <vsrc> register and constrains its definition class to
/// <vgpr> if the user of the PHI's definition register is a vector instruction.
/// If the PHI's definition class is constrained to <vgpr> then the coalescer
/// will be unable to perform the COPY removal from the above example which
/// ultimately led to the creation of an illegal COPY.
66259698Sdim//===----------------------------------------------------------------------===// 67259698Sdim 68259698Sdim#define DEBUG_TYPE "sgpr-copies" 69259698Sdim#include "AMDGPU.h" 70259698Sdim#include "SIInstrInfo.h" 71259698Sdim#include "llvm/CodeGen/MachineFunctionPass.h" 72259698Sdim#include "llvm/CodeGen/MachineInstrBuilder.h" 73259698Sdim#include "llvm/CodeGen/MachineRegisterInfo.h" 74259698Sdim#include "llvm/Support/Debug.h" 75259698Sdim#include "llvm/Support/raw_ostream.h" 76259698Sdim#include "llvm/Target/TargetMachine.h" 77259698Sdim 78259698Sdimusing namespace llvm; 79259698Sdim 80259698Sdimnamespace { 81259698Sdim 82259698Sdimclass SIFixSGPRCopies : public MachineFunctionPass { 83259698Sdim 84259698Sdimprivate: 85259698Sdim static char ID; 86259698Sdim const TargetRegisterClass *inferRegClassFromUses(const SIRegisterInfo *TRI, 87259698Sdim const MachineRegisterInfo &MRI, 88259698Sdim unsigned Reg, 89259698Sdim unsigned SubReg) const; 90259698Sdim const TargetRegisterClass *inferRegClassFromDef(const SIRegisterInfo *TRI, 91259698Sdim const MachineRegisterInfo &MRI, 92259698Sdim unsigned Reg, 93259698Sdim unsigned SubReg) const; 94259698Sdim bool isVGPRToSGPRCopy(const MachineInstr &Copy, const SIRegisterInfo *TRI, 95259698Sdim const MachineRegisterInfo &MRI) const; 96259698Sdim 97259698Sdimpublic: 98259698Sdim SIFixSGPRCopies(TargetMachine &tm) : MachineFunctionPass(ID) { } 99259698Sdim 100259698Sdim virtual bool runOnMachineFunction(MachineFunction &MF); 101259698Sdim 102259698Sdim const char *getPassName() const { 103259698Sdim return "SI Fix SGPR copies"; 104259698Sdim } 105259698Sdim 106259698Sdim}; 107259698Sdim 108259698Sdim} // End anonymous namespace 109259698Sdim 110259698Sdimchar SIFixSGPRCopies::ID = 0; 111259698Sdim 112259698SdimFunctionPass *llvm::createSIFixSGPRCopiesPass(TargetMachine &tm) { 113259698Sdim return new SIFixSGPRCopies(tm); 114259698Sdim} 115259698Sdim 116259698Sdimstatic bool hasVGPROperands(const MachineInstr &MI, const 
SIRegisterInfo *TRI) { 117259698Sdim const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 118259698Sdim for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { 119259698Sdim if (!MI.getOperand(i).isReg() || 120259698Sdim !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg())) 121259698Sdim continue; 122259698Sdim 123259698Sdim if (TRI->hasVGPRs(MRI.getRegClass(MI.getOperand(i).getReg()))) 124259698Sdim return true; 125259698Sdim } 126259698Sdim return false; 127259698Sdim} 128259698Sdim 129259698Sdim/// This functions walks the use list of Reg until it finds an Instruction 130259698Sdim/// that isn't a COPY returns the register class of that instruction. 131259698Sdim/// \return The register defined by the first non-COPY instruction. 132259698Sdimconst TargetRegisterClass *SIFixSGPRCopies::inferRegClassFromUses( 133259698Sdim const SIRegisterInfo *TRI, 134259698Sdim const MachineRegisterInfo &MRI, 135259698Sdim unsigned Reg, 136259698Sdim unsigned SubReg) const { 137259698Sdim // The Reg parameter to the function must always be defined by either a PHI 138259698Sdim // or a COPY, therefore it cannot be a physical register. 
139259698Sdim assert(TargetRegisterInfo::isVirtualRegister(Reg) && 140259698Sdim "Reg cannot be a physical register"); 141259698Sdim 142259698Sdim const TargetRegisterClass *RC = MRI.getRegClass(Reg); 143259698Sdim RC = TRI->getSubRegClass(RC, SubReg); 144259698Sdim for (MachineRegisterInfo::use_iterator I = MRI.use_begin(Reg), 145259698Sdim E = MRI.use_end(); I != E; ++I) { 146259698Sdim switch (I->getOpcode()) { 147259698Sdim case AMDGPU::COPY: 148259698Sdim RC = TRI->getCommonSubClass(RC, inferRegClassFromUses(TRI, MRI, 149259698Sdim I->getOperand(0).getReg(), 150259698Sdim I->getOperand(0).getSubReg())); 151259698Sdim break; 152259698Sdim } 153259698Sdim } 154259698Sdim 155259698Sdim return RC; 156259698Sdim} 157259698Sdim 158259698Sdimconst TargetRegisterClass *SIFixSGPRCopies::inferRegClassFromDef( 159259698Sdim const SIRegisterInfo *TRI, 160259698Sdim const MachineRegisterInfo &MRI, 161259698Sdim unsigned Reg, 162259698Sdim unsigned SubReg) const { 163259698Sdim if (!TargetRegisterInfo::isVirtualRegister(Reg)) { 164259698Sdim const TargetRegisterClass *RC = TRI->getPhysRegClass(Reg); 165259698Sdim return TRI->getSubRegClass(RC, SubReg); 166259698Sdim } 167259698Sdim MachineInstr *Def = MRI.getVRegDef(Reg); 168259698Sdim if (Def->getOpcode() != AMDGPU::COPY) { 169259698Sdim return TRI->getSubRegClass(MRI.getRegClass(Reg), SubReg); 170259698Sdim } 171259698Sdim 172259698Sdim return inferRegClassFromDef(TRI, MRI, Def->getOperand(1).getReg(), 173259698Sdim Def->getOperand(1).getSubReg()); 174259698Sdim} 175259698Sdim 176259698Sdimbool SIFixSGPRCopies::isVGPRToSGPRCopy(const MachineInstr &Copy, 177259698Sdim const SIRegisterInfo *TRI, 178259698Sdim const MachineRegisterInfo &MRI) const { 179259698Sdim 180259698Sdim unsigned DstReg = Copy.getOperand(0).getReg(); 181259698Sdim unsigned SrcReg = Copy.getOperand(1).getReg(); 182259698Sdim unsigned SrcSubReg = Copy.getOperand(1).getSubReg(); 183259698Sdim const TargetRegisterClass *DstRC = MRI.getRegClass(DstReg); 
184259698Sdim const TargetRegisterClass *SrcRC; 185259698Sdim 186259698Sdim if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || 187259698Sdim DstRC == &AMDGPU::M0RegRegClass) 188259698Sdim return false; 189259698Sdim 190266715Sdim SrcRC = TRI->getSubRegClass(MRI.getRegClass(SrcReg), SrcSubReg); 191259698Sdim return TRI->isSGPRClass(DstRC) && TRI->hasVGPRs(SrcRC); 192259698Sdim} 193259698Sdim 194259698Sdimbool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { 195259698Sdim MachineRegisterInfo &MRI = MF.getRegInfo(); 196259698Sdim const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>( 197259698Sdim MF.getTarget().getRegisterInfo()); 198259698Sdim const SIInstrInfo *TII = static_cast<const SIInstrInfo *>( 199259698Sdim MF.getTarget().getInstrInfo()); 200259698Sdim for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); 201259698Sdim BI != BE; ++BI) { 202259698Sdim 203259698Sdim MachineBasicBlock &MBB = *BI; 204259698Sdim for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); 205259698Sdim I != E; ++I) { 206259698Sdim MachineInstr &MI = *I; 207259698Sdim if (MI.getOpcode() == AMDGPU::COPY && isVGPRToSGPRCopy(MI, TRI, MRI)) { 208259698Sdim DEBUG(dbgs() << "Fixing VGPR -> SGPR copy:\n"); 209259698Sdim DEBUG(MI.print(dbgs())); 210259698Sdim TII->moveToVALU(MI); 211259698Sdim 212259698Sdim } 213259698Sdim 214259698Sdim switch (MI.getOpcode()) { 215259698Sdim default: continue; 216259698Sdim case AMDGPU::PHI: { 217259698Sdim DEBUG(dbgs() << " Fixing PHI:\n"); 218259698Sdim DEBUG(MI.print(dbgs())); 219259698Sdim 220259698Sdim for (unsigned i = 1; i < MI.getNumOperands(); i+=2) { 221259698Sdim unsigned Reg = MI.getOperand(i).getReg(); 222259698Sdim const TargetRegisterClass *RC = inferRegClassFromDef(TRI, MRI, Reg, 223259698Sdim MI.getOperand(0).getSubReg()); 224259698Sdim MRI.constrainRegClass(Reg, RC); 225259698Sdim } 226259698Sdim unsigned Reg = MI.getOperand(0).getReg(); 227259698Sdim const TargetRegisterClass *RC = 
inferRegClassFromUses(TRI, MRI, Reg, 228259698Sdim MI.getOperand(0).getSubReg()); 229259698Sdim if (TRI->getCommonSubClass(RC, &AMDGPU::VReg_32RegClass)) { 230259698Sdim MRI.constrainRegClass(Reg, &AMDGPU::VReg_32RegClass); 231259698Sdim } 232259698Sdim 233259698Sdim if (!TRI->isSGPRClass(MRI.getRegClass(Reg))) 234259698Sdim break; 235259698Sdim 236259698Sdim // If a PHI node defines an SGPR and any of its operands are VGPRs, 237259698Sdim // then we need to move it to the VALU. 238259698Sdim for (unsigned i = 1; i < MI.getNumOperands(); i+=2) { 239259698Sdim unsigned Reg = MI.getOperand(i).getReg(); 240259698Sdim if (TRI->hasVGPRs(MRI.getRegClass(Reg))) { 241259698Sdim TII->moveToVALU(MI); 242259698Sdim break; 243259698Sdim } 244259698Sdim } 245259698Sdim 246259698Sdim break; 247259698Sdim } 248259698Sdim case AMDGPU::REG_SEQUENCE: { 249259698Sdim if (TRI->hasVGPRs(TII->getOpRegClass(MI, 0)) || 250259698Sdim !hasVGPROperands(MI, TRI)) 251259698Sdim continue; 252259698Sdim 253259698Sdim DEBUG(dbgs() << "Fixing REG_SEQUENCE:\n"); 254259698Sdim DEBUG(MI.print(dbgs())); 255259698Sdim 256259698Sdim TII->moveToVALU(MI); 257259698Sdim break; 258259698Sdim } 259259698Sdim } 260259698Sdim } 261259698Sdim } 262259698Sdim return false; 263259698Sdim} 264