R600ControlFlowFinalizer.cpp revision 249259
1//===-- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst----------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10/// \file 11/// This pass compute turns all control flow pseudo instructions into native one 12/// computing their address on the fly ; it also sets STACK_SIZE info. 13//===----------------------------------------------------------------------===// 14 15#define DEBUG_TYPE "r600cf" 16#include "llvm/Support/Debug.h" 17#include "llvm/Support/raw_ostream.h" 18 19#include "AMDGPU.h" 20#include "R600Defines.h" 21#include "R600InstrInfo.h" 22#include "R600MachineFunctionInfo.h" 23#include "R600RegisterInfo.h" 24#include "llvm/CodeGen/MachineFunctionPass.h" 25#include "llvm/CodeGen/MachineInstrBuilder.h" 26#include "llvm/CodeGen/MachineRegisterInfo.h" 27 28namespace llvm { 29 30class R600ControlFlowFinalizer : public MachineFunctionPass { 31 32private: 33 static char ID; 34 const R600InstrInfo *TII; 35 unsigned MaxFetchInst; 36 37 bool isFetch(const MachineInstr *MI) const { 38 switch (MI->getOpcode()) { 39 case AMDGPU::TEX_VTX_CONSTBUF: 40 case AMDGPU::TEX_VTX_TEXBUF: 41 case AMDGPU::TEX_LD: 42 case AMDGPU::TEX_GET_TEXTURE_RESINFO: 43 case AMDGPU::TEX_GET_GRADIENTS_H: 44 case AMDGPU::TEX_GET_GRADIENTS_V: 45 case AMDGPU::TEX_SET_GRADIENTS_H: 46 case AMDGPU::TEX_SET_GRADIENTS_V: 47 case AMDGPU::TEX_SAMPLE: 48 case AMDGPU::TEX_SAMPLE_C: 49 case AMDGPU::TEX_SAMPLE_L: 50 case AMDGPU::TEX_SAMPLE_C_L: 51 case AMDGPU::TEX_SAMPLE_LB: 52 case AMDGPU::TEX_SAMPLE_C_LB: 53 case AMDGPU::TEX_SAMPLE_G: 54 case AMDGPU::TEX_SAMPLE_C_G: 55 case AMDGPU::TXD: 56 case AMDGPU::TXD_SHADOW: 57 return true; 58 default: 59 return false; 60 } 61 } 62 63 bool IsTrivialInst(MachineInstr *MI) const { 64 switch (MI->getOpcode()) { 65 case AMDGPU::KILL: 66 case AMDGPU::RETURN: 67 return true; 68 default: 69 return false; 70 } 71 } 72 73 MachineBasicBlock::iterator 74 MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, 75 unsigned CfAddress) const { 76 MachineBasicBlock::iterator ClauseHead = I; 77 unsigned AluInstCount = 0; 78 for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) { 79 if (IsTrivialInst(I)) 80 continue; 81 if (!isFetch(I)) 82 break; 83 AluInstCount ++; 84 if (AluInstCount > MaxFetchInst) 85 break; 86 } 87 BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), 88 TII->get(AMDGPU::CF_TC)) 89 .addImm(CfAddress) // ADDR 90 .addImm(AluInstCount); // COUNT 91 return I; 92 } 93 void CounterPropagateAddr(MachineInstr *MI, unsigned Addr) const { 94 MI->getOperand(0).setImm(Addr + MI->getOperand(0).getImm()); 95 } 96 void CounterPropagateAddr(std::set<MachineInstr *> MIs, unsigned Addr) 97 const { 98 for (std::set<MachineInstr *>::iterator It = MIs.begin(), E = MIs.end(); 99 It != E; ++It) { 100 MachineInstr *MI = *It; 101 CounterPropagateAddr(MI, Addr); 102 } 103 } 104 105public: 106 R600ControlFlowFinalizer(TargetMachine &tm) : MachineFunctionPass(ID), 107 TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { 108 const AMDGPUSubtarget &ST = tm.getSubtarget<AMDGPUSubtarget>(); 109 if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD4XXX) 110 MaxFetchInst = 8; 111 else 112 MaxFetchInst = 16; 113 } 114 115 virtual bool runOnMachineFunction(MachineFunction &MF) { 116 unsigned MaxStack = 0; 117 unsigned CurrentStack = 0; 118 for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME; 119 ++MB) { 120 MachineBasicBlock &MBB = *MB; 121 unsigned CfCount = 0; 122 std::vector<std::pair<unsigned, std::set<MachineInstr *> > > LoopStack; 123 std::vector<MachineInstr * > IfThenElseStack; 124 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); 125 if (MFI->ShaderType == 1) { 126 BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()), 127 TII->get(AMDGPU::CF_CALL_FS)); 128 CfCount++; 129 } 130 for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); 131 I != E;) { 132 if (isFetch(I)) { 133 DEBUG(dbgs() << CfCount << ":"; I->dump();); 134 I = MakeFetchClause(MBB, I, 0); 135 CfCount++; 136 continue; 137 } 138 139 MachineBasicBlock::iterator MI = I; 140 I++; 141 switch (MI->getOpcode()) { 142 case AMDGPU::CF_ALU_PUSH_BEFORE: 143 CurrentStack++; 144 MaxStack = std::max(MaxStack, CurrentStack); 145 case AMDGPU::CF_ALU: 146 case AMDGPU::EG_ExportBuf: 147 case AMDGPU::EG_ExportSwz: 148 case AMDGPU::R600_ExportBuf: 149 case AMDGPU::R600_ExportSwz: 150 DEBUG(dbgs() << CfCount << ":"; MI->dump();); 151 CfCount++; 152 break; 153 case AMDGPU::WHILELOOP: { 154 CurrentStack++; 155 MaxStack = std::max(MaxStack, CurrentStack); 156 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 157 TII->get(AMDGPU::WHILE_LOOP)) 158 .addImm(2); 159 std::pair<unsigned, std::set<MachineInstr *> > Pair(CfCount, 160 std::set<MachineInstr *>()); 161 Pair.second.insert(MIb); 162 LoopStack.push_back(Pair); 163 MI->eraseFromParent(); 164 CfCount++; 165 break; 166 } 167 case AMDGPU::ENDLOOP: { 168 CurrentStack--; 169 std::pair<unsigned, std::set<MachineInstr *> > Pair = 170 LoopStack.back(); 171 LoopStack.pop_back(); 172 CounterPropagateAddr(Pair.second, CfCount); 173 BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::END_LOOP)) 174 .addImm(Pair.first + 1); 175 MI->eraseFromParent(); 176 CfCount++; 177 break; 178 } 179 case AMDGPU::IF_PREDICATE_SET: { 180 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 181 TII->get(AMDGPU::CF_JUMP)) 182 .addImm(0) 183 .addImm(0); 184 IfThenElseStack.push_back(MIb); 185 DEBUG(dbgs() << CfCount << ":"; MIb->dump();); 186 MI->eraseFromParent(); 187 CfCount++; 188 break; 189 } 190 case AMDGPU::ELSE: { 191 MachineInstr * JumpInst = IfThenElseStack.back(); 192 IfThenElseStack.pop_back(); 193 CounterPropagateAddr(JumpInst, CfCount); 194 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 195 TII->get(AMDGPU::CF_ELSE)) 196 .addImm(0) 197 .addImm(1); 198 DEBUG(dbgs() << CfCount << ":"; MIb->dump();); 199 IfThenElseStack.push_back(MIb); 200 MI->eraseFromParent(); 201 CfCount++; 202 break; 203 } 204 case AMDGPU::ENDIF: { 205 CurrentStack--; 206 MachineInstr *IfOrElseInst = IfThenElseStack.back(); 207 IfThenElseStack.pop_back(); 208 CounterPropagateAddr(IfOrElseInst, CfCount + 1); 209 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 210 TII->get(AMDGPU::POP)) 211 .addImm(CfCount + 1) 212 .addImm(1); 213 DEBUG(dbgs() << CfCount << ":"; MIb->dump();); 214 MI->eraseFromParent(); 215 CfCount++; 216 break; 217 } 218 case AMDGPU::PREDICATED_BREAK: { 219 CurrentStack--; 220 CfCount += 3; 221 BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_JUMP)) 222 .addImm(CfCount) 223 .addImm(1); 224 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 225 TII->get(AMDGPU::LOOP_BREAK)) 226 .addImm(0); 227 BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::POP)) 228 .addImm(CfCount) 229 .addImm(1); 230 LoopStack.back().second.insert(MIb); 231 MI->eraseFromParent(); 232 break; 233 } 234 case AMDGPU::CONTINUE: { 235 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 236 TII->get(AMDGPU::CF_CONTINUE)) 237 .addImm(0); 238 LoopStack.back().second.insert(MIb); 239 MI->eraseFromParent(); 240 CfCount++; 241 break; 242 } 243 default: 244 break; 245 } 246 } 247 BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()), 248 TII->get(AMDGPU::STACK_SIZE)) 249 .addImm(MaxStack); 250 } 251 252 return false; 253 } 254 255 const char *getPassName() const { 256 return "R600 Control Flow Finalizer Pass"; 257 } 258}; 259 260char R600ControlFlowFinalizer::ID = 0; 261 262} 263 264 265llvm::FunctionPass *llvm::createR600ControlFlowFinalizer(TargetMachine &TM) { 266 return new R600ControlFlowFinalizer(TM); 267} 268 269