1//===-- R600ClauseMergePass - Merge consecutive CF_ALU -------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10/// \file 11/// R600EmitClauseMarker pass emits CFAlu instruction in a conservative maneer. 12/// This pass is merging consecutive CFAlus where applicable. 13/// It needs to be called after IfCvt for best results. 14//===----------------------------------------------------------------------===// 15 16#define DEBUG_TYPE "r600mergeclause" 17#include "AMDGPU.h" 18#include "R600Defines.h" 19#include "R600InstrInfo.h" 20#include "R600MachineFunctionInfo.h" 21#include "R600RegisterInfo.h" 22#include "llvm/CodeGen/MachineFunctionPass.h" 23#include "llvm/CodeGen/MachineInstrBuilder.h" 24#include "llvm/CodeGen/MachineRegisterInfo.h" 25#include "llvm/Support/Debug.h" 26#include "llvm/Support/raw_ostream.h" 27 28using namespace llvm; 29 30namespace { 31 32static bool isCFAlu(const MachineInstr *MI) { 33 switch (MI->getOpcode()) { 34 case AMDGPU::CF_ALU: 35 case AMDGPU::CF_ALU_PUSH_BEFORE: 36 return true; 37 default: 38 return false; 39 } 40} 41 42class R600ClauseMergePass : public MachineFunctionPass { 43 44private: 45 static char ID; 46 const R600InstrInfo *TII; 47 48 unsigned getCFAluSize(const MachineInstr *MI) const; 49 bool isCFAluEnabled(const MachineInstr *MI) const; 50 51 /// IfCvt pass can generate "disabled" ALU clause marker that need to be 52 /// removed and their content affected to the previous alu clause. 53 /// This function parse instructions after CFAlu untill it find a disabled 54 /// CFAlu and merge the content, or an enabled CFAlu. 55 void cleanPotentialDisabledCFAlu(MachineInstr *CFAlu) const; 56 57 /// Check whether LatrCFAlu can be merged into RootCFAlu and do it if 58 /// it is the case. 59 bool mergeIfPossible(MachineInstr *RootCFAlu, const MachineInstr *LatrCFAlu) 60 const; 61 62public: 63 R600ClauseMergePass(TargetMachine &tm) : MachineFunctionPass(ID) { } 64 65 virtual bool runOnMachineFunction(MachineFunction &MF); 66 67 const char *getPassName() const; 68}; 69 70char R600ClauseMergePass::ID = 0; 71 72unsigned R600ClauseMergePass::getCFAluSize(const MachineInstr *MI) const { 73 assert(isCFAlu(MI)); 74 return MI->getOperand( 75 TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::COUNT)).getImm(); 76} 77 78bool R600ClauseMergePass::isCFAluEnabled(const MachineInstr *MI) const { 79 assert(isCFAlu(MI)); 80 return MI->getOperand( 81 TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::Enabled)).getImm(); 82} 83 84void R600ClauseMergePass::cleanPotentialDisabledCFAlu(MachineInstr *CFAlu) 85 const { 86 int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT); 87 MachineBasicBlock::iterator I = CFAlu, E = CFAlu->getParent()->end(); 88 I++; 89 do { 90 while (I!= E && !isCFAlu(I)) 91 I++; 92 if (I == E) 93 return; 94 MachineInstr *MI = I++; 95 if (isCFAluEnabled(MI)) 96 break; 97 CFAlu->getOperand(CntIdx).setImm(getCFAluSize(CFAlu) + getCFAluSize(MI)); 98 MI->eraseFromParent(); 99 } while (I != E); 100} 101 102bool R600ClauseMergePass::mergeIfPossible(MachineInstr *RootCFAlu, 103 const MachineInstr *LatrCFAlu) const { 104 assert(isCFAlu(RootCFAlu) && isCFAlu(LatrCFAlu)); 105 int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT); 106 unsigned RootInstCount = getCFAluSize(RootCFAlu), 107 LaterInstCount = getCFAluSize(LatrCFAlu); 108 unsigned CumuledInsts = RootInstCount + LaterInstCount; 109 if (CumuledInsts >= TII->getMaxAlusPerClause()) { 110 DEBUG(dbgs() << "Excess inst counts\n"); 111 return false; 112 } 113 if (RootCFAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE) 114 return false; 115 // Is KCache Bank 0 compatible ? 116 int Mode0Idx = 117 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE0); 118 int KBank0Idx = 119 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK0); 120 int KBank0LineIdx = 121 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR0); 122 if (LatrCFAlu->getOperand(Mode0Idx).getImm() && 123 RootCFAlu->getOperand(Mode0Idx).getImm() && 124 (LatrCFAlu->getOperand(KBank0Idx).getImm() != 125 RootCFAlu->getOperand(KBank0Idx).getImm() || 126 LatrCFAlu->getOperand(KBank0LineIdx).getImm() != 127 RootCFAlu->getOperand(KBank0LineIdx).getImm())) { 128 DEBUG(dbgs() << "Wrong KC0\n"); 129 return false; 130 } 131 // Is KCache Bank 1 compatible ? 132 int Mode1Idx = 133 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE1); 134 int KBank1Idx = 135 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK1); 136 int KBank1LineIdx = 137 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR1); 138 if (LatrCFAlu->getOperand(Mode1Idx).getImm() && 139 RootCFAlu->getOperand(Mode1Idx).getImm() && 140 (LatrCFAlu->getOperand(KBank1Idx).getImm() != 141 RootCFAlu->getOperand(KBank1Idx).getImm() || 142 LatrCFAlu->getOperand(KBank1LineIdx).getImm() != 143 RootCFAlu->getOperand(KBank1LineIdx).getImm())) { 144 DEBUG(dbgs() << "Wrong KC0\n"); 145 return false; 146 } 147 if (LatrCFAlu->getOperand(Mode0Idx).getImm()) { 148 RootCFAlu->getOperand(Mode0Idx).setImm( 149 LatrCFAlu->getOperand(Mode0Idx).getImm()); 150 RootCFAlu->getOperand(KBank0Idx).setImm( 151 LatrCFAlu->getOperand(KBank0Idx).getImm()); 152 RootCFAlu->getOperand(KBank0LineIdx).setImm( 153 LatrCFAlu->getOperand(KBank0LineIdx).getImm()); 154 } 155 if (LatrCFAlu->getOperand(Mode1Idx).getImm()) { 156 RootCFAlu->getOperand(Mode1Idx).setImm( 157 LatrCFAlu->getOperand(Mode1Idx).getImm()); 158 RootCFAlu->getOperand(KBank1Idx).setImm( 159 LatrCFAlu->getOperand(KBank1Idx).getImm()); 160 RootCFAlu->getOperand(KBank1LineIdx).setImm( 161 LatrCFAlu->getOperand(KBank1LineIdx).getImm()); 162 } 163 RootCFAlu->getOperand(CntIdx).setImm(CumuledInsts); 164 RootCFAlu->setDesc(TII->get(LatrCFAlu->getOpcode())); 165 return true; 166} 167 168bool R600ClauseMergePass::runOnMachineFunction(MachineFunction &MF) { 169 TII = static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo()); 170 for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); 171 BB != BB_E; ++BB) { 172 MachineBasicBlock &MBB = *BB; 173 MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); 174 MachineBasicBlock::iterator LatestCFAlu = E; 175 while (I != E) { 176 MachineInstr *MI = I++; 177 if ((!TII->canBeConsideredALU(MI) && !isCFAlu(MI)) || 178 TII->mustBeLastInClause(MI->getOpcode())) 179 LatestCFAlu = E; 180 if (!isCFAlu(MI)) 181 continue; 182 cleanPotentialDisabledCFAlu(MI); 183 184 if (LatestCFAlu != E && mergeIfPossible(LatestCFAlu, MI)) { 185 MI->eraseFromParent(); 186 } else { 187 assert(MI->getOperand(8).getImm() && "CF ALU instruction disabled"); 188 LatestCFAlu = MI; 189 } 190 } 191 } 192 return false; 193} 194 195const char *R600ClauseMergePass::getPassName() const { 196 return "R600 Merge Clause Markers Pass"; 197} 198 199} // end anonymous namespace 200 201 202llvm::FunctionPass *llvm::createR600ClauseMergePass(TargetMachine &TM) { 203 return new R600ClauseMergePass(TM); 204} 205