1259698Sdim//===-- R600ClauseMergePass - Merge consecutive CF_ALU -------------------===// 2259698Sdim// 3259698Sdim// The LLVM Compiler Infrastructure 4259698Sdim// 5259698Sdim// This file is distributed under the University of Illinois Open Source 6259698Sdim// License. See LICENSE.TXT for details. 7259698Sdim// 8259698Sdim//===----------------------------------------------------------------------===// 9259698Sdim// 10259698Sdim/// \file 11259698Sdim/// R600EmitClauseMarker pass emits CFAlu instruction in a conservative maneer. 12259698Sdim/// This pass is merging consecutive CFAlus where applicable. 13259698Sdim/// It needs to be called after IfCvt for best results. 14259698Sdim//===----------------------------------------------------------------------===// 15259698Sdim 16259698Sdim#define DEBUG_TYPE "r600mergeclause" 17259698Sdim#include "AMDGPU.h" 18259698Sdim#include "R600Defines.h" 19259698Sdim#include "R600InstrInfo.h" 20259698Sdim#include "R600MachineFunctionInfo.h" 21259698Sdim#include "R600RegisterInfo.h" 22259698Sdim#include "llvm/CodeGen/MachineFunctionPass.h" 23259698Sdim#include "llvm/CodeGen/MachineInstrBuilder.h" 24259698Sdim#include "llvm/CodeGen/MachineRegisterInfo.h" 25259698Sdim#include "llvm/Support/Debug.h" 26259698Sdim#include "llvm/Support/raw_ostream.h" 27259698Sdim 28259698Sdimusing namespace llvm; 29259698Sdim 30259698Sdimnamespace { 31259698Sdim 32259698Sdimstatic bool isCFAlu(const MachineInstr *MI) { 33259698Sdim switch (MI->getOpcode()) { 34259698Sdim case AMDGPU::CF_ALU: 35259698Sdim case AMDGPU::CF_ALU_PUSH_BEFORE: 36259698Sdim return true; 37259698Sdim default: 38259698Sdim return false; 39259698Sdim } 40259698Sdim} 41259698Sdim 42259698Sdimclass R600ClauseMergePass : public MachineFunctionPass { 43259698Sdim 44259698Sdimprivate: 45259698Sdim static char ID; 46259698Sdim const R600InstrInfo *TII; 47259698Sdim 48259698Sdim unsigned getCFAluSize(const MachineInstr *MI) const; 49259698Sdim bool isCFAluEnabled(const MachineInstr *MI) const; 50259698Sdim 51259698Sdim /// IfCvt pass can generate "disabled" ALU clause marker that need to be 52259698Sdim /// removed and their content affected to the previous alu clause. 53259698Sdim /// This function parse instructions after CFAlu untill it find a disabled 54259698Sdim /// CFAlu and merge the content, or an enabled CFAlu. 55259698Sdim void cleanPotentialDisabledCFAlu(MachineInstr *CFAlu) const; 56259698Sdim 57259698Sdim /// Check whether LatrCFAlu can be merged into RootCFAlu and do it if 58259698Sdim /// it is the case. 59259698Sdim bool mergeIfPossible(MachineInstr *RootCFAlu, const MachineInstr *LatrCFAlu) 60259698Sdim const; 61259698Sdim 62259698Sdimpublic: 63259698Sdim R600ClauseMergePass(TargetMachine &tm) : MachineFunctionPass(ID) { } 64259698Sdim 65259698Sdim virtual bool runOnMachineFunction(MachineFunction &MF); 66259698Sdim 67259698Sdim const char *getPassName() const; 68259698Sdim}; 69259698Sdim 70259698Sdimchar R600ClauseMergePass::ID = 0; 71259698Sdim 72259698Sdimunsigned R600ClauseMergePass::getCFAluSize(const MachineInstr *MI) const { 73259698Sdim assert(isCFAlu(MI)); 74259698Sdim return MI->getOperand( 75259698Sdim TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::COUNT)).getImm(); 76259698Sdim} 77259698Sdim 78259698Sdimbool R600ClauseMergePass::isCFAluEnabled(const MachineInstr *MI) const { 79259698Sdim assert(isCFAlu(MI)); 80259698Sdim return MI->getOperand( 81259698Sdim TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::Enabled)).getImm(); 82259698Sdim} 83259698Sdim 84259698Sdimvoid R600ClauseMergePass::cleanPotentialDisabledCFAlu(MachineInstr *CFAlu) 85259698Sdim const { 86259698Sdim int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT); 87259698Sdim MachineBasicBlock::iterator I = CFAlu, E = CFAlu->getParent()->end(); 88259698Sdim I++; 89259698Sdim do { 90259698Sdim while (I!= E && !isCFAlu(I)) 91259698Sdim I++; 92259698Sdim if (I == E) 93259698Sdim return; 94259698Sdim MachineInstr *MI = I++; 95259698Sdim if (isCFAluEnabled(MI)) 96259698Sdim break; 97259698Sdim CFAlu->getOperand(CntIdx).setImm(getCFAluSize(CFAlu) + getCFAluSize(MI)); 98259698Sdim MI->eraseFromParent(); 99259698Sdim } while (I != E); 100259698Sdim} 101259698Sdim 102259698Sdimbool R600ClauseMergePass::mergeIfPossible(MachineInstr *RootCFAlu, 103259698Sdim const MachineInstr *LatrCFAlu) const { 104259698Sdim assert(isCFAlu(RootCFAlu) && isCFAlu(LatrCFAlu)); 105259698Sdim int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT); 106259698Sdim unsigned RootInstCount = getCFAluSize(RootCFAlu), 107259698Sdim LaterInstCount = getCFAluSize(LatrCFAlu); 108259698Sdim unsigned CumuledInsts = RootInstCount + LaterInstCount; 109259698Sdim if (CumuledInsts >= TII->getMaxAlusPerClause()) { 110259698Sdim DEBUG(dbgs() << "Excess inst counts\n"); 111259698Sdim return false; 112259698Sdim } 113259698Sdim if (RootCFAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE) 114259698Sdim return false; 115259698Sdim // Is KCache Bank 0 compatible ? 116259698Sdim int Mode0Idx = 117259698Sdim TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE0); 118259698Sdim int KBank0Idx = 119259698Sdim TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK0); 120259698Sdim int KBank0LineIdx = 121259698Sdim TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR0); 122259698Sdim if (LatrCFAlu->getOperand(Mode0Idx).getImm() && 123259698Sdim RootCFAlu->getOperand(Mode0Idx).getImm() && 124259698Sdim (LatrCFAlu->getOperand(KBank0Idx).getImm() != 125259698Sdim RootCFAlu->getOperand(KBank0Idx).getImm() || 126259698Sdim LatrCFAlu->getOperand(KBank0LineIdx).getImm() != 127259698Sdim RootCFAlu->getOperand(KBank0LineIdx).getImm())) { 128259698Sdim DEBUG(dbgs() << "Wrong KC0\n"); 129259698Sdim return false; 130259698Sdim } 131259698Sdim // Is KCache Bank 1 compatible ? 132259698Sdim int Mode1Idx = 133259698Sdim TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE1); 134259698Sdim int KBank1Idx = 135259698Sdim TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK1); 136259698Sdim int KBank1LineIdx = 137259698Sdim TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR1); 138259698Sdim if (LatrCFAlu->getOperand(Mode1Idx).getImm() && 139259698Sdim RootCFAlu->getOperand(Mode1Idx).getImm() && 140259698Sdim (LatrCFAlu->getOperand(KBank1Idx).getImm() != 141259698Sdim RootCFAlu->getOperand(KBank1Idx).getImm() || 142259698Sdim LatrCFAlu->getOperand(KBank1LineIdx).getImm() != 143259698Sdim RootCFAlu->getOperand(KBank1LineIdx).getImm())) { 144259698Sdim DEBUG(dbgs() << "Wrong KC0\n"); 145259698Sdim return false; 146259698Sdim } 147259698Sdim if (LatrCFAlu->getOperand(Mode0Idx).getImm()) { 148259698Sdim RootCFAlu->getOperand(Mode0Idx).setImm( 149259698Sdim LatrCFAlu->getOperand(Mode0Idx).getImm()); 150259698Sdim RootCFAlu->getOperand(KBank0Idx).setImm( 151259698Sdim LatrCFAlu->getOperand(KBank0Idx).getImm()); 152259698Sdim RootCFAlu->getOperand(KBank0LineIdx).setImm( 153259698Sdim LatrCFAlu->getOperand(KBank0LineIdx).getImm()); 154259698Sdim } 155259698Sdim if (LatrCFAlu->getOperand(Mode1Idx).getImm()) { 156259698Sdim RootCFAlu->getOperand(Mode1Idx).setImm( 157259698Sdim LatrCFAlu->getOperand(Mode1Idx).getImm()); 158259698Sdim RootCFAlu->getOperand(KBank1Idx).setImm( 159259698Sdim LatrCFAlu->getOperand(KBank1Idx).getImm()); 160259698Sdim RootCFAlu->getOperand(KBank1LineIdx).setImm( 161259698Sdim LatrCFAlu->getOperand(KBank1LineIdx).getImm()); 162259698Sdim } 163259698Sdim RootCFAlu->getOperand(CntIdx).setImm(CumuledInsts); 164259698Sdim RootCFAlu->setDesc(TII->get(LatrCFAlu->getOpcode())); 165259698Sdim return true; 166259698Sdim} 167259698Sdim 168259698Sdimbool R600ClauseMergePass::runOnMachineFunction(MachineFunction &MF) { 169259698Sdim TII = static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo()); 170259698Sdim for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); 171259698Sdim BB != BB_E; ++BB) { 172259698Sdim MachineBasicBlock &MBB = *BB; 173259698Sdim MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); 174259698Sdim MachineBasicBlock::iterator LatestCFAlu = E; 175259698Sdim while (I != E) { 176259698Sdim MachineInstr *MI = I++; 177259698Sdim if ((!TII->canBeConsideredALU(MI) && !isCFAlu(MI)) || 178259698Sdim TII->mustBeLastInClause(MI->getOpcode())) 179259698Sdim LatestCFAlu = E; 180259698Sdim if (!isCFAlu(MI)) 181259698Sdim continue; 182259698Sdim cleanPotentialDisabledCFAlu(MI); 183259698Sdim 184259698Sdim if (LatestCFAlu != E && mergeIfPossible(LatestCFAlu, MI)) { 185259698Sdim MI->eraseFromParent(); 186259698Sdim } else { 187259698Sdim assert(MI->getOperand(8).getImm() && "CF ALU instruction disabled"); 188259698Sdim LatestCFAlu = MI; 189259698Sdim } 190259698Sdim } 191259698Sdim } 192259698Sdim return false; 193259698Sdim} 194259698Sdim 195259698Sdimconst char *R600ClauseMergePass::getPassName() const { 196259698Sdim return "R600 Merge Clause Markers Pass"; 197259698Sdim} 198259698Sdim 199259698Sdim} // end anonymous namespace 200259698Sdim 201259698Sdim 202259698Sdimllvm::FunctionPass *llvm::createR600ClauseMergePass(TargetMachine &TM) { 203259698Sdim return new R600ClauseMergePass(TM); 204259698Sdim} 205