1259698Sdim//===-- R600ClauseMergePass - Merge consecutive CF_ALU -------------------===//
2259698Sdim//
3259698Sdim//                     The LLVM Compiler Infrastructure
4259698Sdim//
5259698Sdim// This file is distributed under the University of Illinois Open Source
6259698Sdim// License. See LICENSE.TXT for details.
7259698Sdim//
8259698Sdim//===----------------------------------------------------------------------===//
9259698Sdim//
10259698Sdim/// \file
/// The R600EmitClauseMarker pass emits CFAlu instructions in a conservative
/// manner. This pass merges consecutive CFAlus where applicable.
/// It needs to be called after IfCvt for best results.
14259698Sdim//===----------------------------------------------------------------------===//
15259698Sdim
16259698Sdim#define DEBUG_TYPE "r600mergeclause"
17259698Sdim#include "AMDGPU.h"
18259698Sdim#include "R600Defines.h"
19259698Sdim#include "R600InstrInfo.h"
20259698Sdim#include "R600MachineFunctionInfo.h"
21259698Sdim#include "R600RegisterInfo.h"
22259698Sdim#include "llvm/CodeGen/MachineFunctionPass.h"
23259698Sdim#include "llvm/CodeGen/MachineInstrBuilder.h"
24259698Sdim#include "llvm/CodeGen/MachineRegisterInfo.h"
25259698Sdim#include "llvm/Support/Debug.h"
26259698Sdim#include "llvm/Support/raw_ostream.h"
27259698Sdim
28259698Sdimusing namespace llvm;
29259698Sdim
30259698Sdimnamespace {
31259698Sdim
32259698Sdimstatic bool isCFAlu(const MachineInstr *MI) {
33259698Sdim  switch (MI->getOpcode()) {
34259698Sdim  case AMDGPU::CF_ALU:
35259698Sdim  case AMDGPU::CF_ALU_PUSH_BEFORE:
36259698Sdim    return true;
37259698Sdim  default:
38259698Sdim    return false;
39259698Sdim  }
40259698Sdim}
41259698Sdim
42259698Sdimclass R600ClauseMergePass : public MachineFunctionPass {
43259698Sdim
44259698Sdimprivate:
45259698Sdim  static char ID;
46259698Sdim  const R600InstrInfo *TII;
47259698Sdim
48259698Sdim  unsigned getCFAluSize(const MachineInstr *MI) const;
49259698Sdim  bool isCFAluEnabled(const MachineInstr *MI) const;
50259698Sdim
51259698Sdim  /// IfCvt pass can generate "disabled" ALU clause marker that need to be
52259698Sdim  /// removed and their content affected to the previous alu clause.
53259698Sdim  /// This function parse instructions after CFAlu untill it find a disabled
54259698Sdim  /// CFAlu and merge the content, or an enabled CFAlu.
55259698Sdim  void cleanPotentialDisabledCFAlu(MachineInstr *CFAlu) const;
56259698Sdim
57259698Sdim  /// Check whether LatrCFAlu can be merged into RootCFAlu and do it if
58259698Sdim  /// it is the case.
59259698Sdim  bool mergeIfPossible(MachineInstr *RootCFAlu, const MachineInstr *LatrCFAlu)
60259698Sdim      const;
61259698Sdim
62259698Sdimpublic:
63259698Sdim  R600ClauseMergePass(TargetMachine &tm) : MachineFunctionPass(ID) { }
64259698Sdim
65259698Sdim  virtual bool runOnMachineFunction(MachineFunction &MF);
66259698Sdim
67259698Sdim  const char *getPassName() const;
68259698Sdim};
69259698Sdim
70259698Sdimchar R600ClauseMergePass::ID = 0;
71259698Sdim
72259698Sdimunsigned R600ClauseMergePass::getCFAluSize(const MachineInstr *MI) const {
73259698Sdim  assert(isCFAlu(MI));
74259698Sdim  return MI->getOperand(
75259698Sdim      TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::COUNT)).getImm();
76259698Sdim}
77259698Sdim
78259698Sdimbool R600ClauseMergePass::isCFAluEnabled(const MachineInstr *MI) const {
79259698Sdim  assert(isCFAlu(MI));
80259698Sdim  return MI->getOperand(
81259698Sdim      TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::Enabled)).getImm();
82259698Sdim}
83259698Sdim
84259698Sdimvoid R600ClauseMergePass::cleanPotentialDisabledCFAlu(MachineInstr *CFAlu)
85259698Sdim    const {
86259698Sdim  int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
87259698Sdim  MachineBasicBlock::iterator I = CFAlu, E = CFAlu->getParent()->end();
88259698Sdim  I++;
89259698Sdim  do {
90259698Sdim    while (I!= E && !isCFAlu(I))
91259698Sdim      I++;
92259698Sdim    if (I == E)
93259698Sdim      return;
94259698Sdim    MachineInstr *MI = I++;
95259698Sdim    if (isCFAluEnabled(MI))
96259698Sdim      break;
97259698Sdim    CFAlu->getOperand(CntIdx).setImm(getCFAluSize(CFAlu) + getCFAluSize(MI));
98259698Sdim    MI->eraseFromParent();
99259698Sdim  } while (I != E);
100259698Sdim}
101259698Sdim
102259698Sdimbool R600ClauseMergePass::mergeIfPossible(MachineInstr *RootCFAlu,
103259698Sdim                                          const MachineInstr *LatrCFAlu) const {
104259698Sdim  assert(isCFAlu(RootCFAlu) && isCFAlu(LatrCFAlu));
105259698Sdim  int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
106259698Sdim  unsigned RootInstCount = getCFAluSize(RootCFAlu),
107259698Sdim      LaterInstCount = getCFAluSize(LatrCFAlu);
108259698Sdim  unsigned CumuledInsts = RootInstCount + LaterInstCount;
109259698Sdim  if (CumuledInsts >= TII->getMaxAlusPerClause()) {
110259698Sdim    DEBUG(dbgs() << "Excess inst counts\n");
111259698Sdim    return false;
112259698Sdim  }
113259698Sdim  if (RootCFAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
114259698Sdim    return false;
115259698Sdim  // Is KCache Bank 0 compatible ?
116259698Sdim  int Mode0Idx =
117259698Sdim      TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE0);
118259698Sdim  int KBank0Idx =
119259698Sdim      TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK0);
120259698Sdim  int KBank0LineIdx =
121259698Sdim      TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR0);
122259698Sdim  if (LatrCFAlu->getOperand(Mode0Idx).getImm() &&
123259698Sdim      RootCFAlu->getOperand(Mode0Idx).getImm() &&
124259698Sdim      (LatrCFAlu->getOperand(KBank0Idx).getImm() !=
125259698Sdim       RootCFAlu->getOperand(KBank0Idx).getImm() ||
126259698Sdim      LatrCFAlu->getOperand(KBank0LineIdx).getImm() !=
127259698Sdim      RootCFAlu->getOperand(KBank0LineIdx).getImm())) {
128259698Sdim    DEBUG(dbgs() << "Wrong KC0\n");
129259698Sdim    return false;
130259698Sdim  }
131259698Sdim  // Is KCache Bank 1 compatible ?
132259698Sdim  int Mode1Idx =
133259698Sdim      TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE1);
134259698Sdim  int KBank1Idx =
135259698Sdim      TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK1);
136259698Sdim  int KBank1LineIdx =
137259698Sdim      TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR1);
138259698Sdim  if (LatrCFAlu->getOperand(Mode1Idx).getImm() &&
139259698Sdim      RootCFAlu->getOperand(Mode1Idx).getImm() &&
140259698Sdim      (LatrCFAlu->getOperand(KBank1Idx).getImm() !=
141259698Sdim      RootCFAlu->getOperand(KBank1Idx).getImm() ||
142259698Sdim      LatrCFAlu->getOperand(KBank1LineIdx).getImm() !=
143259698Sdim      RootCFAlu->getOperand(KBank1LineIdx).getImm())) {
144259698Sdim    DEBUG(dbgs() << "Wrong KC0\n");
145259698Sdim    return false;
146259698Sdim  }
147259698Sdim  if (LatrCFAlu->getOperand(Mode0Idx).getImm()) {
148259698Sdim    RootCFAlu->getOperand(Mode0Idx).setImm(
149259698Sdim        LatrCFAlu->getOperand(Mode0Idx).getImm());
150259698Sdim    RootCFAlu->getOperand(KBank0Idx).setImm(
151259698Sdim        LatrCFAlu->getOperand(KBank0Idx).getImm());
152259698Sdim    RootCFAlu->getOperand(KBank0LineIdx).setImm(
153259698Sdim        LatrCFAlu->getOperand(KBank0LineIdx).getImm());
154259698Sdim  }
155259698Sdim  if (LatrCFAlu->getOperand(Mode1Idx).getImm()) {
156259698Sdim    RootCFAlu->getOperand(Mode1Idx).setImm(
157259698Sdim        LatrCFAlu->getOperand(Mode1Idx).getImm());
158259698Sdim    RootCFAlu->getOperand(KBank1Idx).setImm(
159259698Sdim        LatrCFAlu->getOperand(KBank1Idx).getImm());
160259698Sdim    RootCFAlu->getOperand(KBank1LineIdx).setImm(
161259698Sdim        LatrCFAlu->getOperand(KBank1LineIdx).getImm());
162259698Sdim  }
163259698Sdim  RootCFAlu->getOperand(CntIdx).setImm(CumuledInsts);
164259698Sdim  RootCFAlu->setDesc(TII->get(LatrCFAlu->getOpcode()));
165259698Sdim  return true;
166259698Sdim}
167259698Sdim
168259698Sdimbool R600ClauseMergePass::runOnMachineFunction(MachineFunction &MF) {
169259698Sdim  TII = static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo());
170259698Sdim  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
171259698Sdim                                                  BB != BB_E; ++BB) {
172259698Sdim    MachineBasicBlock &MBB = *BB;
173259698Sdim    MachineBasicBlock::iterator I = MBB.begin(),  E = MBB.end();
174259698Sdim    MachineBasicBlock::iterator LatestCFAlu = E;
175259698Sdim    while (I != E) {
176259698Sdim      MachineInstr *MI = I++;
177259698Sdim      if ((!TII->canBeConsideredALU(MI) && !isCFAlu(MI)) ||
178259698Sdim          TII->mustBeLastInClause(MI->getOpcode()))
179259698Sdim        LatestCFAlu = E;
180259698Sdim      if (!isCFAlu(MI))
181259698Sdim        continue;
182259698Sdim      cleanPotentialDisabledCFAlu(MI);
183259698Sdim
184259698Sdim      if (LatestCFAlu != E && mergeIfPossible(LatestCFAlu, MI)) {
185259698Sdim        MI->eraseFromParent();
186259698Sdim      } else {
187259698Sdim        assert(MI->getOperand(8).getImm() && "CF ALU instruction disabled");
188259698Sdim        LatestCFAlu = MI;
189259698Sdim      }
190259698Sdim    }
191259698Sdim  }
192259698Sdim  return false;
193259698Sdim}
194259698Sdim
195259698Sdimconst char *R600ClauseMergePass::getPassName() const {
196259698Sdim  return "R600 Merge Clause Markers Pass";
197259698Sdim}
198259698Sdim
199259698Sdim} // end anonymous namespace
200259698Sdim
201259698Sdim
202259698Sdimllvm::FunctionPass *llvm::createR600ClauseMergePass(TargetMachine &TM) {
203259698Sdim  return new R600ClauseMergePass(TM);
204259698Sdim}
205