1//===-- R600EmitClauseMarkers.cpp - Emit CF_ALU ---------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
/// Add CF_ALU. R600 ALU instructions are grouped into clauses which can hold
/// up to 128 ALU instructions; these instructions can access up to 4
/// prefetched lines of 16 registers each from constant buffers. Such ALU
/// clauses are initiated by CF_ALU instructions.
14//===----------------------------------------------------------------------===//
15
16#include "AMDGPU.h"
17#include "AMDGPUSubtarget.h"
18#include "R600Defines.h"
19#include "R600InstrInfo.h"
20#include "R600RegisterInfo.h"
21#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
22#include "llvm/ADT/SmallVector.h"
23#include "llvm/ADT/StringRef.h"
24#include "llvm/CodeGen/MachineBasicBlock.h"
25#include "llvm/CodeGen/MachineFunction.h"
26#include "llvm/CodeGen/MachineFunctionPass.h"
27#include "llvm/CodeGen/MachineInstr.h"
28#include "llvm/CodeGen/MachineInstrBuilder.h"
29#include "llvm/CodeGen/MachineOperand.h"
30#include "llvm/Pass.h"
31#include "llvm/Support/ErrorHandling.h"
32#include <cassert>
33#include <cstdint>
34#include <utility>
35#include <vector>
36
37using namespace llvm;
38
39namespace llvm {
40
41  void initializeR600EmitClauseMarkersPass(PassRegistry&);
42
43} // end namespace llvm
44
45namespace {
46
47class R600EmitClauseMarkers : public MachineFunctionPass {
48private:
49  const R600InstrInfo *TII = nullptr;
50  int Address = 0;
51
52  unsigned OccupiedDwords(MachineInstr &MI) const {
53    switch (MI.getOpcode()) {
54    case R600::INTERP_PAIR_XY:
55    case R600::INTERP_PAIR_ZW:
56    case R600::INTERP_VEC_LOAD:
57    case R600::DOT_4:
58      return 4;
59    case R600::KILL:
60      return 0;
61    default:
62      break;
63    }
64
65    // These will be expanded to two ALU instructions in the
66    // ExpandSpecialInstructions pass.
67    if (TII->isLDSRetInstr(MI.getOpcode()))
68      return 2;
69
70    if (TII->isVector(MI) || TII->isCubeOp(MI.getOpcode()) ||
71        TII->isReductionOp(MI.getOpcode()))
72      return 4;
73
74    unsigned NumLiteral = 0;
75    for (MachineInstr::mop_iterator It = MI.operands_begin(),
76                                    E = MI.operands_end();
77         It != E; ++It) {
78      MachineOperand &MO = *It;
79      if (MO.isReg() && MO.getReg() == R600::ALU_LITERAL_X)
80        ++NumLiteral;
81    }
82    return 1 + NumLiteral;
83  }
84
85  bool isALU(const MachineInstr &MI) const {
86    if (TII->isALUInstr(MI.getOpcode()))
87      return true;
88    if (TII->isVector(MI) || TII->isCubeOp(MI.getOpcode()))
89      return true;
90    switch (MI.getOpcode()) {
91    case R600::PRED_X:
92    case R600::INTERP_PAIR_XY:
93    case R600::INTERP_PAIR_ZW:
94    case R600::INTERP_VEC_LOAD:
95    case R600::COPY:
96    case R600::DOT_4:
97      return true;
98    default:
99      return false;
100    }
101  }
102
103  bool IsTrivialInst(MachineInstr &MI) const {
104    switch (MI.getOpcode()) {
105    case R600::KILL:
106    case R600::RETURN:
107    case R600::IMPLICIT_DEF:
108      return true;
109    default:
110      return false;
111    }
112  }
113
114  std::pair<unsigned, unsigned> getAccessedBankLine(unsigned Sel) const {
115    // Sel is (512 + (kc_bank << 12) + ConstIndex) << 2
116    // (See also R600ISelLowering.cpp)
117    // ConstIndex value is in [0, 4095];
118    return std::pair<unsigned, unsigned>(
119        ((Sel >> 2) - 512) >> 12, // KC_BANK
120        // Line Number of ConstIndex
121        // A line contains 16 constant registers however KCX bank can lock
122        // two line at the same time ; thus we want to get an even line number.
123        // Line number can be retrieved with (>>4), using (>>5) <<1 generates
124        // an even number.
125        ((((Sel >> 2) - 512) & 4095) >> 5) << 1);
126  }
127
128  bool
129  SubstituteKCacheBank(MachineInstr &MI,
130                       std::vector<std::pair<unsigned, unsigned>> &CachedConsts,
131                       bool UpdateInstr = true) const {
132    std::vector<std::pair<unsigned, unsigned>> UsedKCache;
133
134    if (!TII->isALUInstr(MI.getOpcode()) && MI.getOpcode() != R600::DOT_4)
135      return true;
136
137    const SmallVectorImpl<std::pair<MachineOperand *, int64_t>> &Consts =
138        TII->getSrcs(MI);
139    assert(
140        (TII->isALUInstr(MI.getOpcode()) || MI.getOpcode() == R600::DOT_4) &&
141        "Can't assign Const");
142    for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
143      if (Consts[i].first->getReg() != R600::ALU_CONST)
144        continue;
145      unsigned Sel = Consts[i].second;
146      unsigned Chan = Sel & 3, Index = ((Sel >> 2) - 512) & 31;
147      unsigned KCacheIndex = Index * 4 + Chan;
148      const std::pair<unsigned, unsigned> &BankLine = getAccessedBankLine(Sel);
149      if (CachedConsts.empty()) {
150        CachedConsts.push_back(BankLine);
151        UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex));
152        continue;
153      }
154      if (CachedConsts[0] == BankLine) {
155        UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex));
156        continue;
157      }
158      if (CachedConsts.size() == 1) {
159        CachedConsts.push_back(BankLine);
160        UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex));
161        continue;
162      }
163      if (CachedConsts[1] == BankLine) {
164        UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex));
165        continue;
166      }
167      return false;
168    }
169
170    if (!UpdateInstr)
171      return true;
172
173    for (unsigned i = 0, j = 0, n = Consts.size(); i < n; ++i) {
174      if (Consts[i].first->getReg() != R600::ALU_CONST)
175        continue;
176      switch(UsedKCache[j].first) {
177      case 0:
178        Consts[i].first->setReg(
179            R600::R600_KC0RegClass.getRegister(UsedKCache[j].second));
180        break;
181      case 1:
182        Consts[i].first->setReg(
183            R600::R600_KC1RegClass.getRegister(UsedKCache[j].second));
184        break;
185      default:
186        llvm_unreachable("Wrong Cache Line");
187      }
188      j++;
189    }
190    return true;
191  }
192
193  bool canClauseLocalKillFitInClause(
194                        unsigned AluInstCount,
195                        std::vector<std::pair<unsigned, unsigned>> KCacheBanks,
196                        MachineBasicBlock::iterator Def,
197                        MachineBasicBlock::iterator BBEnd) {
198    const R600RegisterInfo &TRI = TII->getRegisterInfo();
199    //TODO: change this to defs?
200    for (MachineInstr::const_mop_iterator
201           MOI = Def->operands_begin(),
202           MOE = Def->operands_end(); MOI != MOE; ++MOI) {
203      if (!MOI->isReg() || !MOI->isDef() ||
204          TRI.isPhysRegLiveAcrossClauses(MOI->getReg()))
205        continue;
206
207      // Def defines a clause local register, so check that its use will fit
208      // in the clause.
209      unsigned LastUseCount = 0;
210      for (MachineBasicBlock::iterator UseI = Def; UseI != BBEnd; ++UseI) {
211        AluInstCount += OccupiedDwords(*UseI);
212        // Make sure we won't need to end the clause due to KCache limitations.
213        if (!SubstituteKCacheBank(*UseI, KCacheBanks, false))
214          return false;
215
216        // We have reached the maximum instruction limit before finding the
217        // use that kills this register, so we cannot use this def in the
218        // current clause.
219        if (AluInstCount >= TII->getMaxAlusPerClause())
220          return false;
221
222        // TODO: Is this true? kill flag appears to work OK below
223        // Register kill flags have been cleared by the time we get to this
224        // pass, but it is safe to assume that all uses of this register
225        // occur in the same basic block as its definition, because
226        // it is illegal for the scheduler to schedule them in
227        // different blocks.
228        if (UseI->readsRegister(MOI->getReg(), &TRI))
229          LastUseCount = AluInstCount;
230
231        // Exit early if the current use kills the register
232        if (UseI != Def && UseI->killsRegister(MOI->getReg(), &TRI))
233          break;
234      }
235      if (LastUseCount)
236        return LastUseCount <= TII->getMaxAlusPerClause();
237      llvm_unreachable("Clause local register live at end of clause.");
238    }
239    return true;
240  }
241
242  MachineBasicBlock::iterator
243  MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) {
244    MachineBasicBlock::iterator ClauseHead = I;
245    std::vector<std::pair<unsigned, unsigned>> KCacheBanks;
246    bool PushBeforeModifier = false;
247    unsigned AluInstCount = 0;
248    for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
249      if (IsTrivialInst(*I))
250        continue;
251      if (!isALU(*I))
252        break;
253      if (AluInstCount > TII->getMaxAlusPerClause())
254        break;
255      if (I->getOpcode() == R600::PRED_X) {
256        // We put PRED_X in its own clause to ensure that ifcvt won't create
257        // clauses with more than 128 insts.
258        // IfCvt is indeed checking that "then" and "else" branches of an if
259        // statement have less than ~60 insts thus converted clauses can't be
260        // bigger than ~121 insts (predicate setter needs to be in the same
261        // clause as predicated alus).
262        if (AluInstCount > 0)
263          break;
264        if (TII->getFlagOp(*I).getImm() & MO_FLAG_PUSH)
265          PushBeforeModifier = true;
266        AluInstCount ++;
267        continue;
268      }
269      // XXX: GROUP_BARRIER instructions cannot be in the same ALU clause as:
270      //
271      // * KILL or INTERP instructions
272      // * Any instruction that sets UPDATE_EXEC_MASK or UPDATE_PRED bits
273      // * Uses waterfalling (i.e. INDEX_MODE = AR.X)
274      //
275      // XXX: These checks have not been implemented yet.
276      if (TII->mustBeLastInClause(I->getOpcode())) {
277        I++;
278        break;
279      }
280
281      // If this instruction defines a clause local register, make sure
282      // its use can fit in this clause.
283      if (!canClauseLocalKillFitInClause(AluInstCount, KCacheBanks, I, E))
284        break;
285
286      if (!SubstituteKCacheBank(*I, KCacheBanks))
287        break;
288      AluInstCount += OccupiedDwords(*I);
289    }
290    unsigned Opcode = PushBeforeModifier ?
291        R600::CF_ALU_PUSH_BEFORE : R600::CF_ALU;
292    BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), TII->get(Opcode))
293    // We don't use the ADDR field until R600ControlFlowFinalizer pass, where
294    // it is safe to assume it is 0. However if we always put 0 here, the ifcvt
295    // pass may assume that identical ALU clause starter at the beginning of a
296    // true and false branch can be factorized which is not the case.
297        .addImm(Address++) // ADDR
298        .addImm(KCacheBanks.empty()?0:KCacheBanks[0].first) // KB0
299        .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].first) // KB1
300        .addImm(KCacheBanks.empty()?0:2) // KM0
301        .addImm((KCacheBanks.size() < 2)?0:2) // KM1
302        .addImm(KCacheBanks.empty()?0:KCacheBanks[0].second) // KLINE0
303        .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].second) // KLINE1
304        .addImm(AluInstCount) // COUNT
305        .addImm(1); // Enabled
306    return I;
307  }
308
309public:
310  static char ID;
311
312  R600EmitClauseMarkers() : MachineFunctionPass(ID) {
313    initializeR600EmitClauseMarkersPass(*PassRegistry::getPassRegistry());
314  }
315
316  bool runOnMachineFunction(MachineFunction &MF) override {
317    const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
318    TII = ST.getInstrInfo();
319
320    for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
321                                                    BB != BB_E; ++BB) {
322      MachineBasicBlock &MBB = *BB;
323      MachineBasicBlock::iterator I = MBB.begin();
324      if (I != MBB.end() && I->getOpcode() == R600::CF_ALU)
325        continue; // BB was already parsed
326      for (MachineBasicBlock::iterator E = MBB.end(); I != E;) {
327        if (isALU(*I)) {
328          auto next = MakeALUClause(MBB, I);
329          assert(next != I);
330          I = next;
331        } else
332          ++I;
333      }
334    }
335    return false;
336  }
337
338  StringRef getPassName() const override {
339    return "R600 Emit Clause Markers Pass";
340  }
341};
342
// Definition of the static pass identifier that the R600EmitClauseMarkers
// constructor hands to the MachineFunctionPass base class.
char R600EmitClauseMarkers::ID = 0;
344
345} // end anonymous namespace
346
347INITIALIZE_PASS_BEGIN(R600EmitClauseMarkers, "emitclausemarkers",
348                      "R600 Emit Clause Markters", false, false)
349INITIALIZE_PASS_END(R600EmitClauseMarkers, "emitclausemarkers",
350                      "R600 Emit Clause Markters", false, false)
351
352FunctionPass *llvm::createR600EmitClauseMarkers() {
353  return new R600EmitClauseMarkers();
354}
355