1//===-------------- MIRCanonicalizer.cpp - MIR Canonicalizer --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// The purpose of this pass is to employ a canonical code transformation so
10// that code compiled with slightly different IR passes can be diffed more
11// effectively than otherwise. This is done by renaming vregs in a given
12// LiveRange in a canonical way. This pass also does a pseudo-scheduling to
13// move defs closer to their use inorder to reduce diffs caused by slightly
14// different schedules.
15//
16// Basic Usage:
17//
18// llc -o - -run-pass mir-canonicalizer example.mir
19//
20// Reorders instructions canonically.
21// Renames virtual register operands canonically.
22// Strips certain MIR artifacts (optionally).
23//
24//===----------------------------------------------------------------------===//
25
26#include "MIRVRegNamerUtils.h"
27#include "llvm/ADT/PostOrderIterator.h"
28#include "llvm/ADT/STLExtras.h"
29#include "llvm/CodeGen/MachineFunctionPass.h"
30#include "llvm/CodeGen/MachineRegisterInfo.h"
31#include "llvm/InitializePasses.h"
32#include "llvm/Pass.h"
33#include "llvm/Support/Debug.h"
34#include "llvm/Support/raw_ostream.h"
35
36using namespace llvm;
37
38#define DEBUG_TYPE "mir-canonicalizer"
39
40static cl::opt<unsigned>
41    CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u),
42                               cl::value_desc("N"),
43                               cl::desc("Function number to canonicalize."));
44
45namespace {
46
47class MIRCanonicalizer : public MachineFunctionPass {
48public:
49  static char ID;
50  MIRCanonicalizer() : MachineFunctionPass(ID) {}
51
52  StringRef getPassName() const override {
53    return "Rename register operands in a canonical ordering.";
54  }
55
56  void getAnalysisUsage(AnalysisUsage &AU) const override {
57    AU.setPreservesCFG();
58    MachineFunctionPass::getAnalysisUsage(AU);
59  }
60
61  bool runOnMachineFunction(MachineFunction &MF) override;
62};
63
64} // end anonymous namespace
65
66char MIRCanonicalizer::ID;
67
68char &llvm::MIRCanonicalizerID = MIRCanonicalizer::ID;
69
70INITIALIZE_PASS_BEGIN(MIRCanonicalizer, "mir-canonicalizer",
71                      "Rename Register Operands Canonically", false, false)
72
73INITIALIZE_PASS_END(MIRCanonicalizer, "mir-canonicalizer",
74                    "Rename Register Operands Canonically", false, false)
75
76static std::vector<MachineBasicBlock *> GetRPOList(MachineFunction &MF) {
77  if (MF.empty())
78    return {};
79  ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
80  std::vector<MachineBasicBlock *> RPOList;
81  append_range(RPOList, RPOT);
82
83  return RPOList;
84}
85
86static bool
87rescheduleLexographically(std::vector<MachineInstr *> instructions,
88                          MachineBasicBlock *MBB,
89                          std::function<MachineBasicBlock::iterator()> getPos) {
90
91  bool Changed = false;
92  using StringInstrPair = std::pair<std::string, MachineInstr *>;
93  std::vector<StringInstrPair> StringInstrMap;
94
95  for (auto *II : instructions) {
96    std::string S;
97    raw_string_ostream OS(S);
98    II->print(OS);
99    OS.flush();
100
101    // Trim the assignment, or start from the beginning in the case of a store.
102    const size_t i = S.find('=');
103    StringInstrMap.push_back({(i == std::string::npos) ? S : S.substr(i), II});
104  }
105
106  llvm::sort(StringInstrMap, llvm::less_first());
107
108  for (auto &II : StringInstrMap) {
109
110    LLVM_DEBUG({
111      dbgs() << "Splicing ";
112      II.second->dump();
113      dbgs() << " right before: ";
114      getPos()->dump();
115    });
116
117    Changed = true;
118    MBB->splice(getPos(), MBB, II.second);
119  }
120
121  return Changed;
122}
123
124static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
125                                  MachineBasicBlock *MBB) {
126
127  bool Changed = false;
128
129  // Calculates the distance of MI from the beginning of its parent BB.
130  auto getInstrIdx = [](const MachineInstr &MI) {
131    unsigned i = 0;
132    for (const auto &CurMI : *MI.getParent()) {
133      if (&CurMI == &MI)
134        return i;
135      i++;
136    }
137    return ~0U;
138  };
139
140  // Pre-Populate vector of instructions to reschedule so that we don't
141  // clobber the iterator.
142  std::vector<MachineInstr *> Instructions;
143  for (auto &MI : *MBB) {
144    Instructions.push_back(&MI);
145  }
146
147  std::map<MachineInstr *, std::vector<MachineInstr *>> MultiUsers;
148  std::map<unsigned, MachineInstr *> MultiUserLookup;
149  unsigned UseToBringDefCloserToCount = 0;
150  std::vector<MachineInstr *> PseudoIdempotentInstructions;
151  std::vector<unsigned> PhysRegDefs;
152  for (auto *II : Instructions) {
153    for (unsigned i = 1; i < II->getNumOperands(); i++) {
154      MachineOperand &MO = II->getOperand(i);
155      if (!MO.isReg())
156        continue;
157
158      if (MO.getReg().isVirtual())
159        continue;
160
161      if (!MO.isDef())
162        continue;
163
164      PhysRegDefs.push_back(MO.getReg());
165    }
166  }
167
168  for (auto *II : Instructions) {
169    if (II->getNumOperands() == 0)
170      continue;
171    if (II->mayLoadOrStore())
172      continue;
173
174    MachineOperand &MO = II->getOperand(0);
175    if (!MO.isReg() || !MO.getReg().isVirtual())
176      continue;
177    if (!MO.isDef())
178      continue;
179
180    bool IsPseudoIdempotent = true;
181    for (unsigned i = 1; i < II->getNumOperands(); i++) {
182
183      if (II->getOperand(i).isImm()) {
184        continue;
185      }
186
187      if (II->getOperand(i).isReg()) {
188        if (!II->getOperand(i).getReg().isVirtual())
189          if (!llvm::is_contained(PhysRegDefs, II->getOperand(i).getReg())) {
190            continue;
191          }
192      }
193
194      IsPseudoIdempotent = false;
195      break;
196    }
197
198    if (IsPseudoIdempotent) {
199      PseudoIdempotentInstructions.push_back(II);
200      continue;
201    }
202
203    LLVM_DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump(););
204
205    MachineInstr *Def = II;
206    unsigned Distance = ~0U;
207    MachineInstr *UseToBringDefCloserTo = nullptr;
208    MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo();
209    for (auto &UO : MRI->use_nodbg_operands(MO.getReg())) {
210      MachineInstr *UseInst = UO.getParent();
211
212      const unsigned DefLoc = getInstrIdx(*Def);
213      const unsigned UseLoc = getInstrIdx(*UseInst);
214      const unsigned Delta = (UseLoc - DefLoc);
215
216      if (UseInst->getParent() != Def->getParent())
217        continue;
218      if (DefLoc >= UseLoc)
219        continue;
220
221      if (Delta < Distance) {
222        Distance = Delta;
223        UseToBringDefCloserTo = UseInst;
224        MultiUserLookup[UseToBringDefCloserToCount++] = UseToBringDefCloserTo;
225      }
226    }
227
228    const auto BBE = MBB->instr_end();
229    MachineBasicBlock::iterator DefI = BBE;
230    MachineBasicBlock::iterator UseI = BBE;
231
232    for (auto BBI = MBB->instr_begin(); BBI != BBE; ++BBI) {
233
234      if (DefI != BBE && UseI != BBE)
235        break;
236
237      if (&*BBI == Def) {
238        DefI = BBI;
239        continue;
240      }
241
242      if (&*BBI == UseToBringDefCloserTo) {
243        UseI = BBI;
244        continue;
245      }
246    }
247
248    if (DefI == BBE || UseI == BBE)
249      continue;
250
251    LLVM_DEBUG({
252      dbgs() << "Splicing ";
253      DefI->dump();
254      dbgs() << " right before: ";
255      UseI->dump();
256    });
257
258    MultiUsers[UseToBringDefCloserTo].push_back(Def);
259    Changed = true;
260    MBB->splice(UseI, MBB, DefI);
261  }
262
263  // Sort the defs for users of multiple defs lexographically.
264  for (const auto &E : MultiUserLookup) {
265
266    auto UseI = llvm::find_if(MBB->instrs(), [&](MachineInstr &MI) -> bool {
267      return &MI == E.second;
268    });
269
270    if (UseI == MBB->instr_end())
271      continue;
272
273    LLVM_DEBUG(
274        dbgs() << "Rescheduling Multi-Use Instructions Lexographically.";);
275    Changed |= rescheduleLexographically(
276        MultiUsers[E.second], MBB,
277        [&]() -> MachineBasicBlock::iterator { return UseI; });
278  }
279
280  PseudoIdempotentInstCount = PseudoIdempotentInstructions.size();
281  LLVM_DEBUG(
282      dbgs() << "Rescheduling Idempotent Instructions Lexographically.";);
283  Changed |= rescheduleLexographically(
284      PseudoIdempotentInstructions, MBB,
285      [&]() -> MachineBasicBlock::iterator { return MBB->begin(); });
286
287  return Changed;
288}
289
290static bool propagateLocalCopies(MachineBasicBlock *MBB) {
291  bool Changed = false;
292  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
293
294  std::vector<MachineInstr *> Copies;
295  for (MachineInstr &MI : MBB->instrs()) {
296    if (MI.isCopy())
297      Copies.push_back(&MI);
298  }
299
300  for (MachineInstr *MI : Copies) {
301
302    if (!MI->getOperand(0).isReg())
303      continue;
304    if (!MI->getOperand(1).isReg())
305      continue;
306
307    const Register Dst = MI->getOperand(0).getReg();
308    const Register Src = MI->getOperand(1).getReg();
309
310    if (!Dst.isVirtual())
311      continue;
312    if (!Src.isVirtual())
313      continue;
314    // Not folding COPY instructions if regbankselect has not set the RCs.
315    // Why are we only considering Register Classes? Because the verifier
316    // sometimes gets upset if the register classes don't match even if the
317    // types do. A future patch might add COPY folding for matching types in
318    // pre-registerbankselect code.
319    if (!MRI.getRegClassOrNull(Dst))
320      continue;
321    if (MRI.getRegClass(Dst) != MRI.getRegClass(Src))
322      continue;
323
324    std::vector<MachineOperand *> Uses;
325    for (MachineOperand &MO : MRI.use_operands(Dst))
326      Uses.push_back(&MO);
327    for (auto *MO : Uses)
328      MO->setReg(Src);
329
330    Changed = true;
331    MI->eraseFromParent();
332  }
333
334  return Changed;
335}
336
337static bool doDefKillClear(MachineBasicBlock *MBB) {
338  bool Changed = false;
339
340  for (auto &MI : *MBB) {
341    for (auto &MO : MI.operands()) {
342      if (!MO.isReg())
343        continue;
344      if (!MO.isDef() && MO.isKill()) {
345        Changed = true;
346        MO.setIsKill(false);
347      }
348
349      if (MO.isDef() && MO.isDead()) {
350        Changed = true;
351        MO.setIsDead(false);
352      }
353    }
354  }
355
356  return Changed;
357}
358
359static bool runOnBasicBlock(MachineBasicBlock *MBB,
360                            unsigned BasicBlockNum, VRegRenamer &Renamer) {
361  LLVM_DEBUG({
362    dbgs() << "\n\n  NEW BASIC BLOCK: " << MBB->getName() << "  \n\n";
363    dbgs() << "\n\n================================================\n\n";
364  });
365
366  bool Changed = false;
367
368  LLVM_DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";);
369
370  LLVM_DEBUG(dbgs() << "MBB Before Canonical Copy Propagation:\n";
371             MBB->dump(););
372  Changed |= propagateLocalCopies(MBB);
373  LLVM_DEBUG(dbgs() << "MBB After Canonical Copy Propagation:\n"; MBB->dump(););
374
375  LLVM_DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump(););
376  unsigned IdempotentInstCount = 0;
377  Changed |= rescheduleCanonically(IdempotentInstCount, MBB);
378  LLVM_DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump(););
379
380  Changed |= Renamer.renameVRegs(MBB, BasicBlockNum);
381
382  // TODO: Consider dropping this. Dropping kill defs is probably not
383  // semantically sound.
384  Changed |= doDefKillClear(MBB);
385
386  LLVM_DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump();
387             dbgs() << "\n";);
388  LLVM_DEBUG(
389      dbgs() << "\n\n================================================\n\n");
390  return Changed;
391}
392
393bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) {
394
395  static unsigned functionNum = 0;
396  if (CanonicalizeFunctionNumber != ~0U) {
397    if (CanonicalizeFunctionNumber != functionNum++)
398      return false;
399    LLVM_DEBUG(dbgs() << "\n Canonicalizing Function " << MF.getName()
400                      << "\n";);
401  }
402
403  // we need a valid vreg to create a vreg type for skipping all those
404  // stray vreg numbers so reach alignment/canonical vreg values.
405  std::vector<MachineBasicBlock *> RPOList = GetRPOList(MF);
406
407  LLVM_DEBUG(
408      dbgs() << "\n\n  NEW MACHINE FUNCTION: " << MF.getName() << "  \n\n";
409      dbgs() << "\n\n================================================\n\n";
410      dbgs() << "Total Basic Blocks: " << RPOList.size() << "\n";
411      for (auto MBB
412           : RPOList) { dbgs() << MBB->getName() << "\n"; } dbgs()
413      << "\n\n================================================\n\n";);
414
415  unsigned BBNum = 0;
416  bool Changed = false;
417  MachineRegisterInfo &MRI = MF.getRegInfo();
418  VRegRenamer Renamer(MRI);
419  for (auto *MBB : RPOList)
420    Changed |= runOnBasicBlock(MBB, BBNum++, Renamer);
421
422  return Changed;
423}
424