//===-- AArch64StackTaggingPreRA.cpp --- Stack Tagging for AArch64 -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
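
// This pass runs just before register allocation and optimizes the TAGPstack
// pseudo-instructions used for MTE stack tagging:
//  * Loads and stores of tagged slots are rewritten to address the slot
//    through its frame index (marked MO_TAGGED) when the whole frame is known
//    to be in range, so they no longer need the tagged address register
//    (uncheckLoadsAndStores).
//  * One tagged slot may be pinned to offset 0 from the tagged base pointer,
//    which lets its TAGPstack be replaced by a plain COPY of the base register
//    (findFirstSlotCandidate).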

#include "AArch64.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64InstrInfo.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "aarch64-stack-tagging-pre-ra"

enum UncheckedLdStMode { UncheckedNever, UncheckedSafe, UncheckedAlways };

cl::opt<UncheckedLdStMode> ClUncheckedLdSt(
    "stack-tagging-unchecked-ld-st", cl::Hidden,
    cl::init(UncheckedSafe),
    cl::desc(
        "Unconditionally apply unchecked-ld-st optimization (even for large "
        "stack frames, or in the presence of variable sized allocas)."),
    cl::values(
        clEnumValN(UncheckedNever, "never", "never apply unchecked-ld-st"),
        clEnumValN(
            UncheckedSafe, "safe",
            "apply unchecked-ld-st when the target is definitely within range"),
        clEnumValN(UncheckedAlways, "always", "always apply unchecked-ld-st")));

static cl::opt<bool>
    ClFirstSlot("stack-tagging-first-slot-opt", cl::Hidden, cl::init(true),
                cl::ZeroOrMore,
                cl::desc("Apply first slot optimization for stack tagging "
                         "(eliminate ADDG Rt, Rn, 0, 0)."));
namespace {

class AArch64StackTaggingPreRA : public MachineFunctionPass {
  MachineFunction *MF;
  AArch64FunctionInfo *AFI;
  MachineFrameInfo *MFI;
  MachineRegisterInfo *MRI;
  const AArch64RegisterInfo *TRI;
  const AArch64InstrInfo *TII;

  SmallVector<MachineInstr*, 16> ReTags;

public:
  static char ID;
  AArch64StackTaggingPreRA() : MachineFunctionPass(ID) {
    initializeAArch64StackTaggingPreRAPass(*PassRegistry::getPassRegistry());
  }

  bool mayUseUncheckedLoadStore();
  void uncheckUsesOf(unsigned TaggedReg, int FI);
  void uncheckLoadsAndStores();
  Optional<int> findFirstSlotCandidate();

  bool runOnMachineFunction(MachineFunction &Func) override;
  StringRef getPassName() const override {
    return "AArch64 Stack Tagging PreRA";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};
} // end anonymous namespace

char AArch64StackTaggingPreRA::ID = 0;

INITIALIZE_PASS_BEGIN(AArch64StackTaggingPreRA, "aarch64-stack-tagging-pre-ra",
                      "AArch64 Stack Tagging PreRA Pass", false, false)
INITIALIZE_PASS_END(AArch64StackTaggingPreRA, "aarch64-stack-tagging-pre-ra",
                    "AArch64 Stack Tagging PreRA Pass", false, false)

FunctionPass *llvm::createAArch64StackTaggingPreRAPass() {
  return new AArch64StackTaggingPreRA();
}
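
// These are the opcodes (unsigned scaled-immediate loads/stores and
// load/store-pair forms) whose base register operand may be replaced with a
// frame index by uncheckUsesOf().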
static bool isUncheckedLoadOrStoreOpcode(unsigned Opcode) {
  switch (Opcode) {
  case AArch64::LDRBBui:
  case AArch64::LDRHHui:
  case AArch64::LDRWui:
  case AArch64::LDRXui:

  case AArch64::LDRBui:
  case AArch64::LDRHui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:

  case AArch64::LDRSHWui:
  case AArch64::LDRSHXui:

  case AArch64::LDRSBWui:
  case AArch64::LDRSBXui:

  case AArch64::LDRSWui:

  case AArch64::STRBBui:
  case AArch64::STRHHui:
  case AArch64::STRWui:
  case AArch64::STRXui:

  case AArch64::STRBui:
  case AArch64::STRHui:
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:

  case AArch64::LDPWi:
  case AArch64::LDPXi:
  case AArch64::LDPSi:
  case AArch64::LDPDi:
  case AArch64::LDPQi:

  case AArch64::LDPSWi:

  case AArch64::STPWi:
  case AArch64::STPXi:
  case AArch64::STPSi:
  case AArch64::STPDi:
  case AArch64::STPQi:
    return true;
  default:
    return false;
  }
}
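
// Decide whether the unchecked-ld-st rewrite may be applied at all for this
// function, as controlled by -stack-tagging-unchecked-ld-st.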
bool AArch64StackTaggingPreRA::mayUseUncheckedLoadStore() {
  if (ClUncheckedLdSt == UncheckedNever)
    return false;
  else if (ClUncheckedLdSt == UncheckedAlways)
    return true;

  // This estimate could be improved if we had harder guarantees about stack
  // frame layout. With LocalStackAllocation we can estimate the SP offset to
  // any preallocated slot. AArch64FrameLowering::orderFrameObjects could put
  // tagged objects ahead of non-tagged ones, but that's not always desirable.
  //
  // Underestimating the SP offset here may require the use of LDG to
  // materialize the tagged address of the stack slot, along with a scratch
  // register allocation (post-regalloc!).
  //
  // For now we do the safe thing and require that the entire stack frame is
  // within range of the shortest of the unchecked instructions.
  unsigned FrameSize = 0;
  for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i)
    FrameSize += MFI->getObjectSize(i);
  bool EntireFrameReachableFromSP = FrameSize < 0xf00;
  return !MFI->hasVarSizedObjects() && EntireFrameReachableFromSP;
}
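
// Replace the base register of every unchecked-capable user of TaggedReg with
// the slot's frame index, following COPYs into virtual registers.
// Illustrative (made-up) MIR, before:
//   %x = TAGPstack %stack.0, 0, %base, 2
//   STRXui %v, %x, 0
// and after:
//   %x = TAGPstack %stack.0, 0, %base, 2
//   STRXui %v, %stack.0, 0      (base operand flagged MO_TAGGED)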
void AArch64StackTaggingPreRA::uncheckUsesOf(unsigned TaggedReg, int FI) {
  for (auto UI = MRI->use_instr_begin(TaggedReg), E = MRI->use_instr_end();
       UI != E;) {
    MachineInstr *UseI = &*(UI++);
    if (isUncheckedLoadOrStoreOpcode(UseI->getOpcode())) {
      // FI operand is always the one before the immediate offset.
      unsigned OpIdx = TII->getLoadStoreImmIdx(UseI->getOpcode()) - 1;
      if (UseI->getOperand(OpIdx).isReg() &&
          UseI->getOperand(OpIdx).getReg() == TaggedReg) {
        UseI->getOperand(OpIdx).ChangeToFrameIndex(FI);
        UseI->getOperand(OpIdx).setTargetFlags(AArch64II::MO_TAGGED);
      }
    } else if (UseI->isCopy() &&
               Register::isVirtualRegister(UseI->getOperand(0).getReg())) {
      uncheckUsesOf(UseI->getOperand(0).getReg(), FI);
    }
  }
}
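
// Apply uncheckUsesOf() to the tagged address defined by every collected
// TAGPstack instruction.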
void AArch64StackTaggingPreRA::uncheckLoadsAndStores() {
  for (auto *I : ReTags) {
    unsigned TaggedReg = I->getOperand(0).getReg();
    int FI = I->getOperand(1).getIndex();
    uncheckUsesOf(TaggedReg, FI);
  }
}

namespace {
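// A (frame index, tag offset) pair identifying a tagged stack slot, as encoded
// in the operands of a TAGPstack instruction.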
struct SlotWithTag {
  int FI;
  int Tag;
  SlotWithTag(int FI, int Tag) : FI(FI), Tag(Tag) {}
  explicit SlotWithTag(const MachineInstr &MI)
      : FI(MI.getOperand(1).getIndex()), Tag(MI.getOperand(4).getImm()) {}
  bool operator==(const SlotWithTag &Other) const {
    return FI == Other.FI && Tag == Other.Tag;
  }
};
} // namespace

namespace llvm {
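// Let SlotWithTag be used as a DenseMap key.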
template <> struct DenseMapInfo<SlotWithTag> {
  static inline SlotWithTag getEmptyKey() { return {-2, -2}; }
  static inline SlotWithTag getTombstoneKey() { return {-3, -3}; }
  static unsigned getHashValue(const SlotWithTag &V) {
    return hash_combine(DenseMapInfo<int>::getHashValue(V.FI),
                        DenseMapInfo<int>::getHashValue(V.Tag));
  }
  static bool isEqual(const SlotWithTag &A, const SlotWithTag &B) {
    return A == B;
  }
};
} // namespace llvm
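
// True if FI has already been assigned a fixed offset in the local stack
// allocation block; such slots are skipped by findFirstSlotCandidate().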
static bool isSlotPreAllocated(MachineFrameInfo *MFI, int FI) {
  return MFI->getUseLocalStackAllocationBlock() &&
         MFI->isObjectPreAllocated(FI);
}

// Pin one of the tagged slots to offset 0 from the tagged base pointer.
// This would make its address available in a virtual register (IRG's def), as
// opposed to requiring an ADDG instruction to materialize. This effectively
// eliminates a vreg (by replacing it with direct uses of IRG, which is usually
// live almost everywhere anyway), and therefore needs to happen before
// regalloc.
Optional<int> AArch64StackTaggingPreRA::findFirstSlotCandidate() {
  // Find the best (FI, Tag) pair to pin to offset 0.
  // Looking at the possible uses of a tagged address, the advantage of pinning
  // is:
  // - COPY to physical register.
  //   Does not matter, this would trade a MOV instruction for an ADDG.
  // - ST*G instructions would matter, but those mostly appear near the
  //   function prologue where all the tagged addresses need to be materialized
  //   anyway; also, counting ST*G uses would overweight large allocas that
  //   require more than one ST*G instruction. They are therefore not counted.
  // - Load/store instructions that can be unchecked do not need the tagged
  //   pointer in their address operand, so they would not benefit. Those uses
  //   have already been rewritten (see uncheckLoadsAndStores), so all
  //   remaining load/store instructions count.
  // - Any other instruction may benefit from being pinned to offset 0.
  LLVM_DEBUG(dbgs() << "AArch64StackTaggingPreRA::findFirstSlotCandidate\n");
  if (!ClFirstSlot)
    return None;

  DenseMap<SlotWithTag, int> RetagScore;
  SlotWithTag MaxScoreST{-1, -1};
  int MaxScore = -1;
  for (auto *I : ReTags) {
    SlotWithTag ST{*I};
    if (isSlotPreAllocated(MFI, ST.FI))
      continue;

    Register RetagReg = I->getOperand(0).getReg();
    if (!Register::isVirtualRegister(RetagReg))
      continue;

    int Score = 0;
    SmallVector<Register, 8> WorkList;
    WorkList.push_back(RetagReg);

    while (!WorkList.empty()) {
      Register UseReg = WorkList.back();
      WorkList.pop_back();
      for (auto &UseI : MRI->use_instructions(UseReg)) {
        unsigned Opcode = UseI.getOpcode();
        if (Opcode == AArch64::STGOffset || Opcode == AArch64::ST2GOffset ||
            Opcode == AArch64::STZGOffset || Opcode == AArch64::STZ2GOffset ||
            Opcode == AArch64::STGPi || Opcode == AArch64::STGloop ||
            Opcode == AArch64::STZGloop || Opcode == AArch64::STGloop_wback ||
            Opcode == AArch64::STZGloop_wback)
          continue;
        if (UseI.isCopy()) {
          Register DstReg = UseI.getOperand(0).getReg();
          if (Register::isVirtualRegister(DstReg))
            WorkList.push_back(DstReg);
          continue;
        }
        LLVM_DEBUG(dbgs() << "[" << ST.FI << ":" << ST.Tag << "] use of %"
                          << Register::virtReg2Index(UseReg) << " in " << UseI
                          << "\n");
        Score++;
      }
    }

    int TotalScore = RetagScore[ST] += Score;
    if (TotalScore > MaxScore ||
        (TotalScore == MaxScore && ST.FI > MaxScoreST.FI)) {
      MaxScore = TotalScore;
      MaxScoreST = ST;
    }
  }

  if (MaxScoreST.FI < 0)
    return None;

  // If FI's tag is already 0, we are done.
  if (MaxScoreST.Tag == 0)
    return MaxScoreST.FI;

  // Otherwise, find an arbitrary victim pair (FI, Tag) with Tag == 0; the
  // first one found will do.
  SlotWithTag SwapST{-1, -1};
  for (auto *I : ReTags) {
    SlotWithTag ST{*I};
    if (ST.Tag == 0) {
      SwapST = ST;
      break;
    }
  }

  // Swap tags between the victim and the highest scoring pair. If SwapST is
  // still (-1, -1), that's fine, too - we'll simply zero out the tag of the
  // highest scoring slot without changing anything else.
  for (auto *I : ReTags) {
    SlotWithTag ST{*I};
    MachineOperand &TagOp = I->getOperand(4);
    if (ST == MaxScoreST) {
      TagOp.setImm(0);
    } else if (ST == SwapST) {
      TagOp.setImm(MaxScoreST.Tag);
    }
  }
  return MaxScoreST.FI;
}
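
// Collect all TAGPstack pseudos, disable stack protector layout for the tagged
// slots, rewrite loads and stores where that is safe, and try to pin one slot
// to offset 0 so that its TAGPstack degenerates into a COPY of the base
// register.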
bool AArch64StackTaggingPreRA::runOnMachineFunction(MachineFunction &Func) {
  MF = &Func;
  MRI = &MF->getRegInfo();
  AFI = MF->getInfo<AArch64FunctionInfo>();
  TII = static_cast<const AArch64InstrInfo *>(
      MF->getSubtarget().getInstrInfo());
  TRI = static_cast<const AArch64RegisterInfo *>(
      MF->getSubtarget().getRegisterInfo());
  MFI = &MF->getFrameInfo();
  ReTags.clear();

  assert(MRI->isSSA());

  LLVM_DEBUG(dbgs() << "********** AArch64 Stack Tagging PreRA **********\n"
                    << "********** Function: " << MF->getName() << '\n');

  SmallSetVector<int, 8> TaggedSlots;
  for (auto &BB : *MF) {
    for (auto &I : BB) {
      if (I.getOpcode() == AArch64::TAGPstack) {
        ReTags.push_back(&I);
        int FI = I.getOperand(1).getIndex();
        TaggedSlots.insert(FI);
        // There should be no offsets in TAGPstack yet.
        assert(I.getOperand(2).getImm() == 0);
      }
    }
  }

  // Take over from SSP. It does nothing for tagged slots, and should not
  // really have been enabled in the first place.
  for (int FI : TaggedSlots)
    MFI->setObjectSSPLayout(FI, MachineFrameInfo::SSPLK_None);

  if (ReTags.empty())
    return false;

  if (mayUseUncheckedLoadStore())
    uncheckLoadsAndStores();

  // Find a slot that is used with a zero tag offset, like ADDG #fi, 0.
  // If the base tagged pointer is set up to the address of this slot,
  // the ADDG instruction can be eliminated.
  Optional<int> BaseSlot = findFirstSlotCandidate();
  if (BaseSlot)
    AFI->setTaggedBasePointerIndex(*BaseSlot);

  for (auto *I : ReTags) {
    int FI = I->getOperand(1).getIndex();
    int Tag = I->getOperand(4).getImm();
    Register Base = I->getOperand(3).getReg();
    if (Tag == 0 && FI == BaseSlot) {
      BuildMI(*I->getParent(), I, {}, TII->get(AArch64::COPY),
              I->getOperand(0).getReg())
          .addReg(Base);
      I->eraseFromParent();
    }
  }

  return true;
}