//===-- AArch64BranchFixupPass.cpp - AArch64 branch fixup -----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that fixes AArch64 branches which have ended up out
// of range for their immediate operands.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "aarch64-branch-fixup"
#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

STATISTIC(NumSplit,      "Number of uncond branches inserted");
STATISTIC(NumCBrFixed,   "Number of cond branches fixed");

/// Return the worst case padding that could result from unknown offset bits.
/// This does not include alignment padding caused by known offset bits.
///
/// @param LogAlign log2(alignment)
/// @param KnownBits Number of known low offset bits.
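///
/// For example, LogAlign == 2 with KnownBits == 0 gives (1u << 2) - (1u << 0)
/// == 3: up to three bytes of padding may be needed to reach 4-byte alignment
/// when nothing is known about the low bits of the offset.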
static inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) {
  if (KnownBits < LogAlign)
    return (1u << LogAlign) - (1u << KnownBits);
  return 0;
}

namespace {
  /// Due to limited PC-relative displacements, conditional branches to distant
  /// blocks may need converting into an unconditional equivalent. For example:
  ///     tbz w1, #0, far_away
  /// becomes
  ///     tbnz w1, #0, skip
  ///     b far_away
  ///   skip:
  class AArch64BranchFixup : public MachineFunctionPass {
    /// Information about the offset and size of a single basic block.
    struct BasicBlockInfo {
      /// Distance from the beginning of the function to the beginning of this
      /// basic block.
      ///
      /// Offsets are computed assuming worst case padding before an aligned
      /// block. This means that subtracting basic block offsets always gives a
      /// conservative estimate of the real distance which may be smaller.
      ///
      /// Because worst case padding is used, the computed offset of an aligned
      /// block may not actually be aligned.
      unsigned Offset;

      /// Size of the basic block in bytes.  If the block contains inline
      /// assembly, this is a worst case estimate.
      ///
      /// The size does not include any alignment padding whether from the
      /// beginning of the block, or from an aligned jump table at the end.
      unsigned Size;

      /// The number of low bits in Offset that are known to be exact.  The
      /// remaining bits of Offset are an upper bound.
      uint8_t KnownBits;

      /// When non-zero, the block contains instructions (inline asm) of unknown
      /// size.  The real size may be smaller than Size bytes by a multiple of 1
      /// << Unalign.
      uint8_t Unalign;

      BasicBlockInfo() : Offset(0), Size(0), KnownBits(0), Unalign(0) {}

      /// Compute the number of known offset bits internally to this block.
      /// This number should be used to predict worst case padding when
      /// splitting the block.
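      ///
      /// For example, a block that starts at a 16-byte boundary (KnownBits ==
      /// 4) but contains inline asm (Unalign == 2) can only promise 4-byte
      /// alignment at its end, because the asm's real size is only known to
      /// be a multiple of 4.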
      unsigned internalKnownBits() const {
        unsigned Bits = Unalign ? Unalign : KnownBits;
        // If the block size isn't a multiple of the known alignment
        // (1 << Bits), the end of the block is only as aligned as the lowest
        // set bit of the size.
        if (Size & ((1u << Bits) - 1))
          Bits = countTrailingZeros(Size);
        return Bits;
      }

      /// Compute the offset immediately following this block.  If LogAlign is
      /// specified, return the offset the successor block will get if it has
      /// this alignment.
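      ///
      /// The padding added for LogAlign is the worst case: only
      /// internalKnownBits() low bits of the end offset are trustworthy, so
      /// the successor's real start may be up to UnknownPadding() bytes
      /// earlier than the value returned here.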
      unsigned postOffset(unsigned LogAlign = 0) const {
        unsigned PO = Offset + Size;
        if (!LogAlign)
          return PO;
        // Add alignment padding from the terminator.
        return PO + UnknownPadding(LogAlign, internalKnownBits());
      }

      /// Compute the number of known low bits of postOffset.  If this block
      /// contains inline asm, the number of known bits drops to the
      /// instruction alignment.  An aligned terminator may increase the number
      /// of known bits.
      /// If LogAlign is given, also consider the alignment of the next block.
      unsigned postKnownBits(unsigned LogAlign = 0) const {
        return std::max(LogAlign, internalKnownBits());
      }
    };

    std::vector<BasicBlockInfo> BBInfo;

    /// One per immediate branch, keeping the machine instruction pointer, the
    /// number of bits available for its displacement, and whether it is
    /// conditional (only conditional branches ever need fixing up).
    struct ImmBranch {
      MachineInstr *MI;
      unsigned OffsetBits : 31;
      bool IsCond : 1;
      ImmBranch(MachineInstr *mi, unsigned offsetbits, bool cond)
        : MI(mi), OffsetBits(offsetbits), IsCond(cond) {}
    };

    /// Keep track of all the immediate branch instructions.
    std::vector<ImmBranch> ImmBranches;

    MachineFunction *MF;
    const AArch64InstrInfo *TII;
  public:
    static char ID;
    AArch64BranchFixup() : MachineFunctionPass(ID) {}

    virtual bool runOnMachineFunction(MachineFunction &MF);

    virtual const char *getPassName() const {
      return "AArch64 branch fixup pass";
    }

  private:
    void initializeFunctionInfo();
    MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI);
    void adjustBBOffsetsAfter(MachineBasicBlock *BB);
    bool isBBInRange(MachineInstr *MI, MachineBasicBlock *BB,
                     unsigned OffsetBits);
    bool fixupImmediateBr(ImmBranch &Br);
    bool fixupConditionalBr(ImmBranch &Br);

    void computeBlockSize(MachineBasicBlock *MBB);
    unsigned getOffsetOf(MachineInstr *MI) const;
    void dumpBBs();
    void verify();
  };
  char AArch64BranchFixup::ID = 0;
}

/// Check that the BBInfo offsets are consistent with the block layout.
void AArch64BranchFixup::verify() {
#ifndef NDEBUG
  for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
       MBBI != E; ++MBBI) {
    MachineBasicBlock *MBB = MBBI;
    unsigned MBBId = MBB->getNumber();
    assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset);
  }
#endif
}

/// Print block size and offset information - debugging
void AArch64BranchFixup::dumpBBs() {
  DEBUG({
    for (unsigned J = 0, E = BBInfo.size(); J != E; ++J) {
      const BasicBlockInfo &BBI = BBInfo[J];
      dbgs() << format("%08x BB#%u\t", BBI.Offset, J)
             << " kb=" << unsigned(BBI.KnownBits)
             << " ua=" << unsigned(BBI.Unalign)
             << format(" size=%#x\n", BBInfo[J].Size);
    }
  });
}

/// Returns an instance of the branch fixup pass.
FunctionPass *llvm::createAArch64BranchFixupPass() {
  return new AArch64BranchFixup();
}

bool AArch64BranchFixup::runOnMachineFunction(MachineFunction &mf) {
  MF = &mf;
  DEBUG(dbgs() << "***** AArch64BranchFixup *****");
  TII = static_cast<const AArch64InstrInfo *>(MF->getTarget().getInstrInfo());

  // This pass invalidates liveness information when it splits basic blocks.
  MF->getRegInfo().invalidateLiveness();

  // Renumber all of the machine basic blocks in the function, guaranteeing that
  // the numbers agree with the position of the block in the function.
  MF->RenumberBlocks();

  // Do the initial scan of the function, building up information about the
  // sizes of each block and location of each immediate branch.
  initializeFunctionInfo();

  // Iteratively fix up branches until there is no change.
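  // Fixing one branch can knock others out of range because it grows the
  // function, but block sizes only ever increase, so the process converges;
  // the iteration cap below is a backstop against bugs, not a tuning knob.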
  unsigned NoBRIters = 0;
  bool MadeChange = false;
  while (true) {
    DEBUG(dbgs() << "Beginning iteration #" << NoBRIters << '\n');
    bool BRChange = false;
    for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i)
      BRChange |= fixupImmediateBr(ImmBranches[i]);
    if (BRChange && ++NoBRIters > 30)
      report_fatal_error("Branch Fix Up pass failed to converge!");
    DEBUG(dumpBBs());

    if (!BRChange)
      break;
    MadeChange = true;
  }

  // After a while, this might be made debug-only, but it is not expensive.
  verify();

  DEBUG(dbgs() << '\n'; dumpBBs());

  BBInfo.clear();
  ImmBranches.clear();

  return MadeChange;
}

/// Return true if the specified basic block can fallthrough into the block
/// immediately after it.
static bool BBHasFallthrough(MachineBasicBlock *MBB) {
  // Get the next machine basic block in the function.
  MachineFunction::iterator MBBI = MBB;
  // Can't fall off end of function.
  if (llvm::next(MBBI) == MBB->getParent()->end())
    return false;

  MachineBasicBlock *NextBB = llvm::next(MBBI);
  for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
       E = MBB->succ_end(); I != E; ++I)
    if (*I == NextBB)
      return true;

  return false;
}

/// Do the initial scan of the function, building up information about the
/// sizes of each block and the location of each immediate branch.
void AArch64BranchFixup::initializeFunctionInfo() {
  BBInfo.clear();
  BBInfo.resize(MF->getNumBlockIDs());

  // First thing, compute the size of all basic blocks, and see if the function
  // has any inline assembly in it. If so, we have to be conservative about
  // alignment assumptions, as we don't know for sure the size of any
  // instructions in the inline assembly.
  for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I)
    computeBlockSize(I);

  // The known bits of the entry block offset are determined by the function
  // alignment.
  BBInfo.front().KnownBits = MF->getAlignment();
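  // (getAlignment() returns the log2 of the alignment in this LLVM version,
  // which is exactly the unit KnownBits uses.)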

  // Compute block offsets and known bits.
  adjustBBOffsetsAfter(MF->begin());

  // Now go back through the instructions and build up our data structures.
  for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
       MBBI != E; ++MBBI) {
    MachineBasicBlock &MBB = *MBBI;

    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
         I != E; ++I) {
      if (I->isDebugValue())
        continue;

      int Opc = I->getOpcode();
      if (I->isBranch()) {
        bool IsCond = false;

        // The offsets encoded in instructions here scale by the instruction
        // size (4 bytes), effectively increasing their range by 2 bits.
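        // After that scaling, TBZ/TBNZ (14-bit immediates) reach roughly
        // +/-32KiB, Bcc/CBZ/CBNZ (19-bit) roughly +/-1MiB, and the
        // unconditional B (26-bit) roughly +/-128MiB from the branch.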
        unsigned Bits = 0;
        switch (Opc) {
        default:
          continue;  // Ignore other branches (e.g. jump-table dispatch)
        case AArch64::TBZxii:
        case AArch64::TBZwii:
        case AArch64::TBNZxii:
        case AArch64::TBNZwii:
          IsCond = true;
          Bits = 14 + 2;
          break;
        case AArch64::Bcc:
        case AArch64::CBZx:
        case AArch64::CBZw:
        case AArch64::CBNZx:
        case AArch64::CBNZw:
          IsCond = true;
          Bits = 19 + 2;
          break;
        case AArch64::Bimm:
          Bits = 26 + 2;
          break;
        }

        // Record this immediate branch.
        ImmBranches.push_back(ImmBranch(I, Bits, IsCond));
      }
    }
  }
}

/// Compute the size and some alignment information for MBB.  This function
/// updates BBInfo directly.
void AArch64BranchFixup::computeBlockSize(MachineBasicBlock *MBB) {
  BasicBlockInfo &BBI = BBInfo[MBB->getNumber()];
  BBI.Size = 0;
  BBI.Unalign = 0;

  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
       ++I) {
    BBI.Size += TII->getInstSizeInBytes(*I);
    // For inline asm, getInstSizeInBytes returns a conservative estimate.
    // The actual size may be smaller, but still a multiple of the instr size.
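    // Every AArch64 instruction is 4 bytes, so the real size is still a
    // multiple of 4; record that only log2(4) == 2 low bits of it are known.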
    if (I->isInlineAsm())
      BBI.Unalign = 2;
  }
}

/// Return the current offset of the specified machine instruction from the
/// start of the function.  This offset changes as stuff is moved around inside
/// the function.
unsigned AArch64BranchFixup::getOffsetOf(MachineInstr *MI) const {
  MachineBasicBlock *MBB = MI->getParent();

  // The offset is composed of two things: the sum of the sizes of all MBB's
  // before this instruction's block, and the offset from the start of the block
  // it is in.
  unsigned Offset = BBInfo[MBB->getNumber()].Offset;

  // Sum instructions before MI in MBB.
  for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) {
    assert(I != MBB->end() && "Didn't find MI in its own basic block?");
    Offset += TII->getInstSizeInBytes(*I);
  }
  return Offset;
}

/// Split the basic block containing MI into two blocks, which are joined by
/// an unconditional branch.  Update data structures and renumber blocks to
/// account for this change, and return the newly created block.
MachineBasicBlock *
AArch64BranchFixup::splitBlockBeforeInstr(MachineInstr *MI) {
  MachineBasicBlock *OrigBB = MI->getParent();

  // Create a new MBB for the code after the OrigBB.
  MachineBasicBlock *NewBB =
    MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
  MachineFunction::iterator MBBI = OrigBB; ++MBBI;
  MF->insert(MBBI, NewBB);

  // Splice the instructions starting with MI over to NewBB.
  NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());

  // Add an unconditional branch from OrigBB to NewBB.
  // Note the new unconditional branch is not being recorded.
  // There doesn't seem to be meaningful DebugInfo available; this doesn't
  // correspond to anything in the source.
  BuildMI(OrigBB, DebugLoc(), TII->get(AArch64::Bimm)).addMBB(NewBB);
  ++NumSplit;

  // Update the CFG.  All succs of OrigBB are now succs of NewBB.
  NewBB->transferSuccessors(OrigBB);

  // OrigBB branches to NewBB.
  OrigBB->addSuccessor(NewBB);

  // Update internal data structures to account for the newly inserted MBB.
  MF->RenumberBlocks(NewBB);

  // Insert an entry into BBInfo to align it properly with the (newly
  // renumbered) block numbers.
  BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());

  // Figure out how large the OrigBB is.  As the first half of the original
  // block, it cannot contain a tablejump.  The size includes
  // the new jump we added.  (It should be possible to do this without
  // recounting everything, but it's very confusing, and this is rarely
  // executed.)
  computeBlockSize(OrigBB);

  // Figure out how large the NewBB is.  As the second half of the original
  // block, it may contain a tablejump.
  computeBlockSize(NewBB);

  // All BBOffsets following these blocks must be modified.
  adjustBBOffsetsAfter(OrigBB);

  return NewBB;
}

void AArch64BranchFixup::adjustBBOffsetsAfter(MachineBasicBlock *BB) {
  unsigned BBNum = BB->getNumber();
  for (unsigned i = BBNum + 1, e = MF->getNumBlockIDs(); i < e; ++i) {
    // Get the offset and known bits at the end of the layout predecessor.
    // Include the alignment of the current block.
    unsigned LogAlign = MF->getBlockNumbered(i)->getAlignment();
    unsigned Offset = BBInfo[i - 1].postOffset(LogAlign);
    unsigned KnownBits = BBInfo[i - 1].postKnownBits(LogAlign);

    // This is where block i begins.  Stop if the offset is already correct,
    // and we have updated 2 blocks.  This is the maximum number of blocks
    // changed before calling this function.
    if (i > BBNum + 2 &&
        BBInfo[i].Offset == Offset &&
        BBInfo[i].KnownBits == KnownBits)
      break;

    BBInfo[i].Offset = Offset;
    BBInfo[i].KnownBits = KnownBits;
  }
}

/// Returns true if the distance between the specified MI and the specified BB
/// can fit in MI's displacement field.
bool AArch64BranchFixup::isBBInRange(MachineInstr *MI,
                                     MachineBasicBlock *DestBB,
                                     unsigned OffsetBits) {
  int64_t BrOffset   = getOffsetOf(MI);
  int64_t DestOffset = BBInfo[DestBB->getNumber()].Offset;

  DEBUG(dbgs() << "Branch to destination BB#" << DestBB->getNumber()
               << " from BB#" << MI->getParent()->getNumber()
               << " bits available=" << OffsetBits
               << " from " << getOffsetOf(MI) << " to " << DestOffset
               << " offset " << int(DestOffset-BrOffset) << "\t" << *MI);
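  // isIntN checks that the byte displacement fits in a signed OffsetBits-bit
  // integer; OffsetBits already includes the two extra bits of range that the
  // 4-byte instruction scaling provides (see initializeFunctionInfo).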
  return isIntN(OffsetBits, DestOffset - BrOffset);
}

/// Fix up an immediate branch whose destination is too far away to fit in its
/// displacement field.
bool AArch64BranchFixup::fixupImmediateBr(ImmBranch &Br) {
  MachineInstr *MI = Br.MI;
  MachineBasicBlock *DestBB = 0;
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    if (MI->getOperand(i).isMBB()) {
      DestBB = MI->getOperand(i).getMBB();
      break;
    }
  }
  assert(DestBB && "Branch with no destination BB?");

  // Check to see if the DestBB is already in-range.
  if (isBBInRange(MI, DestBB, Br.OffsetBits))
    return false;

  assert(Br.IsCond && "Only conditional branches should need fixup");
  return fixupConditionalBr(Br);
}

/// Fix up a conditional branch whose destination is too far away to fit in its
/// displacement field. It is converted to an inverse conditional branch + an
/// unconditional branch to the destination.
bool
AArch64BranchFixup::fixupConditionalBr(ImmBranch &Br) {
  MachineInstr *MI = Br.MI;
  MachineBasicBlock *MBB = MI->getParent();
  unsigned CondBrMBBOperand = 0;

  // The general idea is to add an unconditional branch to the destination and
  // invert the conditional branch to jump over it. Complications occur around
  // fallthrough and unreachable ends to the block.
  //   b.lt L1
  //   =>
  //   b.ge L2
  //   b   L1
  // L2:

  // First we invert the conditional branch, by creating a replacement if
  // necessary. This if statement contains all the special handling of different
  // branch types.
  if (MI->getOpcode() == AArch64::Bcc) {
    // The basic block is operand number 1 for Bcc
    CondBrMBBOperand = 1;

    A64CC::CondCodes CC = (A64CC::CondCodes)MI->getOperand(0).getImm();
    CC = A64InvertCondCode(CC);
    MI->getOperand(0).setImm(CC);
  } else {
    MachineInstrBuilder InvertedMI;
    int InvertedOpcode;
    switch (MI->getOpcode()) {
    default: llvm_unreachable("Unknown branch type");
    case AArch64::TBZxii: InvertedOpcode = AArch64::TBNZxii; break;
    case AArch64::TBZwii: InvertedOpcode = AArch64::TBNZwii; break;
    case AArch64::TBNZxii: InvertedOpcode = AArch64::TBZxii; break;
    case AArch64::TBNZwii: InvertedOpcode = AArch64::TBZwii; break;
    case AArch64::CBZx: InvertedOpcode = AArch64::CBNZx; break;
    case AArch64::CBZw: InvertedOpcode = AArch64::CBNZw; break;
    case AArch64::CBNZx: InvertedOpcode = AArch64::CBZx; break;
    case AArch64::CBNZw: InvertedOpcode = AArch64::CBZw; break;
    }

    InvertedMI = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(InvertedOpcode));
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      InvertedMI.addOperand(MI->getOperand(i));
      if (MI->getOperand(i).isMBB())
        CondBrMBBOperand = i;
    }

    MI->eraseFromParent();
    MI = Br.MI = InvertedMI;
  }

  // If the branch is at the end of its MBB and that has a fall-through block,
  // direct the updated conditional branch to the fall-through
  // block. Otherwise, split the MBB before the next instruction.
  MachineInstr *BMI = &MBB->back();
  bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB);

  ++NumCBrFixed;
  if (BMI != MI) {
    if (llvm::next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) &&
        BMI->getOpcode() == AArch64::Bimm) {
      // Last MI in the BB is an unconditional branch. We can swap destinations:
      // b.eq L1 (temporarily b.ne L1 after first change)
      // b   L2
      // =>
      // b.ne L2
      // b   L1
      MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB();
      if (isBBInRange(MI, NewDest, Br.OffsetBits)) {
        DEBUG(dbgs() << "  Invert Bcc condition and swap its destination with "
                     << *BMI);
        MachineBasicBlock *DestBB = MI->getOperand(CondBrMBBOperand).getMBB();
        BMI->getOperand(0).setMBB(DestBB);
        MI->getOperand(CondBrMBBOperand).setMBB(NewDest);
        return true;
      }
    }
  }

  if (NeedSplit) {
    MachineBasicBlock::iterator MBBI = MI; ++MBBI;
    splitBlockBeforeInstr(MBBI);
    // No need for the branch to the next block. We're adding an unconditional
    // branch to the destination.
    int delta = TII->getInstSizeInBytes(MBB->back());
    BBInfo[MBB->getNumber()].Size -= delta;
    MBB->back().eraseFromParent();
    // BBInfo[SplitBB].Offset is wrong temporarily, fixed below
  }

  // After splitting and removing the unconditional branch from the original BB,
  // the structure is now:
  // oldbb:
  //   [things]
  //   b.invertedCC L1
  // splitbb/fallthroughbb:
  //   [old b L2/real continuation]
  //
  // We now have to change the conditional branch to point to splitbb and add an
  // unconditional branch after it to L1, giving the final structure:
  // oldbb:
  //   [things]
  //   b.invertedCC splitbb
  //   b L1
  // splitbb/fallthroughbb:
  //   [old b L2/real continuation]
  MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB));

  DEBUG(dbgs() << "  Insert B to BB#"
               << MI->getOperand(CondBrMBBOperand).getMBB()->getNumber()
               << " also invert condition and change dest. to BB#"
               << NextBB->getNumber() << "\n");

  // Insert a new unconditional branch and fixup the destination of the
  // conditional one.  Also update the ImmBranch as well as adding a new entry
  // for the new branch.
  BuildMI(MBB, DebugLoc(), TII->get(AArch64::Bimm))
    .addMBB(MI->getOperand(CondBrMBBOperand).getMBB());
  MI->getOperand(CondBrMBBOperand).setMBB(NextBB);

  BBInfo[MBB->getNumber()].Size += TII->getInstSizeInBytes(MBB->back());

  // 26 bits written down in Bimm, specifying a multiple of 4.
  unsigned OffsetBits = 26 + 2;
  ImmBranches.push_back(ImmBranch(&MBB->back(), OffsetBits, false));

  adjustBBOffsetsAfter(MBB);
  return true;
}