AArch64BranchFixupPass.cpp revision 263508
1//===-- AArch64BranchFixupPass.cpp - AArch64 branch fixup -----------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file contains a pass that fixes AArch64 branches which have ended up out 11// of range for their immediate operands. 12// 13//===----------------------------------------------------------------------===// 14 15#define DEBUG_TYPE "aarch64-branch-fixup" 16#include "AArch64.h" 17#include "AArch64InstrInfo.h" 18#include "Utils/AArch64BaseInfo.h" 19#include "llvm/CodeGen/MachineFunctionPass.h" 20#include "llvm/CodeGen/MachineInstrBuilder.h" 21#include "llvm/CodeGen/MachineRegisterInfo.h" 22#include "llvm/Support/Debug.h" 23#include "llvm/Support/Format.h" 24#include "llvm/Support/raw_ostream.h" 25#include "llvm/ADT/Statistic.h" 26using namespace llvm; 27 28STATISTIC(NumSplit, "Number of uncond branches inserted"); 29STATISTIC(NumCBrFixed, "Number of cond branches fixed"); 30 31/// Return the worst case padding that could result from unknown offset bits. 32/// This does not include alignment padding caused by known offset bits. 33/// 34/// @param LogAlign log2(alignment) 35/// @param KnownBits Number of known low offset bits. 36static inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) { 37 if (KnownBits < LogAlign) 38 return (1u << LogAlign) - (1u << KnownBits); 39 return 0; 40} 41 42namespace { 43 /// Due to limited PC-relative displacements, conditional branches to distant 44 /// blocks may need converting into an unconditional equivalent. For example: 45 /// tbz w1, #0, far_away 46 /// becomes 47 /// tbnz w1, #0, skip 48 /// b far_away 49 /// skip: 50 class AArch64BranchFixup : public MachineFunctionPass { 51 /// Information about the offset and size of a single basic block. 52 struct BasicBlockInfo { 53 /// Distance from the beginning of the function to the beginning of this 54 /// basic block. 55 /// 56 /// Offsets are computed assuming worst case padding before an aligned 57 /// block. This means that subtracting basic block offsets always gives a 58 /// conservative estimate of the real distance which may be smaller. 59 /// 60 /// Because worst case padding is used, the computed offset of an aligned 61 /// block may not actually be aligned. 62 unsigned Offset; 63 64 /// Size of the basic block in bytes. If the block contains inline 65 /// assembly, this is a worst case estimate. 66 /// 67 /// The size does not include any alignment padding whether from the 68 /// beginning of the block, or from an aligned jump table at the end. 69 unsigned Size; 70 71 /// The number of low bits in Offset that are known to be exact. The 72 /// remaining bits of Offset are an upper bound. 73 uint8_t KnownBits; 74 75 /// When non-zero, the block contains instructions (inline asm) of unknown 76 /// size. The real size may be smaller than Size bytes by a multiple of 1 77 /// << Unalign. 78 uint8_t Unalign; 79 80 BasicBlockInfo() : Offset(0), Size(0), KnownBits(0), Unalign(0) {} 81 82 /// Compute the number of known offset bits internally to this block. 83 /// This number should be used to predict worst case padding when 84 /// splitting the block. 85 unsigned internalKnownBits() const { 86 unsigned Bits = Unalign ? Unalign : KnownBits; 87 // If the block size isn't a multiple of the known bits, assume the 88 // worst case padding. 89 if (Size & ((1u << Bits) - 1)) 90 Bits = countTrailingZeros(Size); 91 return Bits; 92 } 93 94 /// Compute the offset immediately following this block. If LogAlign is 95 /// specified, return the offset the successor block will get if it has 96 /// this alignment. 97 unsigned postOffset(unsigned LogAlign = 0) const { 98 unsigned PO = Offset + Size; 99 if (!LogAlign) 100 return PO; 101 // Add alignment padding from the terminator. 102 return PO + UnknownPadding(LogAlign, internalKnownBits()); 103 } 104 105 /// Compute the number of known low bits of postOffset. If this block 106 /// contains inline asm, the number of known bits drops to the 107 /// instruction alignment. An aligned terminator may increase the number 108 /// of know bits. 109 /// If LogAlign is given, also consider the alignment of the next block. 110 unsigned postKnownBits(unsigned LogAlign = 0) const { 111 return std::max(LogAlign, internalKnownBits()); 112 } 113 }; 114 115 std::vector<BasicBlockInfo> BBInfo; 116 117 /// One per immediate branch, keeping the machine instruction pointer, 118 /// conditional or unconditional, the max displacement, and (if IsCond is 119 /// true) the corresponding inverted branch opcode. 120 struct ImmBranch { 121 MachineInstr *MI; 122 unsigned OffsetBits : 31; 123 bool IsCond : 1; 124 ImmBranch(MachineInstr *mi, unsigned offsetbits, bool cond) 125 : MI(mi), OffsetBits(offsetbits), IsCond(cond) {} 126 }; 127 128 /// Keep track of all the immediate branch instructions. 129 /// 130 std::vector<ImmBranch> ImmBranches; 131 132 MachineFunction *MF; 133 const AArch64InstrInfo *TII; 134 public: 135 static char ID; 136 AArch64BranchFixup() : MachineFunctionPass(ID) {} 137 138 virtual bool runOnMachineFunction(MachineFunction &MF); 139 140 virtual const char *getPassName() const { 141 return "AArch64 branch fixup pass"; 142 } 143 144 private: 145 void initializeFunctionInfo(); 146 MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI); 147 void adjustBBOffsetsAfter(MachineBasicBlock *BB); 148 bool isBBInRange(MachineInstr *MI, MachineBasicBlock *BB, 149 unsigned OffsetBits); 150 bool fixupImmediateBr(ImmBranch &Br); 151 bool fixupConditionalBr(ImmBranch &Br); 152 153 void computeBlockSize(MachineBasicBlock *MBB); 154 unsigned getOffsetOf(MachineInstr *MI) const; 155 void dumpBBs(); 156 void verify(); 157 }; 158 char AArch64BranchFixup::ID = 0; 159} 160 161/// check BBOffsets 162void AArch64BranchFixup::verify() { 163#ifndef NDEBUG 164 for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); 165 MBBI != E; ++MBBI) { 166 MachineBasicBlock *MBB = MBBI; 167 unsigned MBBId = MBB->getNumber(); 168 assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset); 169 } 170#endif 171} 172 173/// print block size and offset information - debugging 174void AArch64BranchFixup::dumpBBs() { 175 DEBUG({ 176 for (unsigned J = 0, E = BBInfo.size(); J !=E; ++J) { 177 const BasicBlockInfo &BBI = BBInfo[J]; 178 dbgs() << format("%08x BB#%u\t", BBI.Offset, J) 179 << " kb=" << unsigned(BBI.KnownBits) 180 << " ua=" << unsigned(BBI.Unalign) 181 << format(" size=%#x\n", BBInfo[J].Size); 182 } 183 }); 184} 185 186/// Returns an instance of the branch fixup pass. 187FunctionPass *llvm::createAArch64BranchFixupPass() { 188 return new AArch64BranchFixup(); 189} 190 191bool AArch64BranchFixup::runOnMachineFunction(MachineFunction &mf) { 192 MF = &mf; 193 DEBUG(dbgs() << "***** AArch64BranchFixup ******"); 194 TII = (const AArch64InstrInfo*)MF->getTarget().getInstrInfo(); 195 196 // This pass invalidates liveness information when it splits basic blocks. 197 MF->getRegInfo().invalidateLiveness(); 198 199 // Renumber all of the machine basic blocks in the function, guaranteeing that 200 // the numbers agree with the position of the block in the function. 201 MF->RenumberBlocks(); 202 203 // Do the initial scan of the function, building up information about the 204 // sizes of each block and location of each immediate branch. 205 initializeFunctionInfo(); 206 207 // Iteratively fix up branches until there is no change. 208 unsigned NoBRIters = 0; 209 bool MadeChange = false; 210 while (true) { 211 DEBUG(dbgs() << "Beginning iteration #" << NoBRIters << '\n'); 212 bool BRChange = false; 213 for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) 214 BRChange |= fixupImmediateBr(ImmBranches[i]); 215 if (BRChange && ++NoBRIters > 30) 216 report_fatal_error("Branch Fix Up pass failed to converge!"); 217 DEBUG(dumpBBs()); 218 219 if (!BRChange) 220 break; 221 MadeChange = true; 222 } 223 224 // After a while, this might be made debug-only, but it is not expensive. 225 verify(); 226 227 DEBUG(dbgs() << '\n'; dumpBBs()); 228 229 BBInfo.clear(); 230 ImmBranches.clear(); 231 232 return MadeChange; 233} 234 235/// Return true if the specified basic block can fallthrough into the block 236/// immediately after it. 237static bool BBHasFallthrough(MachineBasicBlock *MBB) { 238 // Get the next machine basic block in the function. 239 MachineFunction::iterator MBBI = MBB; 240 // Can't fall off end of function. 241 if (llvm::next(MBBI) == MBB->getParent()->end()) 242 return false; 243 244 MachineBasicBlock *NextBB = llvm::next(MBBI); 245 for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(), 246 E = MBB->succ_end(); I != E; ++I) 247 if (*I == NextBB) 248 return true; 249 250 return false; 251} 252 253/// Do the initial scan of the function, building up information about the sizes 254/// of each block, and each immediate branch. 255void AArch64BranchFixup::initializeFunctionInfo() { 256 BBInfo.clear(); 257 BBInfo.resize(MF->getNumBlockIDs()); 258 259 // First thing, compute the size of all basic blocks, and see if the function 260 // has any inline assembly in it. If so, we have to be conservative about 261 // alignment assumptions, as we don't know for sure the size of any 262 // instructions in the inline assembly. 263 for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) 264 computeBlockSize(I); 265 266 // The known bits of the entry block offset are determined by the function 267 // alignment. 268 BBInfo.front().KnownBits = MF->getAlignment(); 269 270 // Compute block offsets and known bits. 271 adjustBBOffsetsAfter(MF->begin()); 272 273 // Now go back through the instructions and build up our data structures. 274 for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); 275 MBBI != E; ++MBBI) { 276 MachineBasicBlock &MBB = *MBBI; 277 278 for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); 279 I != E; ++I) { 280 if (I->isDebugValue()) 281 continue; 282 283 int Opc = I->getOpcode(); 284 if (I->isBranch()) { 285 bool IsCond = false; 286 287 // The offsets encoded in instructions here scale by the instruction 288 // size (4 bytes), effectively increasing their range by 2 bits. 289 unsigned Bits = 0; 290 switch (Opc) { 291 default: 292 continue; // Ignore other JT branches 293 case AArch64::TBZxii: 294 case AArch64::TBZwii: 295 case AArch64::TBNZxii: 296 case AArch64::TBNZwii: 297 IsCond = true; 298 Bits = 14 + 2; 299 break; 300 case AArch64::Bcc: 301 case AArch64::CBZx: 302 case AArch64::CBZw: 303 case AArch64::CBNZx: 304 case AArch64::CBNZw: 305 IsCond = true; 306 Bits = 19 + 2; 307 break; 308 case AArch64::Bimm: 309 Bits = 26 + 2; 310 break; 311 } 312 313 // Record this immediate branch. 314 ImmBranches.push_back(ImmBranch(I, Bits, IsCond)); 315 } 316 } 317 } 318} 319 320/// Compute the size and some alignment information for MBB. This function 321/// updates BBInfo directly. 322void AArch64BranchFixup::computeBlockSize(MachineBasicBlock *MBB) { 323 BasicBlockInfo &BBI = BBInfo[MBB->getNumber()]; 324 BBI.Size = 0; 325 BBI.Unalign = 0; 326 327 for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; 328 ++I) { 329 BBI.Size += TII->getInstSizeInBytes(*I); 330 // For inline asm, GetInstSizeInBytes returns a conservative estimate. 331 // The actual size may be smaller, but still a multiple of the instr size. 332 if (I->isInlineAsm()) 333 BBI.Unalign = 2; 334 } 335} 336 337/// Return the current offset of the specified machine instruction from the 338/// start of the function. This offset changes as stuff is moved around inside 339/// the function. 340unsigned AArch64BranchFixup::getOffsetOf(MachineInstr *MI) const { 341 MachineBasicBlock *MBB = MI->getParent(); 342 343 // The offset is composed of two things: the sum of the sizes of all MBB's 344 // before this instruction's block, and the offset from the start of the block 345 // it is in. 346 unsigned Offset = BBInfo[MBB->getNumber()].Offset; 347 348 // Sum instructions before MI in MBB. 349 for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) { 350 assert(I != MBB->end() && "Didn't find MI in its own basic block?"); 351 Offset += TII->getInstSizeInBytes(*I); 352 } 353 return Offset; 354} 355 356/// Split the basic block containing MI into two blocks, which are joined by 357/// an unconditional branch. Update data structures and renumber blocks to 358/// account for this change and returns the newly created block. 359MachineBasicBlock * 360AArch64BranchFixup::splitBlockBeforeInstr(MachineInstr *MI) { 361 MachineBasicBlock *OrigBB = MI->getParent(); 362 363 // Create a new MBB for the code after the OrigBB. 364 MachineBasicBlock *NewBB = 365 MF->CreateMachineBasicBlock(OrigBB->getBasicBlock()); 366 MachineFunction::iterator MBBI = OrigBB; ++MBBI; 367 MF->insert(MBBI, NewBB); 368 369 // Splice the instructions starting with MI over to NewBB. 370 NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end()); 371 372 // Add an unconditional branch from OrigBB to NewBB. 373 // Note the new unconditional branch is not being recorded. 374 // There doesn't seem to be meaningful DebugInfo available; this doesn't 375 // correspond to anything in the source. 376 BuildMI(OrigBB, DebugLoc(), TII->get(AArch64::Bimm)).addMBB(NewBB); 377 ++NumSplit; 378 379 // Update the CFG. All succs of OrigBB are now succs of NewBB. 380 NewBB->transferSuccessors(OrigBB); 381 382 // OrigBB branches to NewBB. 383 OrigBB->addSuccessor(NewBB); 384 385 // Update internal data structures to account for the newly inserted MBB. 386 MF->RenumberBlocks(NewBB); 387 388 // Insert an entry into BBInfo to align it properly with the (newly 389 // renumbered) block numbers. 390 BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo()); 391 392 // Figure out how large the OrigBB is. As the first half of the original 393 // block, it cannot contain a tablejump. The size includes 394 // the new jump we added. (It should be possible to do this without 395 // recounting everything, but it's very confusing, and this is rarely 396 // executed.) 397 computeBlockSize(OrigBB); 398 399 // Figure out how large the NewMBB is. As the second half of the original 400 // block, it may contain a tablejump. 401 computeBlockSize(NewBB); 402 403 // All BBOffsets following these blocks must be modified. 404 adjustBBOffsetsAfter(OrigBB); 405 406 return NewBB; 407} 408 409void AArch64BranchFixup::adjustBBOffsetsAfter(MachineBasicBlock *BB) { 410 unsigned BBNum = BB->getNumber(); 411 for(unsigned i = BBNum + 1, e = MF->getNumBlockIDs(); i < e; ++i) { 412 // Get the offset and known bits at the end of the layout predecessor. 413 // Include the alignment of the current block. 414 unsigned LogAlign = MF->getBlockNumbered(i)->getAlignment(); 415 unsigned Offset = BBInfo[i - 1].postOffset(LogAlign); 416 unsigned KnownBits = BBInfo[i - 1].postKnownBits(LogAlign); 417 418 // This is where block i begins. Stop if the offset is already correct, 419 // and we have updated 2 blocks. This is the maximum number of blocks 420 // changed before calling this function. 421 if (i > BBNum + 2 && 422 BBInfo[i].Offset == Offset && 423 BBInfo[i].KnownBits == KnownBits) 424 break; 425 426 BBInfo[i].Offset = Offset; 427 BBInfo[i].KnownBits = KnownBits; 428 } 429} 430 431/// Returns true if the distance between specific MI and specific BB can fit in 432/// MI's displacement field. 433bool AArch64BranchFixup::isBBInRange(MachineInstr *MI, 434 MachineBasicBlock *DestBB, 435 unsigned OffsetBits) { 436 int64_t BrOffset = getOffsetOf(MI); 437 int64_t DestOffset = BBInfo[DestBB->getNumber()].Offset; 438 439 DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber() 440 << " from BB#" << MI->getParent()->getNumber() 441 << " bits available=" << OffsetBits 442 << " from " << getOffsetOf(MI) << " to " << DestOffset 443 << " offset " << int(DestOffset-BrOffset) << "\t" << *MI); 444 445 return isIntN(OffsetBits, DestOffset - BrOffset); 446} 447 448/// Fix up an immediate branch whose destination is too far away to fit in its 449/// displacement field. 450bool AArch64BranchFixup::fixupImmediateBr(ImmBranch &Br) { 451 MachineInstr *MI = Br.MI; 452 MachineBasicBlock *DestBB = 0; 453 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 454 if (MI->getOperand(i).isMBB()) { 455 DestBB = MI->getOperand(i).getMBB(); 456 break; 457 } 458 } 459 assert(DestBB && "Branch with no destination BB?"); 460 461 // Check to see if the DestBB is already in-range. 462 if (isBBInRange(MI, DestBB, Br.OffsetBits)) 463 return false; 464 465 assert(Br.IsCond && "Only conditional branches should need fixup"); 466 return fixupConditionalBr(Br); 467} 468 469/// Fix up a conditional branch whose destination is too far away to fit in its 470/// displacement field. It is converted to an inverse conditional branch + an 471/// unconditional branch to the destination. 472bool 473AArch64BranchFixup::fixupConditionalBr(ImmBranch &Br) { 474 MachineInstr *MI = Br.MI; 475 MachineBasicBlock *MBB = MI->getParent(); 476 unsigned CondBrMBBOperand = 0; 477 478 // The general idea is to add an unconditional branch to the destination and 479 // invert the conditional branch to jump over it. Complications occur around 480 // fallthrough and unreachable ends to the block. 481 // b.lt L1 482 // => 483 // b.ge L2 484 // b L1 485 // L2: 486 487 // First we invert the conditional branch, by creating a replacement if 488 // necessary. This if statement contains all the special handling of different 489 // branch types. 490 if (MI->getOpcode() == AArch64::Bcc) { 491 // The basic block is operand number 1 for Bcc 492 CondBrMBBOperand = 1; 493 494 A64CC::CondCodes CC = (A64CC::CondCodes)MI->getOperand(0).getImm(); 495 CC = A64InvertCondCode(CC); 496 MI->getOperand(0).setImm(CC); 497 } else { 498 MachineInstrBuilder InvertedMI; 499 int InvertedOpcode; 500 switch (MI->getOpcode()) { 501 default: llvm_unreachable("Unknown branch type"); 502 case AArch64::TBZxii: InvertedOpcode = AArch64::TBNZxii; break; 503 case AArch64::TBZwii: InvertedOpcode = AArch64::TBNZwii; break; 504 case AArch64::TBNZxii: InvertedOpcode = AArch64::TBZxii; break; 505 case AArch64::TBNZwii: InvertedOpcode = AArch64::TBZwii; break; 506 case AArch64::CBZx: InvertedOpcode = AArch64::CBNZx; break; 507 case AArch64::CBZw: InvertedOpcode = AArch64::CBNZw; break; 508 case AArch64::CBNZx: InvertedOpcode = AArch64::CBZx; break; 509 case AArch64::CBNZw: InvertedOpcode = AArch64::CBZw; break; 510 } 511 512 InvertedMI = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(InvertedOpcode)); 513 for (unsigned i = 0, e= MI->getNumOperands(); i != e; ++i) { 514 InvertedMI.addOperand(MI->getOperand(i)); 515 if (MI->getOperand(i).isMBB()) 516 CondBrMBBOperand = i; 517 } 518 519 MI->eraseFromParent(); 520 MI = Br.MI = InvertedMI; 521 } 522 523 // If the branch is at the end of its MBB and that has a fall-through block, 524 // direct the updated conditional branch to the fall-through 525 // block. Otherwise, split the MBB before the next instruction. 526 MachineInstr *BMI = &MBB->back(); 527 bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB); 528 529 ++NumCBrFixed; 530 if (BMI != MI) { 531 if (llvm::next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) && 532 BMI->getOpcode() == AArch64::Bimm) { 533 // Last MI in the BB is an unconditional branch. We can swap destinations: 534 // b.eq L1 (temporarily b.ne L1 after first change) 535 // b L2 536 // => 537 // b.ne L2 538 // b L1 539 MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB(); 540 if (isBBInRange(MI, NewDest, Br.OffsetBits)) { 541 DEBUG(dbgs() << " Invert Bcc condition and swap its destination with " 542 << *BMI); 543 MachineBasicBlock *DestBB = MI->getOperand(CondBrMBBOperand).getMBB(); 544 BMI->getOperand(0).setMBB(DestBB); 545 MI->getOperand(CondBrMBBOperand).setMBB(NewDest); 546 return true; 547 } 548 } 549 } 550 551 if (NeedSplit) { 552 MachineBasicBlock::iterator MBBI = MI; ++MBBI; 553 splitBlockBeforeInstr(MBBI); 554 // No need for the branch to the next block. We're adding an unconditional 555 // branch to the destination. 556 int delta = TII->getInstSizeInBytes(MBB->back()); 557 BBInfo[MBB->getNumber()].Size -= delta; 558 MBB->back().eraseFromParent(); 559 // BBInfo[SplitBB].Offset is wrong temporarily, fixed below 560 } 561 562 // After splitting and removing the unconditional branch from the original BB, 563 // the structure is now: 564 // oldbb: 565 // [things] 566 // b.invertedCC L1 567 // splitbb/fallthroughbb: 568 // [old b L2/real continuation] 569 // 570 // We now have to change the conditional branch to point to splitbb and add an 571 // unconditional branch after it to L1, giving the final structure: 572 // oldbb: 573 // [things] 574 // b.invertedCC splitbb 575 // b L1 576 // splitbb/fallthroughbb: 577 // [old b L2/real continuation] 578 MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB)); 579 580 DEBUG(dbgs() << " Insert B to BB#" 581 << MI->getOperand(CondBrMBBOperand).getMBB()->getNumber() 582 << " also invert condition and change dest. to BB#" 583 << NextBB->getNumber() << "\n"); 584 585 // Insert a new unconditional branch and fixup the destination of the 586 // conditional one. Also update the ImmBranch as well as adding a new entry 587 // for the new branch. 588 BuildMI(MBB, DebugLoc(), TII->get(AArch64::Bimm)) 589 .addMBB(MI->getOperand(CondBrMBBOperand).getMBB()); 590 MI->getOperand(CondBrMBBOperand).setMBB(NextBB); 591 592 BBInfo[MBB->getNumber()].Size += TII->getInstSizeInBytes(MBB->back()); 593 594 // 26 bits written down in Bimm, specifying a multiple of 4. 595 unsigned OffsetBits = 26 + 2; 596 ImmBranches.push_back(ImmBranch(&MBB->back(), OffsetBits, false)); 597 598 adjustBBOffsetsAfter(MBB); 599 return true; 600} 601