1234353Sdim//===-- PPCBranchSelector.cpp - Emit long conditional branches ------------===// 2193323Sed// 3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim// See https://llvm.org/LICENSE.txt for license information. 5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6193323Sed// 7193323Sed//===----------------------------------------------------------------------===// 8193323Sed// 9193323Sed// This file contains a pass that scans a machine function to determine which 10193323Sed// conditional branches need more than 16 bits of displacement to reach their 11193323Sed// target basic block. It does this in two passes; a calculation of basic block 12212904Sdim// positions pass, and a branch pseudo op to machine branch opcode pass. This 13193323Sed// pass should be run last, just before the assembly printer. 14193323Sed// 15193323Sed//===----------------------------------------------------------------------===// 16193323Sed 17321369Sdim#include "MCTargetDesc/PPCPredicates.h" 18193323Sed#include "PPC.h" 19193323Sed#include "PPCInstrBuilder.h" 20193323Sed#include "PPCInstrInfo.h" 21314564Sdim#include "PPCSubtarget.h" 22249423Sdim#include "llvm/ADT/Statistic.h" 23193323Sed#include "llvm/CodeGen/MachineFunctionPass.h" 24314564Sdim#include "llvm/CodeGen/MachineRegisterInfo.h" 25327952Sdim#include "llvm/CodeGen/TargetSubtargetInfo.h" 26249423Sdim#include "llvm/Support/MathExtras.h" 27193323Sed#include "llvm/Target/TargetMachine.h" 28353358Sdim#include <algorithm> 29193323Sedusing namespace llvm; 30193323Sed 31276479Sdim#define DEBUG_TYPE "ppc-branch-select" 32276479Sdim 33193323SedSTATISTIC(NumExpanded, "Number of branches expanded to long format"); 34193323Sed 35193323Sednamespace { 36198892Srdivacky struct PPCBSel : public MachineFunctionPass { 37193323Sed static char ID; 38249423Sdim PPCBSel() : MachineFunctionPass(ID) { 39249423Sdim initializePPCBSelPass(*PassRegistry::getPassRegistry()); 40249423Sdim } 41193323Sed 42314564Sdim // The sizes of the basic blocks in the function (the first 43314564Sdim // element of the pair); the second element of the pair is the amount of the 44314564Sdim // size that is due to potential padding. 45314564Sdim std::vector<std::pair<unsigned, unsigned>> BlockSizes; 46193323Sed 47353358Sdim // The first block number which has imprecise instruction address. 48353358Sdim int FirstImpreciseBlock = -1; 49353358Sdim 50353358Sdim unsigned GetAlignmentAdjustment(MachineBasicBlock &MBB, unsigned Offset); 51353358Sdim unsigned ComputeBlockSizes(MachineFunction &Fn); 52353358Sdim void modifyAdjustment(MachineFunction &Fn); 53353358Sdim int computeBranchSize(MachineFunction &Fn, 54353358Sdim const MachineBasicBlock *Src, 55353358Sdim const MachineBasicBlock *Dest, 56353358Sdim unsigned BrOffset); 57353358Sdim 58276479Sdim bool runOnMachineFunction(MachineFunction &Fn) override; 59193323Sed 60309124Sdim MachineFunctionProperties getRequiredProperties() const override { 61309124Sdim return MachineFunctionProperties().set( 62314564Sdim MachineFunctionProperties::Property::NoVRegs); 63309124Sdim } 64309124Sdim 65314564Sdim StringRef getPassName() const override { return "PowerPC Branch Selector"; } 66193323Sed }; 67193323Sed char PPCBSel::ID = 0; 68193323Sed} 69193323Sed 70249423SdimINITIALIZE_PASS(PPCBSel, "ppc-branch-select", "PowerPC Branch Selector", 71249423Sdim false, false) 72249423Sdim 73193323Sed/// createPPCBranchSelectionPass - returns an instance of the Branch Selection 74193323Sed/// Pass 75193323Sed/// 76193323SedFunctionPass *llvm::createPPCBranchSelectionPass() { 77193323Sed return new PPCBSel(); 78193323Sed} 79193323Sed 80353358Sdim/// In order to make MBB aligned, we need to add an adjustment value to the 81353358Sdim/// original Offset. 82353358Sdimunsigned PPCBSel::GetAlignmentAdjustment(MachineBasicBlock &MBB, 83353358Sdim unsigned Offset) { 84360784Sdim const Align Alignment = MBB.getAlignment(); 85360784Sdim if (Alignment == Align::None()) 86353358Sdim return 0; 87193323Sed 88360784Sdim const Align ParentAlign = MBB.getParent()->getAlignment(); 89280031Sdim 90360784Sdim if (Alignment <= ParentAlign) 91360784Sdim return offsetToAlignment(Offset, Alignment); 92280031Sdim 93353358Sdim // The alignment of this MBB is larger than the function's alignment, so we 94353358Sdim // can't tell whether or not it will insert nops. Assume that it will. 95353358Sdim if (FirstImpreciseBlock < 0) 96353358Sdim FirstImpreciseBlock = MBB.getNumber(); 97360784Sdim return Alignment.value() + offsetToAlignment(Offset, Alignment); 98353358Sdim} 99280031Sdim 100353358Sdim/// We need to be careful about the offset of the first block in the function 101353358Sdim/// because it might not have the function's alignment. This happens because, 102353358Sdim/// under the ELFv2 ABI, for functions which require a TOC pointer, we add a 103353358Sdim/// two-instruction sequence to the start of the function. 104353358Sdim/// Note: This needs to be synchronized with the check in 105353358Sdim/// PPCLinuxAsmPrinter::EmitFunctionBodyStart. 106353358Sdimstatic inline unsigned GetInitialOffset(MachineFunction &Fn) { 107314564Sdim unsigned InitialOffset = 0; 108314564Sdim if (Fn.getSubtarget<PPCSubtarget>().isELFv2ABI() && 109314564Sdim !Fn.getRegInfo().use_empty(PPC::X2)) 110314564Sdim InitialOffset = 8; 111353358Sdim return InitialOffset; 112353358Sdim} 113314564Sdim 114353358Sdim/// Measure each MBB and compute a size for the entire function. 115353358Sdimunsigned PPCBSel::ComputeBlockSizes(MachineFunction &Fn) { 116353358Sdim const PPCInstrInfo *TII = 117353358Sdim static_cast<const PPCInstrInfo *>(Fn.getSubtarget().getInstrInfo()); 118353358Sdim unsigned FuncSize = GetInitialOffset(Fn); 119353358Sdim 120193323Sed for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; 121193323Sed ++MFI) { 122296417Sdim MachineBasicBlock *MBB = &*MFI; 123193323Sed 124280031Sdim // The end of the previous block may have extra nops if this block has an 125280031Sdim // alignment requirement. 126280031Sdim if (MBB->getNumber() > 0) { 127280031Sdim unsigned AlignExtra = GetAlignmentAdjustment(*MBB, FuncSize); 128314564Sdim 129314564Sdim auto &BS = BlockSizes[MBB->getNumber()-1]; 130314564Sdim BS.first += AlignExtra; 131314564Sdim BS.second = AlignExtra; 132314564Sdim 133280031Sdim FuncSize += AlignExtra; 134280031Sdim } 135280031Sdim 136193323Sed unsigned BlockSize = 0; 137353358Sdim for (MachineInstr &MI : *MBB) { 138314564Sdim BlockSize += TII->getInstSizeInBytes(MI); 139353358Sdim if (MI.isInlineAsm() && (FirstImpreciseBlock < 0)) 140353358Sdim FirstImpreciseBlock = MBB->getNumber(); 141353358Sdim } 142309124Sdim 143314564Sdim BlockSizes[MBB->getNumber()].first = BlockSize; 144193323Sed FuncSize += BlockSize; 145193323Sed } 146341825Sdim 147353358Sdim return FuncSize; 148353358Sdim} 149353358Sdim 150353358Sdim/// Modify the basic block align adjustment. 151353358Sdimvoid PPCBSel::modifyAdjustment(MachineFunction &Fn) { 152353358Sdim unsigned Offset = GetInitialOffset(Fn); 153353358Sdim for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; 154353358Sdim ++MFI) { 155353358Sdim MachineBasicBlock *MBB = &*MFI; 156353358Sdim 157353358Sdim if (MBB->getNumber() > 0) { 158353358Sdim auto &BS = BlockSizes[MBB->getNumber()-1]; 159353358Sdim BS.first -= BS.second; 160353358Sdim Offset -= BS.second; 161353358Sdim 162353358Sdim unsigned AlignExtra = GetAlignmentAdjustment(*MBB, Offset); 163353358Sdim 164353358Sdim BS.first += AlignExtra; 165353358Sdim BS.second = AlignExtra; 166353358Sdim 167353358Sdim Offset += AlignExtra; 168353358Sdim } 169353358Sdim 170353358Sdim Offset += BlockSizes[MBB->getNumber()].first; 171353358Sdim } 172353358Sdim} 173353358Sdim 174353358Sdim/// Determine the offset from the branch in Src block to the Dest block. 175353358Sdim/// BrOffset is the offset of the branch instruction inside Src block. 176353358Sdimint PPCBSel::computeBranchSize(MachineFunction &Fn, 177353358Sdim const MachineBasicBlock *Src, 178353358Sdim const MachineBasicBlock *Dest, 179353358Sdim unsigned BrOffset) { 180353358Sdim int BranchSize; 181360784Sdim Align MaxAlign = Align(4); 182353358Sdim bool NeedExtraAdjustment = false; 183353358Sdim if (Dest->getNumber() <= Src->getNumber()) { 184353358Sdim // If this is a backwards branch, the delta is the offset from the 185353358Sdim // start of this block to this branch, plus the sizes of all blocks 186353358Sdim // from this block to the dest. 187353358Sdim BranchSize = BrOffset; 188353358Sdim MaxAlign = std::max(MaxAlign, Src->getAlignment()); 189353358Sdim 190353358Sdim int DestBlock = Dest->getNumber(); 191353358Sdim BranchSize += BlockSizes[DestBlock].first; 192353358Sdim for (unsigned i = DestBlock+1, e = Src->getNumber(); i < e; ++i) { 193353358Sdim BranchSize += BlockSizes[i].first; 194360784Sdim MaxAlign = std::max(MaxAlign, Fn.getBlockNumbered(i)->getAlignment()); 195353358Sdim } 196353358Sdim 197353358Sdim NeedExtraAdjustment = (FirstImpreciseBlock >= 0) && 198353358Sdim (DestBlock >= FirstImpreciseBlock); 199353358Sdim } else { 200353358Sdim // Otherwise, add the size of the blocks between this block and the 201353358Sdim // dest to the number of bytes left in this block. 202353358Sdim unsigned StartBlock = Src->getNumber(); 203353358Sdim BranchSize = BlockSizes[StartBlock].first - BrOffset; 204353358Sdim 205353358Sdim MaxAlign = std::max(MaxAlign, Dest->getAlignment()); 206353358Sdim for (unsigned i = StartBlock+1, e = Dest->getNumber(); i != e; ++i) { 207353358Sdim BranchSize += BlockSizes[i].first; 208360784Sdim MaxAlign = std::max(MaxAlign, Fn.getBlockNumbered(i)->getAlignment()); 209353358Sdim } 210353358Sdim 211353358Sdim NeedExtraAdjustment = (FirstImpreciseBlock >= 0) && 212353358Sdim (Src->getNumber() >= FirstImpreciseBlock); 213353358Sdim } 214353358Sdim 215353358Sdim // We tend to over estimate code size due to large alignment and 216353358Sdim // inline assembly. Usually it causes larger computed branch offset. 217353358Sdim // But sometimes it may also causes smaller computed branch offset 218353358Sdim // than actual branch offset. If the offset is close to the limit of 219353358Sdim // encoding, it may cause problem at run time. 220353358Sdim // Following is a simplified example. 221353358Sdim // 222353358Sdim // actual estimated 223353358Sdim // address address 224353358Sdim // ... 225353358Sdim // bne Far 100 10c 226353358Sdim // .p2align 4 227353358Sdim // Near: 110 110 228353358Sdim // ... 229353358Sdim // Far: 8108 8108 230353358Sdim // 231353358Sdim // Actual offset: 0x8108 - 0x100 = 0x8008 232353358Sdim // Computed offset: 0x8108 - 0x10c = 0x7ffc 233353358Sdim // 234353358Sdim // This example also shows when we can get the largest gap between 235353358Sdim // estimated offset and actual offset. If there is an aligned block 236353358Sdim // ABB between branch and target, assume its alignment is <align> 237353358Sdim // bits. Now consider the accumulated function size FSIZE till the end 238353358Sdim // of previous block PBB. If the estimated FSIZE is multiple of 239353358Sdim // 2^<align>, we don't need any padding for the estimated address of 240353358Sdim // ABB. If actual FSIZE at the end of PBB is 4 bytes more than 241353358Sdim // multiple of 2^<align>, then we need (2^<align> - 4) bytes of 242353358Sdim // padding. It also means the actual branch offset is (2^<align> - 4) 243353358Sdim // larger than computed offset. Other actual FSIZE needs less padding 244353358Sdim // bytes, so causes smaller gap between actual and computed offset. 245353358Sdim // 246353358Sdim // On the other hand, if the inline asm or large alignment occurs 247353358Sdim // between the branch block and destination block, the estimated address 248353358Sdim // can be <delta> larger than actual address. If padding bytes are 249353358Sdim // needed for a later aligned block, the actual number of padding bytes 250353358Sdim // is at most <delta> more than estimated padding bytes. So the actual 251353358Sdim // aligned block address is less than or equal to the estimated aligned 252353358Sdim // block address. So the actual branch offset is less than or equal to 253353358Sdim // computed branch offset. 254353358Sdim // 255353358Sdim // The computed offset is at most ((1 << alignment) - 4) bytes smaller 256353358Sdim // than actual offset. So we add this number to the offset for safety. 257353358Sdim if (NeedExtraAdjustment) 258360784Sdim BranchSize += MaxAlign.value() - 4; 259353358Sdim 260353358Sdim return BranchSize; 261353358Sdim} 262353358Sdim 263353358Sdimbool PPCBSel::runOnMachineFunction(MachineFunction &Fn) { 264353358Sdim const PPCInstrInfo *TII = 265353358Sdim static_cast<const PPCInstrInfo *>(Fn.getSubtarget().getInstrInfo()); 266353358Sdim // Give the blocks of the function a dense, in-order, numbering. 267353358Sdim Fn.RenumberBlocks(); 268353358Sdim BlockSizes.resize(Fn.getNumBlockIDs()); 269353358Sdim FirstImpreciseBlock = -1; 270353358Sdim 271353358Sdim // Measure each MBB and compute a size for the entire function. 272353358Sdim unsigned FuncSize = ComputeBlockSizes(Fn); 273353358Sdim 274193323Sed // If the entire function is smaller than the displacement of a branch field, 275193323Sed // we know we don't need to shrink any branches in this function. This is a 276193323Sed // common case. 277193323Sed if (FuncSize < (1 << 15)) { 278193323Sed BlockSizes.clear(); 279193323Sed return false; 280193323Sed } 281341825Sdim 282193323Sed // For each conditional branch, if the offset to its destination is larger 283193323Sed // than the offset field allows, transform it into a long branch sequence 284193323Sed // like this: 285193323Sed // short branch: 286193323Sed // bCC MBB 287193323Sed // long branch: 288193323Sed // b!CC $PC+8 289193323Sed // b MBB 290193323Sed // 291193323Sed bool MadeChange = true; 292193323Sed bool EverMadeChange = false; 293193323Sed while (MadeChange) { 294193323Sed // Iteratively expand branches until we reach a fixed point. 295193323Sed MadeChange = false; 296341825Sdim 297193323Sed for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; 298193323Sed ++MFI) { 299193323Sed MachineBasicBlock &MBB = *MFI; 300193323Sed unsigned MBBStartOffset = 0; 301193323Sed for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); 302193323Sed I != E; ++I) { 303276479Sdim MachineBasicBlock *Dest = nullptr; 304251662Sdim if (I->getOpcode() == PPC::BCC && !I->getOperand(2).isImm()) 305251662Sdim Dest = I->getOperand(2).getMBB(); 306276479Sdim else if ((I->getOpcode() == PPC::BC || I->getOpcode() == PPC::BCn) && 307276479Sdim !I->getOperand(1).isImm()) 308276479Sdim Dest = I->getOperand(1).getMBB(); 309251662Sdim else if ((I->getOpcode() == PPC::BDNZ8 || I->getOpcode() == PPC::BDNZ || 310251662Sdim I->getOpcode() == PPC::BDZ8 || I->getOpcode() == PPC::BDZ) && 311251662Sdim !I->getOperand(0).isImm()) 312251662Sdim Dest = I->getOperand(0).getMBB(); 313251662Sdim 314251662Sdim if (!Dest) { 315314564Sdim MBBStartOffset += TII->getInstSizeInBytes(*I); 316193323Sed continue; 317193323Sed } 318341825Sdim 319193323Sed // Determine the offset from the current branch to the destination 320193323Sed // block. 321353358Sdim int BranchSize = computeBranchSize(Fn, &MBB, Dest, MBBStartOffset); 322341825Sdim 323193323Sed // If this branch is in range, ignore it. 324206083Srdivacky if (isInt<16>(BranchSize)) { 325193323Sed MBBStartOffset += 4; 326193323Sed continue; 327193323Sed } 328239462Sdim 329193323Sed // Otherwise, we have to expand it to a long branch. 330314564Sdim MachineInstr &OldBranch = *I; 331314564Sdim DebugLoc dl = OldBranch.getDebugLoc(); 332314564Sdim 333239462Sdim if (I->getOpcode() == PPC::BCC) { 334239462Sdim // The BCC operands are: 335239462Sdim // 0. PPC branch predicate 336239462Sdim // 1. CR register 337239462Sdim // 2. Target MBB 338239462Sdim PPC::Predicate Pred = (PPC::Predicate)I->getOperand(0).getImm(); 339360784Sdim Register CRReg = I->getOperand(1).getReg(); 340341825Sdim 341239462Sdim // Jump over the uncond branch inst (i.e. $PC+8) on opposite condition. 342239462Sdim BuildMI(MBB, I, dl, TII->get(PPC::BCC)) 343239462Sdim .addImm(PPC::InvertPredicate(Pred)).addReg(CRReg).addImm(2); 344276479Sdim } else if (I->getOpcode() == PPC::BC) { 345360784Sdim Register CRBit = I->getOperand(0).getReg(); 346276479Sdim BuildMI(MBB, I, dl, TII->get(PPC::BCn)).addReg(CRBit).addImm(2); 347276479Sdim } else if (I->getOpcode() == PPC::BCn) { 348360784Sdim Register CRBit = I->getOperand(0).getReg(); 349276479Sdim BuildMI(MBB, I, dl, TII->get(PPC::BC)).addReg(CRBit).addImm(2); 350239462Sdim } else if (I->getOpcode() == PPC::BDNZ) { 351239462Sdim BuildMI(MBB, I, dl, TII->get(PPC::BDZ)).addImm(2); 352239462Sdim } else if (I->getOpcode() == PPC::BDNZ8) { 353239462Sdim BuildMI(MBB, I, dl, TII->get(PPC::BDZ8)).addImm(2); 354239462Sdim } else if (I->getOpcode() == PPC::BDZ) { 355239462Sdim BuildMI(MBB, I, dl, TII->get(PPC::BDNZ)).addImm(2); 356239462Sdim } else if (I->getOpcode() == PPC::BDZ8) { 357239462Sdim BuildMI(MBB, I, dl, TII->get(PPC::BDNZ8)).addImm(2); 358239462Sdim } else { 359239462Sdim llvm_unreachable("Unhandled branch type!"); 360239462Sdim } 361341825Sdim 362193323Sed // Uncond branch to the real destination. 363193323Sed I = BuildMI(MBB, I, dl, TII->get(PPC::B)).addMBB(Dest); 364193323Sed 365193323Sed // Remove the old branch from the function. 366314564Sdim OldBranch.eraseFromParent(); 367314564Sdim 368193323Sed // Remember that this instruction is 8-bytes, increase the size of the 369193323Sed // block by 4, remember to iterate. 370314564Sdim BlockSizes[MBB.getNumber()].first += 4; 371193323Sed MBBStartOffset += 8; 372193323Sed ++NumExpanded; 373193323Sed MadeChange = true; 374193323Sed } 375193323Sed } 376314564Sdim 377314564Sdim if (MadeChange) { 378314564Sdim // If we're going to iterate again, make sure we've updated our 379314564Sdim // padding-based contributions to the block sizes. 380353358Sdim modifyAdjustment(Fn); 381314564Sdim } 382314564Sdim 383193323Sed EverMadeChange |= MadeChange; 384193323Sed } 385341825Sdim 386193323Sed BlockSizes.clear(); 387193323Sed return true; 388193323Sed} 389