1218893Sdim//===-- ARMConstantIslandPass.cpp - ARM constant islands ------------------===// 2193323Sed// 3193323Sed// The LLVM Compiler Infrastructure 4193323Sed// 5193323Sed// This file is distributed under the University of Illinois Open Source 6193323Sed// License. See LICENSE.TXT for details. 7193323Sed// 8193323Sed//===----------------------------------------------------------------------===// 9193323Sed// 10193323Sed// This file contains a pass that splits the constant pool up into 'islands' 11193323Sed// which are scattered through-out the function. This is required due to the 12193323Sed// limited pc-relative displacements that ARM has. 13193323Sed// 14193323Sed//===----------------------------------------------------------------------===// 15193323Sed 16193323Sed#define DEBUG_TYPE "arm-cp-islands" 17193323Sed#include "ARM.h" 18193323Sed#include "ARMMachineFunctionInfo.h" 19252723Sdim#include "MCTargetDesc/ARMAddressingModes.h" 20212904Sdim#include "Thumb2InstrInfo.h" 21252723Sdim#include "llvm/ADT/STLExtras.h" 22252723Sdim#include "llvm/ADT/SmallSet.h" 23252723Sdim#include "llvm/ADT/SmallVector.h" 24252723Sdim#include "llvm/ADT/Statistic.h" 25193323Sed#include "llvm/CodeGen/MachineConstantPool.h" 26193323Sed#include "llvm/CodeGen/MachineFunctionPass.h" 27198090Srdivacky#include "llvm/CodeGen/MachineJumpTableInfo.h" 28235633Sdim#include "llvm/CodeGen/MachineRegisterInfo.h" 29252723Sdim#include "llvm/IR/DataLayout.h" 30252723Sdim#include "llvm/Support/CommandLine.h" 31193323Sed#include "llvm/Support/Debug.h" 32198090Srdivacky#include "llvm/Support/ErrorHandling.h" 33235633Sdim#include "llvm/Support/Format.h" 34198090Srdivacky#include "llvm/Support/raw_ostream.h" 35252723Sdim#include "llvm/Target/TargetMachine.h" 36198396Srdivacky#include <algorithm> 37193323Sedusing namespace llvm; 38193323Sed 39198090SrdivackySTATISTIC(NumCPEs, "Number of constpool entries"); 40198090SrdivackySTATISTIC(NumSplit, "Number of uncond branches inserted"); 41198090SrdivackySTATISTIC(NumCBrFixed, "Number of cond branches fixed"); 42198090SrdivackySTATISTIC(NumUBrFixed, "Number of uncond branches fixed"); 43198090SrdivackySTATISTIC(NumTBs, "Number of table branches generated"); 44198090SrdivackySTATISTIC(NumT2CPShrunk, "Number of Thumb2 constantpool instructions shrunk"); 45198090SrdivackySTATISTIC(NumT2BrShrunk, "Number of Thumb2 immediate branches shrunk"); 46198892SrdivackySTATISTIC(NumCBZ, "Number of CBZ / CBNZ formed"); 47199481SrdivackySTATISTIC(NumJTMoved, "Number of jump table destination blocks moved"); 48199481SrdivackySTATISTIC(NumJTInserted, "Number of jump table intermediate blocks inserted"); 49193323Sed 50199481Srdivacky 51199481Srdivackystatic cl::opt<bool> 52199481SrdivackyAdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true), 53199481Srdivacky cl::desc("Adjust basic block layout to better use TB[BH]")); 54199481Srdivacky 55235633Sdim// FIXME: This option should be removed once it has received sufficient testing. 56235633Sdimstatic cl::opt<bool> 57235633SdimAlignConstantIslands("arm-align-constant-islands", cl::Hidden, cl::init(true), 58235633Sdim cl::desc("Align constant islands in code")); 59235633Sdim 60235633Sdim/// UnknownPadding - Return the worst case padding that could result from 61235633Sdim/// unknown offset bits. This does not include alignment padding caused by 62235633Sdim/// known offset bits. 63235633Sdim/// 64235633Sdim/// @param LogAlign log2(alignment) 65235633Sdim/// @param KnownBits Number of known low offset bits. 66235633Sdimstatic inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) { 67235633Sdim if (KnownBits < LogAlign) 68235633Sdim return (1u << LogAlign) - (1u << KnownBits); 69235633Sdim return 0; 70235633Sdim} 71235633Sdim 72193323Sednamespace { 73193323Sed /// ARMConstantIslands - Due to limited PC-relative displacements, ARM 74193323Sed /// requires constant pool entries to be scattered among the instructions 75193323Sed /// inside a function. To do this, it completely ignores the normal LLVM 76193323Sed /// constant pool; instead, it places constants wherever it feels like with 77193323Sed /// special instructions. 78193323Sed /// 79193323Sed /// The terminology used in this pass includes: 80193323Sed /// Islands - Clumps of constants placed in the function. 81193323Sed /// Water - Potential places where an island could be formed. 82193323Sed /// CPE - A constant pool entry that has been placed somewhere, which 83193323Sed /// tracks a list of users. 84198892Srdivacky class ARMConstantIslands : public MachineFunctionPass { 85235633Sdim /// BasicBlockInfo - Information about the offset and size of a single 86235633Sdim /// basic block. 87235633Sdim struct BasicBlockInfo { 88235633Sdim /// Offset - Distance from the beginning of the function to the beginning 89235633Sdim /// of this basic block. 90235633Sdim /// 91245431Sdim /// Offsets are computed assuming worst case padding before an aligned 92245431Sdim /// block. This means that subtracting basic block offsets always gives a 93245431Sdim /// conservative estimate of the real distance which may be smaller. 94245431Sdim /// 95245431Sdim /// Because worst case padding is used, the computed offset of an aligned 96245431Sdim /// block may not actually be aligned. 97235633Sdim unsigned Offset; 98193323Sed 99235633Sdim /// Size - Size of the basic block in bytes. If the block contains 100235633Sdim /// inline assembly, this is a worst case estimate. 101235633Sdim /// 102235633Sdim /// The size does not include any alignment padding whether from the 103235633Sdim /// beginning of the block, or from an aligned jump table at the end. 104235633Sdim unsigned Size; 105193323Sed 106235633Sdim /// KnownBits - The number of low bits in Offset that are known to be 107235633Sdim /// exact. The remaining bits of Offset are an upper bound. 108235633Sdim uint8_t KnownBits; 109235633Sdim 110235633Sdim /// Unalign - When non-zero, the block contains instructions (inline asm) 111235633Sdim /// of unknown size. The real size may be smaller than Size bytes by a 112235633Sdim /// multiple of 1 << Unalign. 113235633Sdim uint8_t Unalign; 114235633Sdim 115235633Sdim /// PostAlign - When non-zero, the block terminator contains a .align 116235633Sdim /// directive, so the end of the block is aligned to 1 << PostAlign 117235633Sdim /// bytes. 118235633Sdim uint8_t PostAlign; 119235633Sdim 120235633Sdim BasicBlockInfo() : Offset(0), Size(0), KnownBits(0), Unalign(0), 121235633Sdim PostAlign(0) {} 122235633Sdim 123235633Sdim /// Compute the number of known offset bits internally to this block. 124235633Sdim /// This number should be used to predict worst case padding when 125235633Sdim /// splitting the block. 126235633Sdim unsigned internalKnownBits() const { 127245431Sdim unsigned Bits = Unalign ? Unalign : KnownBits; 128245431Sdim // If the block size isn't a multiple of the known bits, assume the 129245431Sdim // worst case padding. 130245431Sdim if (Size & ((1u << Bits) - 1)) 131263509Sdim Bits = countTrailingZeros(Size); 132245431Sdim return Bits; 133235633Sdim } 134235633Sdim 135235633Sdim /// Compute the offset immediately following this block. If LogAlign is 136235633Sdim /// specified, return the offset the successor block will get if it has 137235633Sdim /// this alignment. 138235633Sdim unsigned postOffset(unsigned LogAlign = 0) const { 139235633Sdim unsigned PO = Offset + Size; 140235633Sdim unsigned LA = std::max(unsigned(PostAlign), LogAlign); 141235633Sdim if (!LA) 142235633Sdim return PO; 143235633Sdim // Add alignment padding from the terminator. 144245431Sdim return PO + UnknownPadding(LA, internalKnownBits()); 145235633Sdim } 146235633Sdim 147235633Sdim /// Compute the number of known low bits of postOffset. If this block 148235633Sdim /// contains inline asm, the number of known bits drops to the 149235633Sdim /// instruction alignment. An aligned terminator may increase the number 150235633Sdim /// of know bits. 151235633Sdim /// If LogAlign is given, also consider the alignment of the next block. 152235633Sdim unsigned postKnownBits(unsigned LogAlign = 0) const { 153235633Sdim return std::max(std::max(unsigned(PostAlign), LogAlign), 154235633Sdim internalKnownBits()); 155235633Sdim } 156235633Sdim }; 157235633Sdim 158235633Sdim std::vector<BasicBlockInfo> BBInfo; 159235633Sdim 160193323Sed /// WaterList - A sorted list of basic blocks where islands could be placed 161193323Sed /// (i.e. blocks that don't fall through to the following block, due 162193323Sed /// to a return, unreachable, or unconditional branch). 163193323Sed std::vector<MachineBasicBlock*> WaterList; 164193323Sed 165198396Srdivacky /// NewWaterList - The subset of WaterList that was created since the 166198396Srdivacky /// previous iteration by inserting unconditional branches. 167198396Srdivacky SmallSet<MachineBasicBlock*, 4> NewWaterList; 168198396Srdivacky 169198090Srdivacky typedef std::vector<MachineBasicBlock*>::iterator water_iterator; 170198090Srdivacky 171193323Sed /// CPUser - One user of a constant pool, keeping the machine instruction 172193323Sed /// pointer, the constant pool being referenced, and the max displacement 173198113Srdivacky /// allowed from the instruction to the CP. The HighWaterMark records the 174198113Srdivacky /// highest basic block where a new CPEntry can be placed. To ensure this 175198113Srdivacky /// pass terminates, the CP entries are initially placed at the end of the 176198113Srdivacky /// function and then move monotonically to lower addresses. The 177198113Srdivacky /// exception to this rule is when the current CP entry for a particular 178198113Srdivacky /// CPUser is out of range, but there is another CP entry for the same 179198113Srdivacky /// constant value in range. We want to use the existing in-range CP 180198113Srdivacky /// entry, but if it later moves out of range, the search for new water 181198113Srdivacky /// should resume where it left off. The HighWaterMark is used to record 182198113Srdivacky /// that point. 183193323Sed struct CPUser { 184193323Sed MachineInstr *MI; 185193323Sed MachineInstr *CPEMI; 186198113Srdivacky MachineBasicBlock *HighWaterMark; 187235633Sdim private: 188193323Sed unsigned MaxDisp; 189235633Sdim public: 190198090Srdivacky bool NegOk; 191198090Srdivacky bool IsSoImm; 192235633Sdim bool KnownAlignment; 193198090Srdivacky CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp, 194198090Srdivacky bool neg, bool soimm) 195235633Sdim : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp), NegOk(neg), IsSoImm(soimm), 196235633Sdim KnownAlignment(false) { 197198113Srdivacky HighWaterMark = CPEMI->getParent(); 198198113Srdivacky } 199235633Sdim /// getMaxDisp - Returns the maximum displacement supported by MI. 200235633Sdim /// Correct for unknown alignment. 201235633Sdim /// Conservatively subtract 2 bytes to handle weird alignment effects. 202235633Sdim unsigned getMaxDisp() const { 203235633Sdim return (KnownAlignment ? MaxDisp : MaxDisp - 2) - 2; 204235633Sdim } 205193323Sed }; 206193323Sed 207193323Sed /// CPUsers - Keep track of all of the machine instructions that use various 208193323Sed /// constant pools and their max displacement. 209193323Sed std::vector<CPUser> CPUsers; 210193323Sed 211193323Sed /// CPEntry - One per constant pool entry, keeping the machine instruction 212193323Sed /// pointer, the constpool index, and the number of CPUser's which 213193323Sed /// reference this entry. 214193323Sed struct CPEntry { 215193323Sed MachineInstr *CPEMI; 216193323Sed unsigned CPI; 217193323Sed unsigned RefCount; 218193323Sed CPEntry(MachineInstr *cpemi, unsigned cpi, unsigned rc = 0) 219193323Sed : CPEMI(cpemi), CPI(cpi), RefCount(rc) {} 220193323Sed }; 221193323Sed 222193323Sed /// CPEntries - Keep track of all of the constant pool entry machine 223193323Sed /// instructions. For each original constpool index (i.e. those that 224193323Sed /// existed upon entry to this pass), it keeps a vector of entries. 225193323Sed /// Original elements are cloned as we go along; the clones are 226193323Sed /// put in the vector of the original element, but have distinct CPIs. 227193323Sed std::vector<std::vector<CPEntry> > CPEntries; 228193323Sed 229193323Sed /// ImmBranch - One per immediate branch, keeping the machine instruction 230193323Sed /// pointer, conditional or unconditional, the max displacement, 231193323Sed /// and (if isCond is true) the corresponding unconditional branch 232193323Sed /// opcode. 233193323Sed struct ImmBranch { 234193323Sed MachineInstr *MI; 235193323Sed unsigned MaxDisp : 31; 236193323Sed bool isCond : 1; 237193323Sed int UncondBr; 238193323Sed ImmBranch(MachineInstr *mi, unsigned maxdisp, bool cond, int ubr) 239193323Sed : MI(mi), MaxDisp(maxdisp), isCond(cond), UncondBr(ubr) {} 240193323Sed }; 241193323Sed 242193323Sed /// ImmBranches - Keep track of all the immediate branch instructions. 243193323Sed /// 244193323Sed std::vector<ImmBranch> ImmBranches; 245193323Sed 246193323Sed /// PushPopMIs - Keep track of all the Thumb push / pop instructions. 247193323Sed /// 248193323Sed SmallVector<MachineInstr*, 4> PushPopMIs; 249193323Sed 250198090Srdivacky /// T2JumpTables - Keep track of all the Thumb2 jumptable instructions. 251198090Srdivacky SmallVector<MachineInstr*, 4> T2JumpTables; 252198090Srdivacky 253193323Sed /// HasFarJump - True if any far jump instruction has been emitted during 254193323Sed /// the branch fix up pass. 255193323Sed bool HasFarJump; 256193323Sed 257235633Sdim MachineFunction *MF; 258235633Sdim MachineConstantPool *MCP; 259235633Sdim const ARMBaseInstrInfo *TII; 260198090Srdivacky const ARMSubtarget *STI; 261193323Sed ARMFunctionInfo *AFI; 262193323Sed bool isThumb; 263198090Srdivacky bool isThumb1; 264195340Sed bool isThumb2; 265193323Sed public: 266193323Sed static char ID; 267212904Sdim ARMConstantIslands() : MachineFunctionPass(ID) {} 268193323Sed 269198090Srdivacky virtual bool runOnMachineFunction(MachineFunction &MF); 270193323Sed 271193323Sed virtual const char *getPassName() const { 272193323Sed return "ARM constant island placement and branch shortening pass"; 273193323Sed } 274193323Sed 275193323Sed private: 276235633Sdim void doInitialPlacement(std::vector<MachineInstr*> &CPEMIs); 277193323Sed CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI); 278235633Sdim unsigned getCPELogAlign(const MachineInstr *CPEMI); 279235633Sdim void scanFunctionJumpTables(); 280235633Sdim void initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs); 281235633Sdim MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI); 282235633Sdim void updateForInsertedWaterBlock(MachineBasicBlock *NewBB); 283235633Sdim void adjustBBOffsetsAfter(MachineBasicBlock *BB); 284235633Sdim bool decrementCPEReferenceCount(unsigned CPI, MachineInstr* CPEMI); 285235633Sdim int findInRangeCPEntry(CPUser& U, unsigned UserOffset); 286235633Sdim bool findAvailableWater(CPUser&U, unsigned UserOffset, 287235633Sdim water_iterator &WaterIter); 288235633Sdim void createNewWater(unsigned CPUserIndex, unsigned UserOffset, 289198090Srdivacky MachineBasicBlock *&NewMBB); 290235633Sdim bool handleConstantPoolUser(unsigned CPUserIndex); 291235633Sdim void removeDeadCPEMI(MachineInstr *CPEMI); 292235633Sdim bool removeUnusedCPEntries(); 293235633Sdim bool isCPEntryInRange(MachineInstr *MI, unsigned UserOffset, 294235633Sdim MachineInstr *CPEMI, unsigned Disp, bool NegOk, 295235633Sdim bool DoDump = false); 296235633Sdim bool isWaterInRange(unsigned UserOffset, MachineBasicBlock *Water, 297235633Sdim CPUser &U, unsigned &Growth); 298235633Sdim bool isBBInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp); 299235633Sdim bool fixupImmediateBr(ImmBranch &Br); 300235633Sdim bool fixupConditionalBr(ImmBranch &Br); 301235633Sdim bool fixupUnconditionalBr(ImmBranch &Br); 302235633Sdim bool undoLRSpillRestore(); 303235633Sdim bool mayOptimizeThumb2Instruction(const MachineInstr *MI) const; 304235633Sdim bool optimizeThumb2Instructions(); 305235633Sdim bool optimizeThumb2Branches(); 306235633Sdim bool reorderThumb2JumpTables(); 307235633Sdim bool optimizeThumb2JumpTables(); 308235633Sdim MachineBasicBlock *adjustJTTargetBlockForward(MachineBasicBlock *BB, 309199481Srdivacky MachineBasicBlock *JTBB); 310193323Sed 311235633Sdim void computeBlockSize(MachineBasicBlock *MBB); 312235633Sdim unsigned getOffsetOf(MachineInstr *MI) const; 313235633Sdim unsigned getUserOffset(CPUser&) const; 314193323Sed void dumpBBs(); 315235633Sdim void verify(); 316235633Sdim 317235633Sdim bool isOffsetInRange(unsigned UserOffset, unsigned TrialOffset, 318235633Sdim unsigned Disp, bool NegativeOK, bool IsSoImm = false); 319235633Sdim bool isOffsetInRange(unsigned UserOffset, unsigned TrialOffset, 320235633Sdim const CPUser &U) { 321235633Sdim return isOffsetInRange(UserOffset, TrialOffset, 322235633Sdim U.getMaxDisp(), U.NegOk, U.IsSoImm); 323235633Sdim } 324193323Sed }; 325193323Sed char ARMConstantIslands::ID = 0; 326193323Sed} 327193323Sed 328193323Sed/// verify - check BBOffsets, BBSizes, alignment of islands 329235633Sdimvoid ARMConstantIslands::verify() { 330198090Srdivacky#ifndef NDEBUG 331235633Sdim for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); 332198090Srdivacky MBBI != E; ++MBBI) { 333198090Srdivacky MachineBasicBlock *MBB = MBBI; 334235633Sdim unsigned MBBId = MBB->getNumber(); 335235633Sdim assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset); 336193323Sed } 337235633Sdim DEBUG(dbgs() << "Verifying " << CPUsers.size() << " CP users.\n"); 338199989Srdivacky for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) { 339199989Srdivacky CPUser &U = CPUsers[i]; 340235633Sdim unsigned UserOffset = getUserOffset(U); 341235633Sdim // Verify offset using the real max displacement without the safety 342235633Sdim // adjustment. 343235633Sdim if (isCPEntryInRange(U.MI, UserOffset, U.CPEMI, U.getMaxDisp()+2, U.NegOk, 344235633Sdim /* DoDump = */ true)) { 345235633Sdim DEBUG(dbgs() << "OK\n"); 346235633Sdim continue; 347235633Sdim } 348235633Sdim DEBUG(dbgs() << "Out of range.\n"); 349235633Sdim dumpBBs(); 350235633Sdim DEBUG(MF->dump()); 351235633Sdim llvm_unreachable("Constant pool entry out of range!"); 352199989Srdivacky } 353198090Srdivacky#endif 354193323Sed} 355193323Sed 356193323Sed/// print block size and offset information - debugging 357193323Sedvoid ARMConstantIslands::dumpBBs() { 358235633Sdim DEBUG({ 359235633Sdim for (unsigned J = 0, E = BBInfo.size(); J !=E; ++J) { 360235633Sdim const BasicBlockInfo &BBI = BBInfo[J]; 361235633Sdim dbgs() << format("%08x BB#%u\t", BBI.Offset, J) 362235633Sdim << " kb=" << unsigned(BBI.KnownBits) 363235633Sdim << " ua=" << unsigned(BBI.Unalign) 364235633Sdim << " pa=" << unsigned(BBI.PostAlign) 365235633Sdim << format(" size=%#x\n", BBInfo[J].Size); 366235633Sdim } 367235633Sdim }); 368193323Sed} 369193323Sed 370193323Sed/// createARMConstantIslandPass - returns an instance of the constpool 371193323Sed/// island pass. 372193323SedFunctionPass *llvm::createARMConstantIslandPass() { 373193323Sed return new ARMConstantIslands(); 374193323Sed} 375193323Sed 376235633Sdimbool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { 377235633Sdim MF = &mf; 378235633Sdim MCP = mf.getConstantPool(); 379193323Sed 380235633Sdim DEBUG(dbgs() << "***** ARMConstantIslands: " 381235633Sdim << MCP->getConstants().size() << " CP entries, aligned to " 382235633Sdim << MCP->getConstantPoolAlignment() << " bytes *****\n"); 383198090Srdivacky 384235633Sdim TII = (const ARMBaseInstrInfo*)MF->getTarget().getInstrInfo(); 385235633Sdim AFI = MF->getInfo<ARMFunctionInfo>(); 386235633Sdim STI = &MF->getTarget().getSubtarget<ARMSubtarget>(); 387235633Sdim 388193323Sed isThumb = AFI->isThumbFunction(); 389198090Srdivacky isThumb1 = AFI->isThumb1OnlyFunction(); 390195340Sed isThumb2 = AFI->isThumb2Function(); 391193323Sed 392193323Sed HasFarJump = false; 393193323Sed 394235633Sdim // This pass invalidates liveness information when it splits basic blocks. 395235633Sdim MF->getRegInfo().invalidateLiveness(); 396235633Sdim 397193323Sed // Renumber all of the machine basic blocks in the function, guaranteeing that 398193323Sed // the numbers agree with the position of the block in the function. 399235633Sdim MF->RenumberBlocks(); 400193323Sed 401199481Srdivacky // Try to reorder and otherwise adjust the block layout to make good use 402199481Srdivacky // of the TB[BH] instructions. 403199481Srdivacky bool MadeChange = false; 404199481Srdivacky if (isThumb2 && AdjustJumpTableBlocks) { 405235633Sdim scanFunctionJumpTables(); 406235633Sdim MadeChange |= reorderThumb2JumpTables(); 407199481Srdivacky // Data is out of date, so clear it. It'll be re-computed later. 408199481Srdivacky T2JumpTables.clear(); 409199481Srdivacky // Blocks may have shifted around. Keep the numbering up to date. 410235633Sdim MF->RenumberBlocks(); 411199481Srdivacky } 412199481Srdivacky 413198090Srdivacky // Thumb1 functions containing constant pools get 4-byte alignment. 414198090Srdivacky // This is so we can keep exact track of where the alignment padding goes. 415193323Sed 416203954Srdivacky // ARM and Thumb2 functions need to be 4-byte aligned. 417203954Srdivacky if (!isThumb1) 418245431Sdim MF->ensureAlignment(2); // 2 = log2(4) 419198090Srdivacky 420193323Sed // Perform the initial placement of the constant pool entries. To start with, 421193323Sed // we put them all at the end of the function. 422193323Sed std::vector<MachineInstr*> CPEMIs; 423235633Sdim if (!MCP->isEmpty()) 424235633Sdim doInitialPlacement(CPEMIs); 425193323Sed 426193323Sed /// The next UID to take is the first unused one. 427218893Sdim AFI->initPICLabelUId(CPEMIs.size()); 428193323Sed 429193323Sed // Do the initial scan of the function, building up information about the 430193323Sed // sizes of each block, the location of all the water, and finding all of the 431193323Sed // constant pool users. 432235633Sdim initializeFunctionInfo(CPEMIs); 433193323Sed CPEMIs.clear(); 434212904Sdim DEBUG(dumpBBs()); 435193323Sed 436212904Sdim 437193323Sed /// Remove dead constant pool entries. 438235633Sdim MadeChange |= removeUnusedCPEntries(); 439193323Sed 440193323Sed // Iteratively place constant pool entries and fix up branches until there 441193323Sed // is no change. 442198090Srdivacky unsigned NoCPIters = 0, NoBRIters = 0; 443193323Sed while (true) { 444235633Sdim DEBUG(dbgs() << "Beginning CP iteration #" << NoCPIters << '\n'); 445198090Srdivacky bool CPChange = false; 446193323Sed for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) 447235633Sdim CPChange |= handleConstantPoolUser(i); 448198090Srdivacky if (CPChange && ++NoCPIters > 30) 449235633Sdim report_fatal_error("Constant Island pass failed to converge!"); 450193323Sed DEBUG(dumpBBs()); 451210299Sed 452198396Srdivacky // Clear NewWaterList now. If we split a block for branches, it should 453198396Srdivacky // appear as "new water" for the next iteration of constant pool placement. 454198396Srdivacky NewWaterList.clear(); 455198090Srdivacky 456235633Sdim DEBUG(dbgs() << "Beginning BR iteration #" << NoBRIters << '\n'); 457198090Srdivacky bool BRChange = false; 458193323Sed for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) 459235633Sdim BRChange |= fixupImmediateBr(ImmBranches[i]); 460198090Srdivacky if (BRChange && ++NoBRIters > 30) 461235633Sdim report_fatal_error("Branch Fix Up pass failed to converge!"); 462193323Sed DEBUG(dumpBBs()); 463198090Srdivacky 464198090Srdivacky if (!CPChange && !BRChange) 465193323Sed break; 466193323Sed MadeChange = true; 467193323Sed } 468193323Sed 469198090Srdivacky // Shrink 32-bit Thumb2 branch, load, and store instructions. 470212904Sdim if (isThumb2 && !STI->prefers32BitThumb()) 471235633Sdim MadeChange |= optimizeThumb2Instructions(); 472198090Srdivacky 473193323Sed // After a while, this might be made debug-only, but it is not expensive. 474235633Sdim verify(); 475193323Sed 476210299Sed // If LR has been forced spilled and no far jump (i.e. BL) has been issued, 477210299Sed // undo the spill / restore of LR if possible. 478198090Srdivacky if (isThumb && !HasFarJump && AFI->isLRSpilledForFarJump()) 479235633Sdim MadeChange |= undoLRSpillRestore(); 480193323Sed 481218893Sdim // Save the mapping between original and cloned constpool entries. 482218893Sdim for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) { 483218893Sdim for (unsigned j = 0, je = CPEntries[i].size(); j != je; ++j) { 484218893Sdim const CPEntry & CPE = CPEntries[i][j]; 485218893Sdim AFI->recordCPEClone(i, CPE.CPI); 486218893Sdim } 487218893Sdim } 488218893Sdim 489235633Sdim DEBUG(dbgs() << '\n'; dumpBBs()); 490212904Sdim 491235633Sdim BBInfo.clear(); 492193323Sed WaterList.clear(); 493193323Sed CPUsers.clear(); 494193323Sed CPEntries.clear(); 495193323Sed ImmBranches.clear(); 496193323Sed PushPopMIs.clear(); 497198090Srdivacky T2JumpTables.clear(); 498193323Sed 499193323Sed return MadeChange; 500193323Sed} 501193323Sed 502235633Sdim/// doInitialPlacement - Perform the initial placement of the constant pool 503193323Sed/// entries. To start with, we put them all at the end of the function. 504235633Sdimvoid 505235633SdimARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) { 506193323Sed // Create the basic block to hold the CPE's. 507235633Sdim MachineBasicBlock *BB = MF->CreateMachineBasicBlock(); 508235633Sdim MF->push_back(BB); 509193323Sed 510235633Sdim // MachineConstantPool measures alignment in bytes. We measure in log2(bytes). 511235633Sdim unsigned MaxAlign = Log2_32(MCP->getConstantPoolAlignment()); 512235633Sdim 513235633Sdim // Mark the basic block as required by the const-pool. 514235633Sdim // If AlignConstantIslands isn't set, use 4-byte alignment for everything. 515235633Sdim BB->setAlignment(AlignConstantIslands ? MaxAlign : 2); 516235633Sdim 517235633Sdim // The function needs to be as aligned as the basic blocks. The linker may 518235633Sdim // move functions around based on their alignment. 519245431Sdim MF->ensureAlignment(BB->getAlignment()); 520235633Sdim 521235633Sdim // Order the entries in BB by descending alignment. That ensures correct 522235633Sdim // alignment of all entries as long as BB is sufficiently aligned. Keep 523235633Sdim // track of the insertion point for each alignment. We are going to bucket 524235633Sdim // sort the entries as they are created. 525235633Sdim SmallVector<MachineBasicBlock::iterator, 8> InsPoint(MaxAlign + 1, BB->end()); 526235633Sdim 527193323Sed // Add all of the constants from the constant pool to the end block, use an 528193323Sed // identity mapping of CPI's to CPE's. 529235633Sdim const std::vector<MachineConstantPoolEntry> &CPs = MCP->getConstants(); 530193323Sed 531245431Sdim const DataLayout &TD = *MF->getTarget().getDataLayout(); 532193323Sed for (unsigned i = 0, e = CPs.size(); i != e; ++i) { 533193323Sed unsigned Size = TD.getTypeAllocSize(CPs[i].getType()); 534235633Sdim assert(Size >= 4 && "Too small constant pool entry"); 535235633Sdim unsigned Align = CPs[i].getAlignment(); 536235633Sdim assert(isPowerOf2_32(Align) && "Invalid alignment"); 537235633Sdim // Verify that all constant pool entries are a multiple of their alignment. 538235633Sdim // If not, we would have to pad them out so that instructions stay aligned. 539235633Sdim assert((Size % Align) == 0 && "CP Entry not multiple of 4 bytes!"); 540235633Sdim 541235633Sdim // Insert CONSTPOOL_ENTRY before entries with a smaller alignment. 542235633Sdim unsigned LogAlign = Log2_32(Align); 543235633Sdim MachineBasicBlock::iterator InsAt = InsPoint[LogAlign]; 544193323Sed MachineInstr *CPEMI = 545235633Sdim BuildMI(*BB, InsAt, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY)) 546206124Srdivacky .addImm(i).addConstantPoolIndex(i).addImm(Size); 547193323Sed CPEMIs.push_back(CPEMI); 548193323Sed 549235633Sdim // Ensure that future entries with higher alignment get inserted before 550235633Sdim // CPEMI. This is bucket sort with iterators. 551235633Sdim for (unsigned a = LogAlign + 1; a <= MaxAlign; ++a) 552235633Sdim if (InsPoint[a] == InsAt) 553235633Sdim InsPoint[a] = CPEMI; 554235633Sdim 555193323Sed // Add a new CPEntry, but no corresponding CPUser yet. 556193323Sed std::vector<CPEntry> CPEs; 557193323Sed CPEs.push_back(CPEntry(CPEMI, i)); 558193323Sed CPEntries.push_back(CPEs); 559210299Sed ++NumCPEs; 560235633Sdim DEBUG(dbgs() << "Moved CPI#" << i << " to end of function, size = " 561235633Sdim << Size << ", align = " << Align <<'\n'); 562193323Sed } 563235633Sdim DEBUG(BB->dump()); 564193323Sed} 565193323Sed 566193323Sed/// BBHasFallthrough - Return true if the specified basic block can fallthrough 567193323Sed/// into the block immediately after it. 568193323Sedstatic bool BBHasFallthrough(MachineBasicBlock *MBB) { 569193323Sed // Get the next machine basic block in the function. 570193323Sed MachineFunction::iterator MBBI = MBB; 571210299Sed // Can't fall off end of function. 572210299Sed if (llvm::next(MBBI) == MBB->getParent()->end()) 573193323Sed return false; 574193323Sed 575200581Srdivacky MachineBasicBlock *NextBB = llvm::next(MBBI); 576193323Sed for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(), 577193323Sed E = MBB->succ_end(); I != E; ++I) 578193323Sed if (*I == NextBB) 579193323Sed return true; 580193323Sed 581193323Sed return false; 582193323Sed} 583193323Sed 584193323Sed/// findConstPoolEntry - Given the constpool index and CONSTPOOL_ENTRY MI, 585193323Sed/// look up the corresponding CPEntry. 586193323SedARMConstantIslands::CPEntry 587193323Sed*ARMConstantIslands::findConstPoolEntry(unsigned CPI, 588193323Sed const MachineInstr *CPEMI) { 589193323Sed std::vector<CPEntry> &CPEs = CPEntries[CPI]; 590193323Sed // Number of entries per constpool index should be small, just do a 591193323Sed // linear search. 592193323Sed for (unsigned i = 0, e = CPEs.size(); i != e; ++i) { 593193323Sed if (CPEs[i].CPEMI == CPEMI) 594193323Sed return &CPEs[i]; 595193323Sed } 596193323Sed return NULL; 597193323Sed} 598193323Sed 599235633Sdim/// getCPELogAlign - Returns the required alignment of the constant pool entry 600235633Sdim/// represented by CPEMI. Alignment is measured in log2(bytes) units. 601235633Sdimunsigned ARMConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) { 602235633Sdim assert(CPEMI && CPEMI->getOpcode() == ARM::CONSTPOOL_ENTRY); 603235633Sdim 604235633Sdim // Everything is 4-byte aligned unless AlignConstantIslands is set. 605235633Sdim if (!AlignConstantIslands) 606235633Sdim return 2; 607235633Sdim 608235633Sdim unsigned CPI = CPEMI->getOperand(1).getIndex(); 609235633Sdim assert(CPI < MCP->getConstants().size() && "Invalid constant pool index."); 610235633Sdim unsigned Align = MCP->getConstants()[CPI].getAlignment(); 611235633Sdim assert(isPowerOf2_32(Align) && "Invalid CPE alignment"); 612235633Sdim return Log2_32(Align); 613235633Sdim} 614235633Sdim 615235633Sdim/// scanFunctionJumpTables - Do a scan of the function, building up 616199481Srdivacky/// information about the sizes of each block and the locations of all 617199481Srdivacky/// the jump tables. 618235633Sdimvoid ARMConstantIslands::scanFunctionJumpTables() { 619235633Sdim for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); 620199481Srdivacky MBBI != E; ++MBBI) { 621199481Srdivacky MachineBasicBlock &MBB = *MBBI; 622199481Srdivacky 623199481Srdivacky for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); 624199481Srdivacky I != E; ++I) 625235633Sdim if (I->isBranch() && I->getOpcode() == ARM::t2BR_JT) 626199481Srdivacky T2JumpTables.push_back(I); 627199481Srdivacky } 628199481Srdivacky} 629199481Srdivacky 630235633Sdim/// initializeFunctionInfo - Do the initial scan of the function, building up 631193323Sed/// information about the sizes of each block, the location of all the water, 632193323Sed/// and finding all of the constant pool users. 633235633Sdimvoid ARMConstantIslands:: 634235633SdiminitializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) { 635235633Sdim BBInfo.clear(); 636235633Sdim BBInfo.resize(MF->getNumBlockIDs()); 637199989Srdivacky 638235633Sdim // First thing, compute the size of all basic blocks, and see if the function 639235633Sdim // has any inline assembly in it. If so, we have to be conservative about 640235633Sdim // alignment assumptions, as we don't know for sure the size of any 641235633Sdim // instructions in the inline assembly. 642235633Sdim for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) 643235633Sdim computeBlockSize(I); 644235633Sdim 645235633Sdim // The known bits of the entry block offset are determined by the function 646235633Sdim // alignment. 647235633Sdim BBInfo.front().KnownBits = MF->getAlignment(); 648235633Sdim 649235633Sdim // Compute block offsets and known bits. 650235633Sdim adjustBBOffsetsAfter(MF->begin()); 651235633Sdim 652218893Sdim // Now go back through the instructions and build up our data structures. 653235633Sdim for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); 654193323Sed MBBI != E; ++MBBI) { 655193323Sed MachineBasicBlock &MBB = *MBBI; 656193323Sed 657193323Sed // If this block doesn't fall through into the next MBB, then this is 658193323Sed // 'water' that a constant pool island could be placed. 659193323Sed if (!BBHasFallthrough(&MBB)) 660193323Sed WaterList.push_back(&MBB); 661193323Sed 662193323Sed for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); 663193323Sed I != E; ++I) { 664210299Sed if (I->isDebugValue()) 665210299Sed continue; 666193323Sed 667193323Sed int Opc = I->getOpcode(); 668235633Sdim if (I->isBranch()) { 669193323Sed bool isCond = false; 670193323Sed unsigned Bits = 0; 671193323Sed unsigned Scale = 1; 672193323Sed int UOpc = Opc; 673193323Sed switch (Opc) { 674198090Srdivacky default: 675198090Srdivacky continue; // Ignore other JT branches 676198090Srdivacky case ARM::t2BR_JT: 677198090Srdivacky T2JumpTables.push_back(I); 678198090Srdivacky continue; // Does not get an entry in ImmBranches 679193323Sed case ARM::Bcc: 680193323Sed isCond = true; 681193323Sed UOpc = ARM::B; 682193323Sed // Fallthrough 683193323Sed case ARM::B: 684193323Sed Bits = 24; 685193323Sed Scale = 4; 686193323Sed break; 687193323Sed case ARM::tBcc: 688193323Sed isCond = true; 689193323Sed UOpc = ARM::tB; 690193323Sed Bits = 8; 691193323Sed Scale = 2; 692193323Sed break; 693193323Sed case ARM::tB: 694193323Sed Bits = 11; 695193323Sed Scale = 2; 696193323Sed break; 697195340Sed case ARM::t2Bcc: 698195340Sed isCond = true; 699195340Sed UOpc = ARM::t2B; 700195340Sed Bits = 20; 701195340Sed Scale = 2; 702195340Sed break; 703195340Sed case ARM::t2B: 704195340Sed Bits = 24; 705195340Sed Scale = 2; 706195340Sed break; 707193323Sed } 708193323Sed 709193323Sed // Record this immediate branch. 710193323Sed unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale; 711193323Sed ImmBranches.push_back(ImmBranch(I, MaxOffs, isCond, UOpc)); 712193323Sed } 713193323Sed 714193323Sed if (Opc == ARM::tPUSH || Opc == ARM::tPOP_RET) 715193323Sed PushPopMIs.push_back(I); 716193323Sed 717198090Srdivacky if (Opc == ARM::CONSTPOOL_ENTRY) 718198090Srdivacky continue; 719198090Srdivacky 720193323Sed // Scan the instructions for constant pool operands. 721193323Sed for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) 722193323Sed if (I->getOperand(op).isCPI()) { 723193323Sed // We found one. The addressing mode tells us the max displacement 724193323Sed // from the PC that this instruction permits. 725193323Sed 726193323Sed // Basic size info comes from the TSFlags field. 727193323Sed unsigned Bits = 0; 728193323Sed unsigned Scale = 1; 729198090Srdivacky bool NegOk = false; 730198090Srdivacky bool IsSoImm = false; 731198090Srdivacky 732198090Srdivacky switch (Opc) { 733193323Sed default: 734198090Srdivacky llvm_unreachable("Unknown addressing mode for CP reference!"); 735198090Srdivacky 736198090Srdivacky // Taking the address of a CP entry. 737198090Srdivacky case ARM::LEApcrel: 738198090Srdivacky // This takes a SoImm, which is 8 bit immediate rotated. We'll 739198090Srdivacky // pretend the maximum offset is 255 * 4. Since each instruction 740199989Srdivacky // 4 byte wide, this is always correct. We'll check for other 741198090Srdivacky // displacements that fits in a SoImm as well. 742193323Sed Bits = 8; 743198090Srdivacky Scale = 4; 744198090Srdivacky NegOk = true; 745198090Srdivacky IsSoImm = true; 746193323Sed break; 747198090Srdivacky case ARM::t2LEApcrel: 748198090Srdivacky Bits = 12; 749198090Srdivacky NegOk = true; 750193323Sed break; 751198090Srdivacky case ARM::tLEApcrel: 752193323Sed Bits = 8; 753198090Srdivacky Scale = 4; 754193323Sed break; 755198090Srdivacky 756263509Sdim case ARM::LDRBi12: 757218893Sdim case ARM::LDRi12: 758198090Srdivacky case ARM::LDRcp: 759198090Srdivacky case ARM::t2LDRpci: 760198090Srdivacky Bits = 12; // +-offset_12 761198090Srdivacky NegOk = true; 762193323Sed break; 763198090Srdivacky 764198090Srdivacky case ARM::tLDRpci: 765193323Sed Bits = 8; 766193323Sed Scale = 4; // +(offset_8*4) 767193323Sed break; 768198090Srdivacky 769199481Srdivacky case ARM::VLDRD: 770199481Srdivacky case ARM::VLDRS: 771198090Srdivacky Bits = 8; 772198090Srdivacky Scale = 4; // +-(offset_8*4) 773198090Srdivacky NegOk = true; 774195340Sed break; 775193323Sed } 776193323Sed 777193323Sed // Remember that this is a user of a CP entry. 778193323Sed unsigned CPI = I->getOperand(op).getIndex(); 779193323Sed MachineInstr *CPEMI = CPEMIs[CPI]; 780193323Sed unsigned MaxOffs = ((1 << Bits)-1) * Scale; 781198090Srdivacky CPUsers.push_back(CPUser(I, CPEMI, MaxOffs, NegOk, IsSoImm)); 782193323Sed 783193323Sed // Increment corresponding CPEntry reference count. 784193323Sed CPEntry *CPE = findConstPoolEntry(CPI, CPEMI); 785193323Sed assert(CPE && "Cannot find a corresponding CPEntry!"); 786193323Sed CPE->RefCount++; 787193323Sed 788193323Sed // Instructions can only use one CP entry, don't bother scanning the 789193323Sed // rest of the operands. 790193323Sed break; 791193323Sed } 792193323Sed } 793235633Sdim } 794235633Sdim} 795193323Sed 796235633Sdim/// computeBlockSize - Compute the size and some alignment information for MBB. 797235633Sdim/// This function updates BBInfo directly. 798235633Sdimvoid ARMConstantIslands::computeBlockSize(MachineBasicBlock *MBB) { 799235633Sdim BasicBlockInfo &BBI = BBInfo[MBB->getNumber()]; 800235633Sdim BBI.Size = 0; 801235633Sdim BBI.Unalign = 0; 802235633Sdim BBI.PostAlign = 0; 803193323Sed 804235633Sdim for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; 805235633Sdim ++I) { 806235633Sdim BBI.Size += TII->GetInstSizeInBytes(I); 807235633Sdim // For inline asm, GetInstSizeInBytes returns a conservative estimate. 808235633Sdim // The actual size may be smaller, but still a multiple of the instr size. 809235633Sdim if (I->isInlineAsm()) 810235633Sdim BBI.Unalign = isThumb ? 1 : 2; 811235633Sdim // Also consider instructions that may be shrunk later. 812235633Sdim else if (isThumb && mayOptimizeThumb2Instruction(I)) 813235633Sdim BBI.Unalign = 1; 814193323Sed } 815235633Sdim 816235633Sdim // tBR_JTr contains a .align 2 directive. 817235633Sdim if (!MBB->empty() && MBB->back().getOpcode() == ARM::tBR_JTr) { 818235633Sdim BBI.PostAlign = 2; 819245431Sdim MBB->getParent()->ensureAlignment(2); 820235633Sdim } 821193323Sed} 822193323Sed 823235633Sdim/// getOffsetOf - Return the current offset of the specified machine instruction 824193323Sed/// from the start of the function. This offset changes as stuff is moved 825193323Sed/// around inside the function. 826235633Sdimunsigned ARMConstantIslands::getOffsetOf(MachineInstr *MI) const { 827193323Sed MachineBasicBlock *MBB = MI->getParent(); 828193323Sed 829193323Sed // The offset is composed of two things: the sum of the sizes of all MBB's 830193323Sed // before this instruction's block, and the offset from the start of the block 831193323Sed // it is in. 832235633Sdim unsigned Offset = BBInfo[MBB->getNumber()].Offset; 833193323Sed 834193323Sed // Sum instructions before MI in MBB. 835235633Sdim for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) { 836193323Sed assert(I != MBB->end() && "Didn't find MI in its own basic block?"); 837193323Sed Offset += TII->GetInstSizeInBytes(I); 838193323Sed } 839235633Sdim return Offset; 840193323Sed} 841193323Sed 842193323Sed/// CompareMBBNumbers - Little predicate function to sort the WaterList by MBB 843193323Sed/// ID. 844193323Sedstatic bool CompareMBBNumbers(const MachineBasicBlock *LHS, 845193323Sed const MachineBasicBlock *RHS) { 846193323Sed return LHS->getNumber() < RHS->getNumber(); 847193323Sed} 848193323Sed 849235633Sdim/// updateForInsertedWaterBlock - When a block is newly inserted into the 850193323Sed/// machine function, it upsets all of the block numbers. Renumber the blocks 851193323Sed/// and update the arrays that parallel this numbering. 852235633Sdimvoid ARMConstantIslands::updateForInsertedWaterBlock(MachineBasicBlock *NewBB) { 853218893Sdim // Renumber the MBB's to keep them consecutive. 854193323Sed NewBB->getParent()->RenumberBlocks(NewBB); 855193323Sed 856235633Sdim // Insert an entry into BBInfo to align it properly with the (newly 857193323Sed // renumbered) block numbers. 858235633Sdim BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo()); 859193323Sed 860193323Sed // Next, update WaterList. Specifically, we need to add NewMBB as having 861193323Sed // available water after it. 862198090Srdivacky water_iterator IP = 863193323Sed std::lower_bound(WaterList.begin(), WaterList.end(), NewBB, 864193323Sed CompareMBBNumbers); 865193323Sed WaterList.insert(IP, NewBB); 866193323Sed} 867193323Sed 868193323Sed 869193323Sed/// Split the basic block containing MI into two blocks, which are joined by 870198396Srdivacky/// an unconditional branch. Update data structures and renumber blocks to 871193323Sed/// account for this change and returns the newly created block. 872235633SdimMachineBasicBlock *ARMConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) { 873193323Sed MachineBasicBlock *OrigBB = MI->getParent(); 874193323Sed 875193323Sed // Create a new MBB for the code after the OrigBB. 876193323Sed MachineBasicBlock *NewBB = 877235633Sdim MF->CreateMachineBasicBlock(OrigBB->getBasicBlock()); 878193323Sed MachineFunction::iterator MBBI = OrigBB; ++MBBI; 879235633Sdim MF->insert(MBBI, NewBB); 880193323Sed 881193323Sed // Splice the instructions starting with MI over to NewBB. 882193323Sed NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end()); 883193323Sed 884193323Sed // Add an unconditional branch from OrigBB to NewBB. 885193323Sed // Note the new unconditional branch is not being recorded. 886193323Sed // There doesn't seem to be meaningful DebugInfo available; this doesn't 887193323Sed // correspond to anything in the source. 888198090Srdivacky unsigned Opc = isThumb ? (isThumb2 ? ARM::t2B : ARM::tB) : ARM::B; 889226890Sdim if (!isThumb) 890226890Sdim BuildMI(OrigBB, DebugLoc(), TII->get(Opc)).addMBB(NewBB); 891226890Sdim else 892226890Sdim BuildMI(OrigBB, DebugLoc(), TII->get(Opc)).addMBB(NewBB) 893226890Sdim .addImm(ARMCC::AL).addReg(0); 894210299Sed ++NumSplit; 895193323Sed 896193323Sed // Update the CFG. All succs of OrigBB are now succs of NewBB. 897235633Sdim NewBB->transferSuccessors(OrigBB); 898193323Sed 899193323Sed // OrigBB branches to NewBB. 900193323Sed OrigBB->addSuccessor(NewBB); 901193323Sed 902193323Sed // Update internal data structures to account for the newly inserted MBB. 903235633Sdim // This is almost the same as updateForInsertedWaterBlock, except that 904193323Sed // the Water goes after OrigBB, not NewBB. 905235633Sdim MF->RenumberBlocks(NewBB); 906193323Sed 907235633Sdim // Insert an entry into BBInfo to align it properly with the (newly 908193323Sed // renumbered) block numbers. 909235633Sdim BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo()); 910193323Sed 911193323Sed // Next, update WaterList. Specifically, we need to add OrigMBB as having 912193323Sed // available water after it (but not if it's already there, which happens 913193323Sed // when splitting before a conditional branch that is followed by an 914193323Sed // unconditional branch - in that case we want to insert NewBB). 915198090Srdivacky water_iterator IP = 916193323Sed std::lower_bound(WaterList.begin(), WaterList.end(), OrigBB, 917193323Sed CompareMBBNumbers); 918193323Sed MachineBasicBlock* WaterBB = *IP; 919193323Sed if (WaterBB == OrigBB) 920200581Srdivacky WaterList.insert(llvm::next(IP), NewBB); 921193323Sed else 922193323Sed WaterList.insert(IP, OrigBB); 923198396Srdivacky NewWaterList.insert(OrigBB); 924193323Sed 925212904Sdim // Figure out how large the OrigBB is. As the first half of the original 926212904Sdim // block, it cannot contain a tablejump. The size includes 927212904Sdim // the new jump we added. (It should be possible to do this without 928212904Sdim // recounting everything, but it's very confusing, and this is rarely 929212904Sdim // executed.) 930235633Sdim computeBlockSize(OrigBB); 931212904Sdim 932212904Sdim // Figure out how large the NewMBB is. As the second half of the original 933212904Sdim // block, it may contain a tablejump. 934235633Sdim computeBlockSize(NewBB); 935212904Sdim 936193323Sed // All BBOffsets following these blocks must be modified. 937235633Sdim adjustBBOffsetsAfter(OrigBB); 938193323Sed 939193323Sed return NewBB; 940193323Sed} 941193323Sed 942235633Sdim/// getUserOffset - Compute the offset of U.MI as seen by the hardware 943235633Sdim/// displacement computation. Update U.KnownAlignment to match its current 944235633Sdim/// basic block location. 945235633Sdimunsigned ARMConstantIslands::getUserOffset(CPUser &U) const { 946235633Sdim unsigned UserOffset = getOffsetOf(U.MI); 947235633Sdim const BasicBlockInfo &BBI = BBInfo[U.MI->getParent()->getNumber()]; 948235633Sdim unsigned KnownBits = BBI.internalKnownBits(); 949235633Sdim 950235633Sdim // The value read from PC is offset from the actual instruction address. 951235633Sdim UserOffset += (isThumb ? 4 : 8); 952235633Sdim 953235633Sdim // Because of inline assembly, we may not know the alignment (mod 4) of U.MI. 954235633Sdim // Make sure U.getMaxDisp() returns a constrained range. 955235633Sdim U.KnownAlignment = (KnownBits >= 2); 956235633Sdim 957235633Sdim // On Thumb, offsets==2 mod 4 are rounded down by the hardware for 958235633Sdim // purposes of the displacement computation; compensate for that here. 959235633Sdim // For unknown alignments, getMaxDisp() constrains the range instead. 960235633Sdim if (isThumb && U.KnownAlignment) 961235633Sdim UserOffset &= ~3u; 962235633Sdim 963235633Sdim return UserOffset; 964235633Sdim} 965235633Sdim 966235633Sdim/// isOffsetInRange - Checks whether UserOffset (the location of a constant pool 967193323Sed/// reference) is within MaxDisp of TrialOffset (a proposed location of a 968193323Sed/// constant pool entry). 969235633Sdim/// UserOffset is computed by getUserOffset above to include PC adjustments. If 970235633Sdim/// the mod 4 alignment of UserOffset is not known, the uncertainty must be 971235633Sdim/// subtracted from MaxDisp instead. CPUser::getMaxDisp() does that. 972235633Sdimbool ARMConstantIslands::isOffsetInRange(unsigned UserOffset, 973198090Srdivacky unsigned TrialOffset, unsigned MaxDisp, 974198090Srdivacky bool NegativeOK, bool IsSoImm) { 975193323Sed if (UserOffset <= TrialOffset) { 976193323Sed // User before the Trial. 977198090Srdivacky if (TrialOffset - UserOffset <= MaxDisp) 978193323Sed return true; 979198090Srdivacky // FIXME: Make use full range of soimm values. 980193323Sed } else if (NegativeOK) { 981198090Srdivacky if (UserOffset - TrialOffset <= MaxDisp) 982193323Sed return true; 983198090Srdivacky // FIXME: Make use full range of soimm values. 984193323Sed } 985193323Sed return false; 986193323Sed} 987193323Sed 988235633Sdim/// isWaterInRange - Returns true if a CPE placed after the specified 989193323Sed/// Water (a basic block) will be in range for the specific MI. 990235633Sdim/// 991235633Sdim/// Compute how much the function will grow by inserting a CPE after Water. 992235633Sdimbool ARMConstantIslands::isWaterInRange(unsigned UserOffset, 993235633Sdim MachineBasicBlock* Water, CPUser &U, 994235633Sdim unsigned &Growth) { 995235633Sdim unsigned CPELogAlign = getCPELogAlign(U.CPEMI); 996235633Sdim unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPELogAlign); 997235633Sdim unsigned NextBlockOffset, NextBlockAlignment; 998235633Sdim MachineFunction::const_iterator NextBlock = Water; 999235633Sdim if (++NextBlock == MF->end()) { 1000235633Sdim NextBlockOffset = BBInfo[Water->getNumber()].postOffset(); 1001235633Sdim NextBlockAlignment = 0; 1002235633Sdim } else { 1003235633Sdim NextBlockOffset = BBInfo[NextBlock->getNumber()].Offset; 1004235633Sdim NextBlockAlignment = NextBlock->getAlignment(); 1005235633Sdim } 1006235633Sdim unsigned Size = U.CPEMI->getOperand(2).getImm(); 1007235633Sdim unsigned CPEEnd = CPEOffset + Size; 1008193323Sed 1009235633Sdim // The CPE may be able to hide in the alignment padding before the next 1010235633Sdim // block. It may also cause more padding to be required if it is more aligned 1011235633Sdim // that the next block. 1012235633Sdim if (CPEEnd > NextBlockOffset) { 1013235633Sdim Growth = CPEEnd - NextBlockOffset; 1014235633Sdim // Compute the padding that would go at the end of the CPE to align the next 1015235633Sdim // block. 1016235633Sdim Growth += OffsetToAlignment(CPEEnd, 1u << NextBlockAlignment); 1017193323Sed 1018235633Sdim // If the CPE is to be inserted before the instruction, that will raise 1019235633Sdim // the offset of the instruction. Also account for unknown alignment padding 1020235633Sdim // in blocks between CPE and the user. 1021235633Sdim if (CPEOffset < UserOffset) 1022235633Sdim UserOffset += Growth + UnknownPadding(MF->getAlignment(), CPELogAlign); 1023235633Sdim } else 1024235633Sdim // CPE fits in existing padding. 1025235633Sdim Growth = 0; 1026193323Sed 1027235633Sdim return isOffsetInRange(UserOffset, CPEOffset, U); 1028193323Sed} 1029193323Sed 1030235633Sdim/// isCPEntryInRange - Returns true if the distance between specific MI and 1031193323Sed/// specific ConstPool entry instruction can fit in MI's displacement field. 1032235633Sdimbool ARMConstantIslands::isCPEntryInRange(MachineInstr *MI, unsigned UserOffset, 1033198090Srdivacky MachineInstr *CPEMI, unsigned MaxDisp, 1034198090Srdivacky bool NegOk, bool DoDump) { 1035235633Sdim unsigned CPEOffset = getOffsetOf(CPEMI); 1036193323Sed 1037193323Sed if (DoDump) { 1038235633Sdim DEBUG({ 1039235633Sdim unsigned Block = MI->getParent()->getNumber(); 1040235633Sdim const BasicBlockInfo &BBI = BBInfo[Block]; 1041235633Sdim dbgs() << "User of CPE#" << CPEMI->getOperand(0).getImm() 1042235633Sdim << " max delta=" << MaxDisp 1043235633Sdim << format(" insn address=%#x", UserOffset) 1044235633Sdim << " in BB#" << Block << ": " 1045235633Sdim << format("%#x-%x\t", BBI.Offset, BBI.postOffset()) << *MI 1046235633Sdim << format("CPE address=%#x offset=%+d: ", CPEOffset, 1047235633Sdim int(CPEOffset-UserOffset)); 1048235633Sdim }); 1049193323Sed } 1050193323Sed 1051235633Sdim return isOffsetInRange(UserOffset, CPEOffset, MaxDisp, NegOk); 1052193323Sed} 1053193323Sed 1054193323Sed#ifndef NDEBUG 1055193323Sed/// BBIsJumpedOver - Return true of the specified basic block's only predecessor 1056193323Sed/// unconditionally branches to its only successor. 1057193323Sedstatic bool BBIsJumpedOver(MachineBasicBlock *MBB) { 1058193323Sed if (MBB->pred_size() != 1 || MBB->succ_size() != 1) 1059193323Sed return false; 1060193323Sed 1061193323Sed MachineBasicBlock *Succ = *MBB->succ_begin(); 1062193323Sed MachineBasicBlock *Pred = *MBB->pred_begin(); 1063193323Sed MachineInstr *PredMI = &Pred->back(); 1064195340Sed if (PredMI->getOpcode() == ARM::B || PredMI->getOpcode() == ARM::tB 1065195340Sed || PredMI->getOpcode() == ARM::t2B) 1066193323Sed return PredMI->getOperand(0).getMBB() == Succ; 1067193323Sed return false; 1068193323Sed} 1069193323Sed#endif // NDEBUG 1070193323Sed 1071235633Sdimvoid ARMConstantIslands::adjustBBOffsetsAfter(MachineBasicBlock *BB) { 1072235633Sdim unsigned BBNum = BB->getNumber(); 1073235633Sdim for(unsigned i = BBNum + 1, e = MF->getNumBlockIDs(); i < e; ++i) { 1074235633Sdim // Get the offset and known bits at the end of the layout predecessor. 1075235633Sdim // Include the alignment of the current block. 1076235633Sdim unsigned LogAlign = MF->getBlockNumbered(i)->getAlignment(); 1077235633Sdim unsigned Offset = BBInfo[i - 1].postOffset(LogAlign); 1078235633Sdim unsigned KnownBits = BBInfo[i - 1].postKnownBits(LogAlign); 1079235633Sdim 1080235633Sdim // This is where block i begins. Stop if the offset is already correct, 1081235633Sdim // and we have updated 2 blocks. This is the maximum number of blocks 1082235633Sdim // changed before calling this function. 1083235633Sdim if (i > BBNum + 2 && 1084235633Sdim BBInfo[i].Offset == Offset && 1085235633Sdim BBInfo[i].KnownBits == KnownBits) 1086235633Sdim break; 1087235633Sdim 1088235633Sdim BBInfo[i].Offset = Offset; 1089235633Sdim BBInfo[i].KnownBits = KnownBits; 1090193323Sed } 1091193323Sed} 1092193323Sed 1093235633Sdim/// decrementCPEReferenceCount - find the constant pool entry with index CPI 1094193323Sed/// and instruction CPEMI, and decrement its refcount. If the refcount 1095193323Sed/// becomes 0 remove the entry and instruction. Returns true if we removed 1096193323Sed/// the entry, false if we didn't. 1097193323Sed 1098235633Sdimbool ARMConstantIslands::decrementCPEReferenceCount(unsigned CPI, 1099235633Sdim MachineInstr *CPEMI) { 1100193323Sed // Find the old entry. Eliminate it if it is no longer used. 1101193323Sed CPEntry *CPE = findConstPoolEntry(CPI, CPEMI); 1102193323Sed assert(CPE && "Unexpected!"); 1103193323Sed if (--CPE->RefCount == 0) { 1104235633Sdim removeDeadCPEMI(CPEMI); 1105193323Sed CPE->CPEMI = NULL; 1106210299Sed --NumCPEs; 1107193323Sed return true; 1108193323Sed } 1109193323Sed return false; 1110193323Sed} 1111193323Sed 1112193323Sed/// LookForCPEntryInRange - see if the currently referenced CPE is in range; 1113193323Sed/// if not, see if an in-range clone of the CPE is in range, and if so, 1114193323Sed/// change the data structures so the user references the clone. Returns: 1115193323Sed/// 0 = no existing entry found 1116193323Sed/// 1 = entry found, and there were no code insertions or deletions 1117193323Sed/// 2 = entry found, and there were code insertions or deletions 1118235633Sdimint ARMConstantIslands::findInRangeCPEntry(CPUser& U, unsigned UserOffset) 1119193323Sed{ 1120193323Sed MachineInstr *UserMI = U.MI; 1121193323Sed MachineInstr *CPEMI = U.CPEMI; 1122193323Sed 1123193323Sed // Check to see if the CPE is already in-range. 1124235633Sdim if (isCPEntryInRange(UserMI, UserOffset, CPEMI, U.getMaxDisp(), U.NegOk, 1125235633Sdim true)) { 1126235633Sdim DEBUG(dbgs() << "In range\n"); 1127193323Sed return 1; 1128193323Sed } 1129193323Sed 1130193323Sed // No. Look for previously created clones of the CPE that are in range. 1131193323Sed unsigned CPI = CPEMI->getOperand(1).getIndex(); 1132193323Sed std::vector<CPEntry> &CPEs = CPEntries[CPI]; 1133193323Sed for (unsigned i = 0, e = CPEs.size(); i != e; ++i) { 1134193323Sed // We already tried this one 1135193323Sed if (CPEs[i].CPEMI == CPEMI) 1136193323Sed continue; 1137193323Sed // Removing CPEs can leave empty entries, skip 1138193323Sed if (CPEs[i].CPEMI == NULL) 1139193323Sed continue; 1140235633Sdim if (isCPEntryInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.getMaxDisp(), 1141235633Sdim U.NegOk)) { 1142235633Sdim DEBUG(dbgs() << "Replacing CPE#" << CPI << " with CPE#" 1143198090Srdivacky << CPEs[i].CPI << "\n"); 1144193323Sed // Point the CPUser node to the replacement 1145193323Sed U.CPEMI = CPEs[i].CPEMI; 1146193323Sed // Change the CPI in the instruction operand to refer to the clone. 1147193323Sed for (unsigned j = 0, e = UserMI->getNumOperands(); j != e; ++j) 1148193323Sed if (UserMI->getOperand(j).isCPI()) { 1149193323Sed UserMI->getOperand(j).setIndex(CPEs[i].CPI); 1150193323Sed break; 1151193323Sed } 1152193323Sed // Adjust the refcount of the clone... 1153193323Sed CPEs[i].RefCount++; 1154193323Sed // ...and the original. If we didn't remove the old entry, none of the 1155193323Sed // addresses changed, so we don't need another pass. 1156235633Sdim return decrementCPEReferenceCount(CPI, CPEMI) ? 2 : 1; 1157193323Sed } 1158193323Sed } 1159193323Sed return 0; 1160193323Sed} 1161193323Sed 1162193323Sed/// getUnconditionalBrDisp - Returns the maximum displacement that can fit in 1163193323Sed/// the specific unconditional branch instruction. 1164193323Sedstatic inline unsigned getUnconditionalBrDisp(int Opc) { 1165195340Sed switch (Opc) { 1166195340Sed case ARM::tB: 1167195340Sed return ((1<<10)-1)*2; 1168195340Sed case ARM::t2B: 1169195340Sed return ((1<<23)-1)*2; 1170195340Sed default: 1171195340Sed break; 1172195340Sed } 1173198090Srdivacky 1174195340Sed return ((1<<23)-1)*4; 1175193323Sed} 1176193323Sed 1177235633Sdim/// findAvailableWater - Look for an existing entry in the WaterList in which 1178193323Sed/// we can place the CPE referenced from U so it's within range of U's MI. 1179198396Srdivacky/// Returns true if found, false if not. If it returns true, WaterIter 1180198090Srdivacky/// is set to the WaterList entry. For Thumb, prefer water that will not 1181198090Srdivacky/// introduce padding to water that will. To ensure that this pass 1182198090Srdivacky/// terminates, the CPE location for a particular CPUser is only allowed to 1183198090Srdivacky/// move to a lower address, so search backward from the end of the list and 1184198090Srdivacky/// prefer the first water that is in range. 1185235633Sdimbool ARMConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset, 1186198396Srdivacky water_iterator &WaterIter) { 1187198090Srdivacky if (WaterList.empty()) 1188198090Srdivacky return false; 1189193323Sed 1190235633Sdim unsigned BestGrowth = ~0u; 1191235633Sdim for (water_iterator IP = prior(WaterList.end()), B = WaterList.begin();; 1192235633Sdim --IP) { 1193198090Srdivacky MachineBasicBlock* WaterBB = *IP; 1194198396Srdivacky // Check if water is in range and is either at a lower address than the 1195198396Srdivacky // current "high water mark" or a new water block that was created since 1196198396Srdivacky // the previous iteration by inserting an unconditional branch. In the 1197198396Srdivacky // latter case, we want to allow resetting the high water mark back to 1198198396Srdivacky // this new water since we haven't seen it before. Inserting branches 1199198396Srdivacky // should be relatively uncommon and when it does happen, we want to be 1200198396Srdivacky // sure to take advantage of it for all the CPEs near that block, so that 1201198396Srdivacky // we don't insert more branches than necessary. 1202235633Sdim unsigned Growth; 1203235633Sdim if (isWaterInRange(UserOffset, WaterBB, U, Growth) && 1204198396Srdivacky (WaterBB->getNumber() < U.HighWaterMark->getNumber() || 1205235633Sdim NewWaterList.count(WaterBB)) && Growth < BestGrowth) { 1206235633Sdim // This is the least amount of required padding seen so far. 1207235633Sdim BestGrowth = Growth; 1208235633Sdim WaterIter = IP; 1209235633Sdim DEBUG(dbgs() << "Found water after BB#" << WaterBB->getNumber() 1210235633Sdim << " Growth=" << Growth << '\n'); 1211235633Sdim 1212235633Sdim // Keep looking unless it is perfect. 1213235633Sdim if (BestGrowth == 0) 1214198090Srdivacky return true; 1215193323Sed } 1216198090Srdivacky if (IP == B) 1217198090Srdivacky break; 1218193323Sed } 1219235633Sdim return BestGrowth != ~0u; 1220193323Sed} 1221193323Sed 1222235633Sdim/// createNewWater - No existing WaterList entry will work for 1223193323Sed/// CPUsers[CPUserIndex], so create a place to put the CPE. The end of the 1224193323Sed/// block is used if in range, and the conditional branch munged so control 1225193323Sed/// flow is correct. Otherwise the block is split to create a hole with an 1226198090Srdivacky/// unconditional branch around it. In either case NewMBB is set to a 1227193323Sed/// block following which the new island can be inserted (the WaterList 1228193323Sed/// is not adjusted). 1229235633Sdimvoid ARMConstantIslands::createNewWater(unsigned CPUserIndex, 1230198090Srdivacky unsigned UserOffset, 1231198090Srdivacky MachineBasicBlock *&NewMBB) { 1232193323Sed CPUser &U = CPUsers[CPUserIndex]; 1233193323Sed MachineInstr *UserMI = U.MI; 1234193323Sed MachineInstr *CPEMI = U.CPEMI; 1235235633Sdim unsigned CPELogAlign = getCPELogAlign(CPEMI); 1236193323Sed MachineBasicBlock *UserMBB = UserMI->getParent(); 1237235633Sdim const BasicBlockInfo &UserBBI = BBInfo[UserMBB->getNumber()]; 1238193323Sed 1239198113Srdivacky // If the block does not end in an unconditional branch already, and if the 1240198113Srdivacky // end of the block is within range, make new water there. (The addition 1241198113Srdivacky // below is for the unconditional branch we will be adding: 4 bytes on ARM + 1242235633Sdim // Thumb2, 2 on Thumb1. 1243235633Sdim if (BBHasFallthrough(UserMBB)) { 1244235633Sdim // Size of branch to insert. 1245235633Sdim unsigned Delta = isThumb1 ? 2 : 4; 1246235633Sdim // Compute the offset where the CPE will begin. 1247245431Sdim unsigned CPEOffset = UserBBI.postOffset(CPELogAlign) + Delta; 1248193323Sed 1249235633Sdim if (isOffsetInRange(UserOffset, CPEOffset, U)) { 1250235633Sdim DEBUG(dbgs() << "Split at end of BB#" << UserMBB->getNumber() 1251235633Sdim << format(", expected CPE offset %#x\n", CPEOffset)); 1252235633Sdim NewMBB = llvm::next(MachineFunction::iterator(UserMBB)); 1253235633Sdim // Add an unconditional branch from UserMBB to fallthrough block. Record 1254235633Sdim // it for branch lengthening; this new branch will not get out of range, 1255235633Sdim // but if the preceding conditional branch is out of range, the targets 1256235633Sdim // will be exchanged, and the altered branch may be out of range, so the 1257235633Sdim // machinery has to know about it. 1258235633Sdim int UncondBr = isThumb ? ((isThumb2) ? ARM::t2B : ARM::tB) : ARM::B; 1259235633Sdim if (!isThumb) 1260235633Sdim BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB); 1261235633Sdim else 1262235633Sdim BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB) 1263235633Sdim .addImm(ARMCC::AL).addReg(0); 1264235633Sdim unsigned MaxDisp = getUnconditionalBrDisp(UncondBr); 1265235633Sdim ImmBranches.push_back(ImmBranch(&UserMBB->back(), 1266235633Sdim MaxDisp, false, UncondBr)); 1267235633Sdim BBInfo[UserMBB->getNumber()].Size += Delta; 1268235633Sdim adjustBBOffsetsAfter(UserMBB); 1269235633Sdim return; 1270193323Sed } 1271235633Sdim } 1272212904Sdim 1273235633Sdim // What a big block. Find a place within the block to split it. This is a 1274235633Sdim // little tricky on Thumb1 since instructions are 2 bytes and constant pool 1275235633Sdim // entries are 4 bytes: if instruction I references island CPE, and 1276235633Sdim // instruction I+1 references CPE', it will not work well to put CPE as far 1277235633Sdim // forward as possible, since then CPE' cannot immediately follow it (that 1278235633Sdim // location is 2 bytes farther away from I+1 than CPE was from I) and we'd 1279235633Sdim // need to create a new island. So, we make a first guess, then walk through 1280235633Sdim // the instructions between the one currently being looked at and the 1281235633Sdim // possible insertion point, and make sure any other instructions that 1282235633Sdim // reference CPEs will be able to use the same island area; if not, we back 1283235633Sdim // up the insertion point. 1284212904Sdim 1285235633Sdim // Try to split the block so it's fully aligned. Compute the latest split 1286245431Sdim // point where we can add a 4-byte branch instruction, and then align to 1287245431Sdim // LogAlign which is the largest possible alignment in the function. 1288235633Sdim unsigned LogAlign = MF->getAlignment(); 1289235633Sdim assert(LogAlign >= CPELogAlign && "Over-aligned constant pool entry"); 1290235633Sdim unsigned KnownBits = UserBBI.internalKnownBits(); 1291235633Sdim unsigned UPad = UnknownPadding(LogAlign, KnownBits); 1292245431Sdim unsigned BaseInsertOffset = UserOffset + U.getMaxDisp() - UPad; 1293235633Sdim DEBUG(dbgs() << format("Split in middle of big block before %#x", 1294235633Sdim BaseInsertOffset)); 1295235633Sdim 1296235633Sdim // The 4 in the following is for the unconditional branch we'll be inserting 1297235633Sdim // (allows for long branch on Thumb1). Alignment of the island is handled 1298235633Sdim // inside isOffsetInRange. 1299235633Sdim BaseInsertOffset -= 4; 1300235633Sdim 1301235633Sdim DEBUG(dbgs() << format(", adjusted to %#x", BaseInsertOffset) 1302235633Sdim << " la=" << LogAlign 1303235633Sdim << " kb=" << KnownBits 1304235633Sdim << " up=" << UPad << '\n'); 1305235633Sdim 1306235633Sdim // This could point off the end of the block if we've already got constant 1307235633Sdim // pool entries following this block; only the last one is in the water list. 1308235633Sdim // Back past any possible branches (allow for a conditional and a maximally 1309235633Sdim // long unconditional). 1310245431Sdim if (BaseInsertOffset + 8 >= UserBBI.postOffset()) { 1311245431Sdim BaseInsertOffset = UserBBI.postOffset() - UPad - 8; 1312245431Sdim DEBUG(dbgs() << format("Move inside block: %#x\n", BaseInsertOffset)); 1313245431Sdim } 1314245431Sdim unsigned EndInsertOffset = BaseInsertOffset + 4 + UPad + 1315235633Sdim CPEMI->getOperand(2).getImm(); 1316235633Sdim MachineBasicBlock::iterator MI = UserMI; 1317235633Sdim ++MI; 1318235633Sdim unsigned CPUIndex = CPUserIndex+1; 1319235633Sdim unsigned NumCPUsers = CPUsers.size(); 1320235633Sdim MachineInstr *LastIT = 0; 1321235633Sdim for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI); 1322235633Sdim Offset < BaseInsertOffset; 1323235633Sdim Offset += TII->GetInstSizeInBytes(MI), 1324235633Sdim MI = llvm::next(MI)) { 1325245431Sdim assert(MI != UserMBB->end() && "Fell off end of block"); 1326235633Sdim if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) { 1327235633Sdim CPUser &U = CPUsers[CPUIndex]; 1328235633Sdim if (!isOffsetInRange(Offset, EndInsertOffset, U)) { 1329235633Sdim // Shift intertion point by one unit of alignment so it is within reach. 1330235633Sdim BaseInsertOffset -= 1u << LogAlign; 1331235633Sdim EndInsertOffset -= 1u << LogAlign; 1332235633Sdim } 1333235633Sdim // This is overly conservative, as we don't account for CPEMIs being 1334235633Sdim // reused within the block, but it doesn't matter much. Also assume CPEs 1335235633Sdim // are added in order with alignment padding. We may eventually be able 1336235633Sdim // to pack the aligned CPEs better. 1337245431Sdim EndInsertOffset += U.CPEMI->getOperand(2).getImm(); 1338235633Sdim CPUIndex++; 1339212904Sdim } 1340235633Sdim 1341235633Sdim // Remember the last IT instruction. 1342235633Sdim if (MI->getOpcode() == ARM::t2IT) 1343235633Sdim LastIT = MI; 1344193323Sed } 1345235633Sdim 1346235633Sdim --MI; 1347235633Sdim 1348235633Sdim // Avoid splitting an IT block. 1349235633Sdim if (LastIT) { 1350235633Sdim unsigned PredReg = 0; 1351235633Sdim ARMCC::CondCodes CC = getITInstrPredicate(MI, PredReg); 1352235633Sdim if (CC != ARMCC::AL) 1353235633Sdim MI = LastIT; 1354235633Sdim } 1355235633Sdim NewMBB = splitBlockBeforeInstr(MI); 1356193323Sed} 1357193323Sed 1358235633Sdim/// handleConstantPoolUser - Analyze the specified user, checking to see if it 1359193323Sed/// is out-of-range. If so, pick up the constant pool value and move it some 1360193323Sed/// place in-range. Return true if we changed any addresses (thus must run 1361193323Sed/// another pass of branch lengthening), false otherwise. 1362235633Sdimbool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) { 1363193323Sed CPUser &U = CPUsers[CPUserIndex]; 1364193323Sed MachineInstr *UserMI = U.MI; 1365193323Sed MachineInstr *CPEMI = U.CPEMI; 1366193323Sed unsigned CPI = CPEMI->getOperand(1).getIndex(); 1367193323Sed unsigned Size = CPEMI->getOperand(2).getImm(); 1368235633Sdim // Compute this only once, it's expensive. 1369235633Sdim unsigned UserOffset = getUserOffset(U); 1370193323Sed 1371193323Sed // See if the current entry is within range, or there is a clone of it 1372193323Sed // in range. 1373235633Sdim int result = findInRangeCPEntry(U, UserOffset); 1374193323Sed if (result==1) return false; 1375193323Sed else if (result==2) return true; 1376193323Sed 1377193323Sed // No existing clone of this CPE is within range. 1378193323Sed // We will be generating a new clone. Get a UID for it. 1379218893Sdim unsigned ID = AFI->createPICLabelUId(); 1380193323Sed 1381198090Srdivacky // Look for water where we can place this CPE. 1382235633Sdim MachineBasicBlock *NewIsland = MF->CreateMachineBasicBlock(); 1383198396Srdivacky MachineBasicBlock *NewMBB; 1384198396Srdivacky water_iterator IP; 1385235633Sdim if (findAvailableWater(U, UserOffset, IP)) { 1386235633Sdim DEBUG(dbgs() << "Found water in range\n"); 1387198396Srdivacky MachineBasicBlock *WaterBB = *IP; 1388198396Srdivacky 1389198396Srdivacky // If the original WaterList entry was "new water" on this iteration, 1390198396Srdivacky // propagate that to the new island. This is just keeping NewWaterList 1391198396Srdivacky // updated to match the WaterList, which will be updated below. 1392245431Sdim if (NewWaterList.erase(WaterBB)) 1393198396Srdivacky NewWaterList.insert(NewIsland); 1394245431Sdim 1395198396Srdivacky // The new CPE goes before the following block (NewMBB). 1396200581Srdivacky NewMBB = llvm::next(MachineFunction::iterator(WaterBB)); 1397198396Srdivacky 1398198396Srdivacky } else { 1399193323Sed // No water found. 1400235633Sdim DEBUG(dbgs() << "No water found\n"); 1401235633Sdim createNewWater(CPUserIndex, UserOffset, NewMBB); 1402198396Srdivacky 1403235633Sdim // splitBlockBeforeInstr adds to WaterList, which is important when it is 1404198396Srdivacky // called while handling branches so that the water will be seen on the 1405198396Srdivacky // next iteration for constant pools, but in this context, we don't want 1406198396Srdivacky // it. Check for this so it will be removed from the WaterList. 1407198396Srdivacky // Also remove any entry from NewWaterList. 1408198396Srdivacky MachineBasicBlock *WaterBB = prior(MachineFunction::iterator(NewMBB)); 1409198396Srdivacky IP = std::find(WaterList.begin(), WaterList.end(), WaterBB); 1410198396Srdivacky if (IP != WaterList.end()) 1411198396Srdivacky NewWaterList.erase(WaterBB); 1412198396Srdivacky 1413198396Srdivacky // We are adding new water. Update NewWaterList. 1414198396Srdivacky NewWaterList.insert(NewIsland); 1415193323Sed } 1416193323Sed 1417198396Srdivacky // Remove the original WaterList entry; we want subsequent insertions in 1418198396Srdivacky // this vicinity to go after the one we're about to insert. This 1419198396Srdivacky // considerably reduces the number of times we have to move the same CPE 1420198396Srdivacky // more than once and is also important to ensure the algorithm terminates. 1421198396Srdivacky if (IP != WaterList.end()) 1422198396Srdivacky WaterList.erase(IP); 1423198396Srdivacky 1424193323Sed // Okay, we know we can put an island before NewMBB now, do it! 1425235633Sdim MF->insert(NewMBB, NewIsland); 1426193323Sed 1427193323Sed // Update internal data structures to account for the newly inserted MBB. 1428235633Sdim updateForInsertedWaterBlock(NewIsland); 1429193323Sed 1430193323Sed // Decrement the old entry, and remove it if refcount becomes 0. 1431235633Sdim decrementCPEReferenceCount(CPI, CPEMI); 1432193323Sed 1433193323Sed // Now that we have an island to add the CPE to, clone the original CPE and 1434193323Sed // add it to the island. 1435198113Srdivacky U.HighWaterMark = NewIsland; 1436206124Srdivacky U.CPEMI = BuildMI(NewIsland, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY)) 1437193323Sed .addImm(ID).addConstantPoolIndex(CPI).addImm(Size); 1438193323Sed CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1)); 1439210299Sed ++NumCPEs; 1440193323Sed 1441235633Sdim // Mark the basic block as aligned as required by the const-pool entry. 1442235633Sdim NewIsland->setAlignment(getCPELogAlign(U.CPEMI)); 1443235633Sdim 1444193323Sed // Increase the size of the island block to account for the new entry. 1445235633Sdim BBInfo[NewIsland->getNumber()].Size += Size; 1446235633Sdim adjustBBOffsetsAfter(llvm::prior(MachineFunction::iterator(NewIsland))); 1447193323Sed 1448193323Sed // Finally, change the CPI in the instruction operand to be ID. 1449193323Sed for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i) 1450193323Sed if (UserMI->getOperand(i).isCPI()) { 1451193323Sed UserMI->getOperand(i).setIndex(ID); 1452193323Sed break; 1453193323Sed } 1454193323Sed 1455235633Sdim DEBUG(dbgs() << " Moved CPE to #" << ID << " CPI=" << CPI 1456235633Sdim << format(" offset=%#x\n", BBInfo[NewIsland->getNumber()].Offset)); 1457193323Sed 1458193323Sed return true; 1459193323Sed} 1460193323Sed 1461235633Sdim/// removeDeadCPEMI - Remove a dead constant pool entry instruction. Update 1462193323Sed/// sizes and offsets of impacted basic blocks. 1463235633Sdimvoid ARMConstantIslands::removeDeadCPEMI(MachineInstr *CPEMI) { 1464193323Sed MachineBasicBlock *CPEBB = CPEMI->getParent(); 1465193323Sed unsigned Size = CPEMI->getOperand(2).getImm(); 1466193323Sed CPEMI->eraseFromParent(); 1467235633Sdim BBInfo[CPEBB->getNumber()].Size -= Size; 1468193323Sed // All succeeding offsets have the current size value added in, fix this. 1469193323Sed if (CPEBB->empty()) { 1470235633Sdim BBInfo[CPEBB->getNumber()].Size = 0; 1471235633Sdim 1472252723Sdim // This block no longer needs to be aligned. 1473235633Sdim CPEBB->setAlignment(0); 1474235633Sdim } else 1475235633Sdim // Entries are sorted by descending alignment, so realign from the front. 1476235633Sdim CPEBB->setAlignment(getCPELogAlign(CPEBB->begin())); 1477235633Sdim 1478235633Sdim adjustBBOffsetsAfter(CPEBB); 1479193323Sed // An island has only one predecessor BB and one successor BB. Check if 1480193323Sed // this BB's predecessor jumps directly to this BB's successor. This 1481193323Sed // shouldn't happen currently. 1482193323Sed assert(!BBIsJumpedOver(CPEBB) && "How did this happen?"); 1483193323Sed // FIXME: remove the empty blocks after all the work is done? 1484193323Sed} 1485193323Sed 1486235633Sdim/// removeUnusedCPEntries - Remove constant pool entries whose refcounts 1487193323Sed/// are zero. 1488235633Sdimbool ARMConstantIslands::removeUnusedCPEntries() { 1489193323Sed unsigned MadeChange = false; 1490193323Sed for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) { 1491193323Sed std::vector<CPEntry> &CPEs = CPEntries[i]; 1492193323Sed for (unsigned j = 0, ee = CPEs.size(); j != ee; ++j) { 1493193323Sed if (CPEs[j].RefCount == 0 && CPEs[j].CPEMI) { 1494235633Sdim removeDeadCPEMI(CPEs[j].CPEMI); 1495193323Sed CPEs[j].CPEMI = NULL; 1496193323Sed MadeChange = true; 1497193323Sed } 1498193323Sed } 1499193323Sed } 1500193323Sed return MadeChange; 1501193323Sed} 1502193323Sed 1503235633Sdim/// isBBInRange - Returns true if the distance between specific MI and 1504193323Sed/// specific BB can fit in MI's displacement field. 1505235633Sdimbool ARMConstantIslands::isBBInRange(MachineInstr *MI,MachineBasicBlock *DestBB, 1506193323Sed unsigned MaxDisp) { 1507193323Sed unsigned PCAdj = isThumb ? 4 : 8; 1508235633Sdim unsigned BrOffset = getOffsetOf(MI) + PCAdj; 1509235633Sdim unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset; 1510193323Sed 1511235633Sdim DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber() 1512198090Srdivacky << " from BB#" << MI->getParent()->getNumber() 1513198090Srdivacky << " max delta=" << MaxDisp 1514235633Sdim << " from " << getOffsetOf(MI) << " to " << DestOffset 1515198090Srdivacky << " offset " << int(DestOffset-BrOffset) << "\t" << *MI); 1516193323Sed 1517193323Sed if (BrOffset <= DestOffset) { 1518193323Sed // Branch before the Dest. 1519193323Sed if (DestOffset-BrOffset <= MaxDisp) 1520193323Sed return true; 1521193323Sed } else { 1522193323Sed if (BrOffset-DestOffset <= MaxDisp) 1523193323Sed return true; 1524193323Sed } 1525193323Sed return false; 1526193323Sed} 1527193323Sed 1528235633Sdim/// fixupImmediateBr - Fix up an immediate branch whose destination is too far 1529193323Sed/// away to fit in its displacement field. 1530235633Sdimbool ARMConstantIslands::fixupImmediateBr(ImmBranch &Br) { 1531193323Sed MachineInstr *MI = Br.MI; 1532193323Sed MachineBasicBlock *DestBB = MI->getOperand(0).getMBB(); 1533193323Sed 1534193323Sed // Check to see if the DestBB is already in-range. 1535235633Sdim if (isBBInRange(MI, DestBB, Br.MaxDisp)) 1536193323Sed return false; 1537193323Sed 1538193323Sed if (!Br.isCond) 1539235633Sdim return fixupUnconditionalBr(Br); 1540235633Sdim return fixupConditionalBr(Br); 1541193323Sed} 1542193323Sed 1543235633Sdim/// fixupUnconditionalBr - Fix up an unconditional branch whose destination is 1544193323Sed/// too far away to fit in its displacement field. If the LR register has been 1545193323Sed/// spilled in the epilogue, then we can use BL to implement a far jump. 1546193323Sed/// Otherwise, add an intermediate branch instruction to a branch. 1547193323Sedbool 1548235633SdimARMConstantIslands::fixupUnconditionalBr(ImmBranch &Br) { 1549193323Sed MachineInstr *MI = Br.MI; 1550193323Sed MachineBasicBlock *MBB = MI->getParent(); 1551198090Srdivacky if (!isThumb1) 1552235633Sdim llvm_unreachable("fixupUnconditionalBr is Thumb1 only!"); 1553193323Sed 1554193323Sed // Use BL to implement far jump. 1555193323Sed Br.MaxDisp = (1 << 21) * 2; 1556193323Sed MI->setDesc(TII->get(ARM::tBfar)); 1557235633Sdim BBInfo[MBB->getNumber()].Size += 2; 1558235633Sdim adjustBBOffsetsAfter(MBB); 1559193323Sed HasFarJump = true; 1560210299Sed ++NumUBrFixed; 1561193323Sed 1562235633Sdim DEBUG(dbgs() << " Changed B to long jump " << *MI); 1563193323Sed 1564193323Sed return true; 1565193323Sed} 1566193323Sed 1567235633Sdim/// fixupConditionalBr - Fix up a conditional branch whose destination is too 1568193323Sed/// far away to fit in its displacement field. It is converted to an inverse 1569193323Sed/// conditional branch + an unconditional branch to the destination. 1570193323Sedbool 1571235633SdimARMConstantIslands::fixupConditionalBr(ImmBranch &Br) { 1572193323Sed MachineInstr *MI = Br.MI; 1573193323Sed MachineBasicBlock *DestBB = MI->getOperand(0).getMBB(); 1574193323Sed 1575193323Sed // Add an unconditional branch to the destination and invert the branch 1576193323Sed // condition to jump over it: 1577193323Sed // blt L1 1578193323Sed // => 1579193323Sed // bge L2 1580193323Sed // b L1 1581193323Sed // L2: 1582193323Sed ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(1).getImm(); 1583193323Sed CC = ARMCC::getOppositeCondition(CC); 1584193323Sed unsigned CCReg = MI->getOperand(2).getReg(); 1585193323Sed 1586193323Sed // If the branch is at the end of its MBB and that has a fall-through block, 1587193323Sed // direct the updated conditional branch to the fall-through block. Otherwise, 1588193323Sed // split the MBB before the next instruction. 1589193323Sed MachineBasicBlock *MBB = MI->getParent(); 1590193323Sed MachineInstr *BMI = &MBB->back(); 1591193323Sed bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB); 1592193323Sed 1593210299Sed ++NumCBrFixed; 1594193323Sed if (BMI != MI) { 1595200581Srdivacky if (llvm::next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) && 1596193323Sed BMI->getOpcode() == Br.UncondBr) { 1597193323Sed // Last MI in the BB is an unconditional branch. Can we simply invert the 1598193323Sed // condition and swap destinations: 1599193323Sed // beq L1 1600193323Sed // b L2 1601193323Sed // => 1602193323Sed // bne L2 1603193323Sed // b L1 1604193323Sed MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB(); 1605235633Sdim if (isBBInRange(MI, NewDest, Br.MaxDisp)) { 1606235633Sdim DEBUG(dbgs() << " Invert Bcc condition and swap its destination with " 1607198090Srdivacky << *BMI); 1608193323Sed BMI->getOperand(0).setMBB(DestBB); 1609193323Sed MI->getOperand(0).setMBB(NewDest); 1610193323Sed MI->getOperand(1).setImm(CC); 1611193323Sed return true; 1612193323Sed } 1613193323Sed } 1614193323Sed } 1615193323Sed 1616193323Sed if (NeedSplit) { 1617235633Sdim splitBlockBeforeInstr(MI); 1618193323Sed // No need for the branch to the next block. We're adding an unconditional 1619193323Sed // branch to the destination. 1620193323Sed int delta = TII->GetInstSizeInBytes(&MBB->back()); 1621235633Sdim BBInfo[MBB->getNumber()].Size -= delta; 1622193323Sed MBB->back().eraseFromParent(); 1623235633Sdim // BBInfo[SplitBB].Offset is wrong temporarily, fixed below 1624193323Sed } 1625200581Srdivacky MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB)); 1626193323Sed 1627235633Sdim DEBUG(dbgs() << " Insert B to BB#" << DestBB->getNumber() 1628198090Srdivacky << " also invert condition and change dest. to BB#" 1629198090Srdivacky << NextBB->getNumber() << "\n"); 1630193323Sed 1631193323Sed // Insert a new conditional branch and a new unconditional branch. 1632193323Sed // Also update the ImmBranch as well as adding a new entry for the new branch. 1633206124Srdivacky BuildMI(MBB, DebugLoc(), TII->get(MI->getOpcode())) 1634193323Sed .addMBB(NextBB).addImm(CC).addReg(CCReg); 1635193323Sed Br.MI = &MBB->back(); 1636235633Sdim BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back()); 1637226890Sdim if (isThumb) 1638226890Sdim BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB) 1639226890Sdim .addImm(ARMCC::AL).addReg(0); 1640226890Sdim else 1641226890Sdim BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB); 1642235633Sdim BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back()); 1643193323Sed unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr); 1644193323Sed ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr)); 1645193323Sed 1646193323Sed // Remove the old conditional branch. It may or may not still be in MBB. 1647235633Sdim BBInfo[MI->getParent()->getNumber()].Size -= TII->GetInstSizeInBytes(MI); 1648193323Sed MI->eraseFromParent(); 1649235633Sdim adjustBBOffsetsAfter(MBB); 1650193323Sed return true; 1651193323Sed} 1652193323Sed 1653235633Sdim/// undoLRSpillRestore - Remove Thumb push / pop instructions that only spills 1654198090Srdivacky/// LR / restores LR to pc. FIXME: This is done here because it's only possible 1655198090Srdivacky/// to do this if tBfar is not used. 1656235633Sdimbool ARMConstantIslands::undoLRSpillRestore() { 1657193323Sed bool MadeChange = false; 1658193323Sed for (unsigned i = 0, e = PushPopMIs.size(); i != e; ++i) { 1659193323Sed MachineInstr *MI = PushPopMIs[i]; 1660205218Srdivacky // First two operands are predicates. 1661193323Sed if (MI->getOpcode() == ARM::tPOP_RET && 1662205218Srdivacky MI->getOperand(2).getReg() == ARM::PC && 1663205218Srdivacky MI->getNumExplicitOperands() == 3) { 1664224145Sdim // Create the new insn and copy the predicate from the old. 1665224145Sdim BuildMI(MI->getParent(), MI->getDebugLoc(), TII->get(ARM::tBX_RET)) 1666224145Sdim .addOperand(MI->getOperand(0)) 1667224145Sdim .addOperand(MI->getOperand(1)); 1668193323Sed MI->eraseFromParent(); 1669193323Sed MadeChange = true; 1670193323Sed } 1671193323Sed } 1672193323Sed return MadeChange; 1673193323Sed} 1674198090Srdivacky 1675235633Sdim// mayOptimizeThumb2Instruction - Returns true if optimizeThumb2Instructions 1676235633Sdim// below may shrink MI. 1677235633Sdimbool 1678235633SdimARMConstantIslands::mayOptimizeThumb2Instruction(const MachineInstr *MI) const { 1679235633Sdim switch(MI->getOpcode()) { 1680235633Sdim // optimizeThumb2Instructions. 1681235633Sdim case ARM::t2LEApcrel: 1682235633Sdim case ARM::t2LDRpci: 1683235633Sdim // optimizeThumb2Branches. 1684235633Sdim case ARM::t2B: 1685235633Sdim case ARM::t2Bcc: 1686235633Sdim case ARM::tBcc: 1687235633Sdim // optimizeThumb2JumpTables. 1688235633Sdim case ARM::t2BR_JT: 1689235633Sdim return true; 1690235633Sdim } 1691235633Sdim return false; 1692235633Sdim} 1693235633Sdim 1694235633Sdimbool ARMConstantIslands::optimizeThumb2Instructions() { 1695198090Srdivacky bool MadeChange = false; 1696198090Srdivacky 1697198090Srdivacky // Shrink ADR and LDR from constantpool. 1698198090Srdivacky for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) { 1699198090Srdivacky CPUser &U = CPUsers[i]; 1700198090Srdivacky unsigned Opcode = U.MI->getOpcode(); 1701198090Srdivacky unsigned NewOpc = 0; 1702198090Srdivacky unsigned Scale = 1; 1703198090Srdivacky unsigned Bits = 0; 1704198090Srdivacky switch (Opcode) { 1705198090Srdivacky default: break; 1706198090Srdivacky case ARM::t2LEApcrel: 1707198090Srdivacky if (isARMLowRegister(U.MI->getOperand(0).getReg())) { 1708198090Srdivacky NewOpc = ARM::tLEApcrel; 1709198090Srdivacky Bits = 8; 1710198090Srdivacky Scale = 4; 1711198090Srdivacky } 1712198090Srdivacky break; 1713198090Srdivacky case ARM::t2LDRpci: 1714198090Srdivacky if (isARMLowRegister(U.MI->getOperand(0).getReg())) { 1715198090Srdivacky NewOpc = ARM::tLDRpci; 1716198090Srdivacky Bits = 8; 1717198090Srdivacky Scale = 4; 1718198090Srdivacky } 1719198090Srdivacky break; 1720198090Srdivacky } 1721198090Srdivacky 1722198090Srdivacky if (!NewOpc) 1723198090Srdivacky continue; 1724198090Srdivacky 1725235633Sdim unsigned UserOffset = getUserOffset(U); 1726198090Srdivacky unsigned MaxOffs = ((1 << Bits) - 1) * Scale; 1727235633Sdim 1728235633Sdim // Be conservative with inline asm. 1729235633Sdim if (!U.KnownAlignment) 1730235633Sdim MaxOffs -= 2; 1731235633Sdim 1732198090Srdivacky // FIXME: Check if offset is multiple of scale if scale is not 4. 1733235633Sdim if (isCPEntryInRange(U.MI, UserOffset, U.CPEMI, MaxOffs, false, true)) { 1734235633Sdim DEBUG(dbgs() << "Shrink: " << *U.MI); 1735198090Srdivacky U.MI->setDesc(TII->get(NewOpc)); 1736198090Srdivacky MachineBasicBlock *MBB = U.MI->getParent(); 1737235633Sdim BBInfo[MBB->getNumber()].Size -= 2; 1738235633Sdim adjustBBOffsetsAfter(MBB); 1739198090Srdivacky ++NumT2CPShrunk; 1740198090Srdivacky MadeChange = true; 1741198090Srdivacky } 1742198090Srdivacky } 1743198090Srdivacky 1744235633Sdim MadeChange |= optimizeThumb2Branches(); 1745235633Sdim MadeChange |= optimizeThumb2JumpTables(); 1746198090Srdivacky return MadeChange; 1747198090Srdivacky} 1748198090Srdivacky 1749235633Sdimbool ARMConstantIslands::optimizeThumb2Branches() { 1750198090Srdivacky bool MadeChange = false; 1751198090Srdivacky 1752198090Srdivacky for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) { 1753198090Srdivacky ImmBranch &Br = ImmBranches[i]; 1754198090Srdivacky unsigned Opcode = Br.MI->getOpcode(); 1755198090Srdivacky unsigned NewOpc = 0; 1756198090Srdivacky unsigned Scale = 1; 1757198090Srdivacky unsigned Bits = 0; 1758198090Srdivacky switch (Opcode) { 1759198090Srdivacky default: break; 1760198090Srdivacky case ARM::t2B: 1761198090Srdivacky NewOpc = ARM::tB; 1762198090Srdivacky Bits = 11; 1763198090Srdivacky Scale = 2; 1764198090Srdivacky break; 1765198892Srdivacky case ARM::t2Bcc: { 1766198090Srdivacky NewOpc = ARM::tBcc; 1767198090Srdivacky Bits = 8; 1768198892Srdivacky Scale = 2; 1769198090Srdivacky break; 1770198090Srdivacky } 1771198892Srdivacky } 1772198892Srdivacky if (NewOpc) { 1773198892Srdivacky unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale; 1774198892Srdivacky MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB(); 1775235633Sdim if (isBBInRange(Br.MI, DestBB, MaxOffs)) { 1776235633Sdim DEBUG(dbgs() << "Shrink branch: " << *Br.MI); 1777198892Srdivacky Br.MI->setDesc(TII->get(NewOpc)); 1778198892Srdivacky MachineBasicBlock *MBB = Br.MI->getParent(); 1779235633Sdim BBInfo[MBB->getNumber()].Size -= 2; 1780235633Sdim adjustBBOffsetsAfter(MBB); 1781198892Srdivacky ++NumT2BrShrunk; 1782198892Srdivacky MadeChange = true; 1783198892Srdivacky } 1784198892Srdivacky } 1785198892Srdivacky 1786198892Srdivacky Opcode = Br.MI->getOpcode(); 1787198892Srdivacky if (Opcode != ARM::tBcc) 1788198892Srdivacky continue; 1789198892Srdivacky 1790235633Sdim // If the conditional branch doesn't kill CPSR, then CPSR can be liveout 1791235633Sdim // so this transformation is not safe. 1792235633Sdim if (!Br.MI->killsRegister(ARM::CPSR)) 1793235633Sdim continue; 1794235633Sdim 1795198892Srdivacky NewOpc = 0; 1796198892Srdivacky unsigned PredReg = 0; 1797235633Sdim ARMCC::CondCodes Pred = getInstrPredicate(Br.MI, PredReg); 1798198892Srdivacky if (Pred == ARMCC::EQ) 1799198892Srdivacky NewOpc = ARM::tCBZ; 1800198892Srdivacky else if (Pred == ARMCC::NE) 1801198892Srdivacky NewOpc = ARM::tCBNZ; 1802198090Srdivacky if (!NewOpc) 1803198090Srdivacky continue; 1804198090Srdivacky MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB(); 1805198892Srdivacky // Check if the distance is within 126. Subtract starting offset by 2 1806198892Srdivacky // because the cmp will be eliminated. 1807235633Sdim unsigned BrOffset = getOffsetOf(Br.MI) + 4 - 2; 1808235633Sdim unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset; 1809198892Srdivacky if (BrOffset < DestOffset && (DestOffset - BrOffset) <= 126) { 1810221345Sdim MachineBasicBlock::iterator CmpMI = Br.MI; 1811221345Sdim if (CmpMI != Br.MI->getParent()->begin()) { 1812221345Sdim --CmpMI; 1813221345Sdim if (CmpMI->getOpcode() == ARM::tCMPi8) { 1814221345Sdim unsigned Reg = CmpMI->getOperand(0).getReg(); 1815235633Sdim Pred = getInstrPredicate(CmpMI, PredReg); 1816221345Sdim if (Pred == ARMCC::AL && 1817221345Sdim CmpMI->getOperand(1).getImm() == 0 && 1818221345Sdim isARMLowRegister(Reg)) { 1819221345Sdim MachineBasicBlock *MBB = Br.MI->getParent(); 1820235633Sdim DEBUG(dbgs() << "Fold: " << *CmpMI << " and: " << *Br.MI); 1821221345Sdim MachineInstr *NewBR = 1822221345Sdim BuildMI(*MBB, CmpMI, Br.MI->getDebugLoc(), TII->get(NewOpc)) 1823221345Sdim .addReg(Reg).addMBB(DestBB,Br.MI->getOperand(0).getTargetFlags()); 1824221345Sdim CmpMI->eraseFromParent(); 1825221345Sdim Br.MI->eraseFromParent(); 1826221345Sdim Br.MI = NewBR; 1827235633Sdim BBInfo[MBB->getNumber()].Size -= 2; 1828235633Sdim adjustBBOffsetsAfter(MBB); 1829221345Sdim ++NumCBZ; 1830221345Sdim MadeChange = true; 1831221345Sdim } 1832198892Srdivacky } 1833198892Srdivacky } 1834198090Srdivacky } 1835198090Srdivacky } 1836198090Srdivacky 1837198090Srdivacky return MadeChange; 1838198090Srdivacky} 1839198090Srdivacky 1840235633Sdim/// optimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller 1841198090Srdivacky/// jumptables when it's possible. 1842235633Sdimbool ARMConstantIslands::optimizeThumb2JumpTables() { 1843198090Srdivacky bool MadeChange = false; 1844198090Srdivacky 1845198090Srdivacky // FIXME: After the tables are shrunk, can we get rid some of the 1846198090Srdivacky // constantpool tables? 1847235633Sdim MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); 1848203954Srdivacky if (MJTI == 0) return false; 1849210299Sed 1850198090Srdivacky const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); 1851198090Srdivacky for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) { 1852198090Srdivacky MachineInstr *MI = T2JumpTables[i]; 1853224145Sdim const MCInstrDesc &MCID = MI->getDesc(); 1854224145Sdim unsigned NumOps = MCID.getNumOperands(); 1855235633Sdim unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 3 : 2); 1856198090Srdivacky MachineOperand JTOP = MI->getOperand(JTOpIdx); 1857198090Srdivacky unsigned JTI = JTOP.getIndex(); 1858198090Srdivacky assert(JTI < JT.size()); 1859198090Srdivacky 1860198090Srdivacky bool ByteOk = true; 1861198090Srdivacky bool HalfWordOk = true; 1862235633Sdim unsigned JTOffset = getOffsetOf(MI) + 4; 1863198090Srdivacky const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs; 1864198090Srdivacky for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) { 1865198090Srdivacky MachineBasicBlock *MBB = JTBBs[j]; 1866235633Sdim unsigned DstOffset = BBInfo[MBB->getNumber()].Offset; 1867198090Srdivacky // Negative offset is not ok. FIXME: We should change BB layout to make 1868198090Srdivacky // sure all the branches are forward. 1869198090Srdivacky if (ByteOk && (DstOffset - JTOffset) > ((1<<8)-1)*2) 1870198090Srdivacky ByteOk = false; 1871198090Srdivacky unsigned TBHLimit = ((1<<16)-1)*2; 1872198090Srdivacky if (HalfWordOk && (DstOffset - JTOffset) > TBHLimit) 1873198090Srdivacky HalfWordOk = false; 1874198090Srdivacky if (!ByteOk && !HalfWordOk) 1875198090Srdivacky break; 1876198090Srdivacky } 1877198090Srdivacky 1878198090Srdivacky if (ByteOk || HalfWordOk) { 1879198090Srdivacky MachineBasicBlock *MBB = MI->getParent(); 1880198090Srdivacky unsigned BaseReg = MI->getOperand(0).getReg(); 1881198090Srdivacky bool BaseRegKill = MI->getOperand(0).isKill(); 1882198090Srdivacky if (!BaseRegKill) 1883198090Srdivacky continue; 1884198090Srdivacky unsigned IdxReg = MI->getOperand(1).getReg(); 1885198090Srdivacky bool IdxRegKill = MI->getOperand(1).isKill(); 1886210299Sed 1887210299Sed // Scan backwards to find the instruction that defines the base 1888210299Sed // register. Due to post-RA scheduling, we can't count on it 1889210299Sed // immediately preceding the branch instruction. 1890198090Srdivacky MachineBasicBlock::iterator PrevI = MI; 1891210299Sed MachineBasicBlock::iterator B = MBB->begin(); 1892210299Sed while (PrevI != B && !PrevI->definesRegister(BaseReg)) 1893210299Sed --PrevI; 1894210299Sed 1895210299Sed // If for some reason we didn't find it, we can't do anything, so 1896210299Sed // just skip this one. 1897210299Sed if (!PrevI->definesRegister(BaseReg)) 1898198090Srdivacky continue; 1899198090Srdivacky 1900210299Sed MachineInstr *AddrMI = PrevI; 1901198090Srdivacky bool OptOk = true; 1902210299Sed // Examine the instruction that calculates the jumptable entry address. 1903210299Sed // Make sure it only defines the base register and kills any uses 1904210299Sed // other than the index register. 1905198090Srdivacky for (unsigned k = 0, eee = AddrMI->getNumOperands(); k != eee; ++k) { 1906198090Srdivacky const MachineOperand &MO = AddrMI->getOperand(k); 1907198090Srdivacky if (!MO.isReg() || !MO.getReg()) 1908198090Srdivacky continue; 1909198090Srdivacky if (MO.isDef() && MO.getReg() != BaseReg) { 1910198090Srdivacky OptOk = false; 1911198090Srdivacky break; 1912198090Srdivacky } 1913198090Srdivacky if (MO.isUse() && !MO.isKill() && MO.getReg() != IdxReg) { 1914198090Srdivacky OptOk = false; 1915198090Srdivacky break; 1916198090Srdivacky } 1917198090Srdivacky } 1918198090Srdivacky if (!OptOk) 1919198090Srdivacky continue; 1920198090Srdivacky 1921210299Sed // Now scan back again to find the tLEApcrel or t2LEApcrelJT instruction 1922210299Sed // that gave us the initial base register definition. 1923210299Sed for (--PrevI; PrevI != B && !PrevI->definesRegister(BaseReg); --PrevI) 1924210299Sed ; 1925210299Sed 1926210299Sed // The instruction should be a tLEApcrel or t2LEApcrelJT; we want 1927198090Srdivacky // to delete it as well. 1928210299Sed MachineInstr *LeaMI = PrevI; 1929198090Srdivacky if ((LeaMI->getOpcode() != ARM::tLEApcrelJT && 1930198090Srdivacky LeaMI->getOpcode() != ARM::t2LEApcrelJT) || 1931198090Srdivacky LeaMI->getOperand(0).getReg() != BaseReg) 1932198090Srdivacky OptOk = false; 1933198090Srdivacky 1934198090Srdivacky if (!OptOk) 1935198090Srdivacky continue; 1936198090Srdivacky 1937235633Sdim DEBUG(dbgs() << "Shrink JT: " << *MI << " addr: " << *AddrMI 1938235633Sdim << " lea: " << *LeaMI); 1939218893Sdim unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT; 1940198090Srdivacky MachineInstr *NewJTMI = BuildMI(MBB, MI->getDebugLoc(), TII->get(Opc)) 1941198090Srdivacky .addReg(IdxReg, getKillRegState(IdxRegKill)) 1942198090Srdivacky .addJumpTableIndex(JTI, JTOP.getTargetFlags()) 1943198090Srdivacky .addImm(MI->getOperand(JTOpIdx+1).getImm()); 1944235633Sdim DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": " << *NewJTMI); 1945198090Srdivacky // FIXME: Insert an "ALIGN" instruction to ensure the next instruction 1946198090Srdivacky // is 2-byte aligned. For now, asm printer will fix it up. 1947198090Srdivacky unsigned NewSize = TII->GetInstSizeInBytes(NewJTMI); 1948198090Srdivacky unsigned OrigSize = TII->GetInstSizeInBytes(AddrMI); 1949198090Srdivacky OrigSize += TII->GetInstSizeInBytes(LeaMI); 1950198090Srdivacky OrigSize += TII->GetInstSizeInBytes(MI); 1951198090Srdivacky 1952198090Srdivacky AddrMI->eraseFromParent(); 1953198090Srdivacky LeaMI->eraseFromParent(); 1954198090Srdivacky MI->eraseFromParent(); 1955198090Srdivacky 1956198090Srdivacky int delta = OrigSize - NewSize; 1957235633Sdim BBInfo[MBB->getNumber()].Size -= delta; 1958235633Sdim adjustBBOffsetsAfter(MBB); 1959198090Srdivacky 1960198090Srdivacky ++NumTBs; 1961198090Srdivacky MadeChange = true; 1962198090Srdivacky } 1963198090Srdivacky } 1964198090Srdivacky 1965198090Srdivacky return MadeChange; 1966198090Srdivacky} 1967199481Srdivacky 1968235633Sdim/// reorderThumb2JumpTables - Adjust the function's block layout to ensure that 1969199481Srdivacky/// jump tables always branch forwards, since that's what tbb and tbh need. 1970235633Sdimbool ARMConstantIslands::reorderThumb2JumpTables() { 1971199481Srdivacky bool MadeChange = false; 1972199481Srdivacky 1973235633Sdim MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); 1974203954Srdivacky if (MJTI == 0) return false; 1975210299Sed 1976199481Srdivacky const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); 1977199481Srdivacky for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) { 1978199481Srdivacky MachineInstr *MI = T2JumpTables[i]; 1979224145Sdim const MCInstrDesc &MCID = MI->getDesc(); 1980224145Sdim unsigned NumOps = MCID.getNumOperands(); 1981235633Sdim unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 3 : 2); 1982199481Srdivacky MachineOperand JTOP = MI->getOperand(JTOpIdx); 1983199481Srdivacky unsigned JTI = JTOP.getIndex(); 1984199481Srdivacky assert(JTI < JT.size()); 1985199481Srdivacky 1986199481Srdivacky // We prefer if target blocks for the jump table come after the jump 1987199481Srdivacky // instruction so we can use TB[BH]. Loop through the target blocks 1988199481Srdivacky // and try to adjust them such that that's true. 1989199481Srdivacky int JTNumber = MI->getParent()->getNumber(); 1990199481Srdivacky const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs; 1991199481Srdivacky for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) { 1992199481Srdivacky MachineBasicBlock *MBB = JTBBs[j]; 1993199481Srdivacky int DTNumber = MBB->getNumber(); 1994199481Srdivacky 1995199481Srdivacky if (DTNumber < JTNumber) { 1996199481Srdivacky // The destination precedes the switch. Try to move the block forward 1997199481Srdivacky // so we have a positive offset. 1998199481Srdivacky MachineBasicBlock *NewBB = 1999235633Sdim adjustJTTargetBlockForward(MBB, MI->getParent()); 2000199481Srdivacky if (NewBB) 2001199481Srdivacky MJTI->ReplaceMBBInJumpTable(JTI, JTBBs[j], NewBB); 2002199481Srdivacky MadeChange = true; 2003199481Srdivacky } 2004199481Srdivacky } 2005199481Srdivacky } 2006199481Srdivacky 2007199481Srdivacky return MadeChange; 2008199481Srdivacky} 2009199481Srdivacky 2010199481SrdivackyMachineBasicBlock *ARMConstantIslands:: 2011235633SdimadjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) { 2012210299Sed // If the destination block is terminated by an unconditional branch, 2013199481Srdivacky // try to move it; otherwise, create a new block following the jump 2014199481Srdivacky // table that branches back to the actual target. This is a very simple 2015199481Srdivacky // heuristic. FIXME: We can definitely improve it. 2016199481Srdivacky MachineBasicBlock *TBB = 0, *FBB = 0; 2017199481Srdivacky SmallVector<MachineOperand, 4> Cond; 2018199481Srdivacky SmallVector<MachineOperand, 4> CondPrior; 2019199481Srdivacky MachineFunction::iterator BBi = BB; 2020199481Srdivacky MachineFunction::iterator OldPrior = prior(BBi); 2021199481Srdivacky 2022199481Srdivacky // If the block terminator isn't analyzable, don't try to move the block 2023199481Srdivacky bool B = TII->AnalyzeBranch(*BB, TBB, FBB, Cond); 2024199481Srdivacky 2025199481Srdivacky // If the block ends in an unconditional branch, move it. The prior block 2026199481Srdivacky // has to have an analyzable terminator for us to move this one. Be paranoid 2027199481Srdivacky // and make sure we're not trying to move the entry block of the function. 2028235633Sdim if (!B && Cond.empty() && BB != MF->begin() && 2029199481Srdivacky !TII->AnalyzeBranch(*OldPrior, TBB, FBB, CondPrior)) { 2030199481Srdivacky BB->moveAfter(JTBB); 2031199481Srdivacky OldPrior->updateTerminator(); 2032199481Srdivacky BB->updateTerminator(); 2033199481Srdivacky // Update numbering to account for the block being moved. 2034235633Sdim MF->RenumberBlocks(); 2035199481Srdivacky ++NumJTMoved; 2036199481Srdivacky return NULL; 2037199481Srdivacky } 2038199481Srdivacky 2039199481Srdivacky // Create a new MBB for the code after the jump BB. 2040199481Srdivacky MachineBasicBlock *NewBB = 2041235633Sdim MF->CreateMachineBasicBlock(JTBB->getBasicBlock()); 2042199481Srdivacky MachineFunction::iterator MBBI = JTBB; ++MBBI; 2043235633Sdim MF->insert(MBBI, NewBB); 2044199481Srdivacky 2045199481Srdivacky // Add an unconditional branch from NewBB to BB. 2046199481Srdivacky // There doesn't seem to be meaningful DebugInfo available; this doesn't 2047199481Srdivacky // correspond directly to anything in the source. 2048199481Srdivacky assert (isThumb2 && "Adjusting for TB[BH] but not in Thumb2?"); 2049226890Sdim BuildMI(NewBB, DebugLoc(), TII->get(ARM::t2B)).addMBB(BB) 2050226890Sdim .addImm(ARMCC::AL).addReg(0); 2051199481Srdivacky 2052199481Srdivacky // Update internal data structures to account for the newly inserted MBB. 2053235633Sdim MF->RenumberBlocks(NewBB); 2054199481Srdivacky 2055199481Srdivacky // Update the CFG. 2056199481Srdivacky NewBB->addSuccessor(BB); 2057199481Srdivacky JTBB->removeSuccessor(BB); 2058199481Srdivacky JTBB->addSuccessor(NewBB); 2059199481Srdivacky 2060199481Srdivacky ++NumJTInserted; 2061199481Srdivacky return NewBB; 2062199481Srdivacky} 2063