1234353Sdim//===-- ARMSubtarget.cpp - ARM Subtarget Information ----------------------===// 2193323Sed// 3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim// See https://llvm.org/LICENSE.txt for license information. 5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6193323Sed// 7193323Sed//===----------------------------------------------------------------------===// 8193323Sed// 9224145Sdim// This file implements the ARM specific subclass of TargetSubtargetInfo. 10193323Sed// 11193323Sed//===----------------------------------------------------------------------===// 12193323Sed 13321369Sdim#include "ARM.h" 14321369Sdim 15321369Sdim#include "ARMCallLowering.h" 16321369Sdim#include "ARMLegalizerInfo.h" 17321369Sdim#include "ARMRegisterBankInfo.h" 18193323Sed#include "ARMSubtarget.h" 19276479Sdim#include "ARMFrameLowering.h" 20276479Sdim#include "ARMInstrInfo.h" 21276479Sdim#include "ARMSubtarget.h" 22280031Sdim#include "ARMTargetMachine.h" 23321369Sdim#include "MCTargetDesc/ARMMCTargetDesc.h" 24276479Sdim#include "Thumb1FrameLowering.h" 25276479Sdim#include "Thumb1InstrInfo.h" 26276479Sdim#include "Thumb2InstrInfo.h" 27321369Sdim#include "llvm/ADT/StringRef.h" 28321369Sdim#include "llvm/ADT/Triple.h" 29321369Sdim#include "llvm/ADT/Twine.h" 30321369Sdim#include "llvm/CodeGen/GlobalISel/InstructionSelect.h" 31321369Sdim#include "llvm/CodeGen/MachineFunction.h" 32276479Sdim#include "llvm/IR/Function.h" 33249423Sdim#include "llvm/IR/GlobalValue.h" 34296417Sdim#include "llvm/MC/MCAsmInfo.h" 35321369Sdim#include "llvm/MC/MCTargetOptions.h" 36321369Sdim#include "llvm/Support/CodeGen.h" 37249423Sdim#include "llvm/Support/CommandLine.h" 38321369Sdim#include "llvm/Support/TargetParser.h" 39249423Sdim#include "llvm/Target/TargetOptions.h" 40224145Sdim 41276479Sdimusing namespace llvm; 42276479Sdim 43276479Sdim#define DEBUG_TYPE "arm-subtarget" 44276479Sdim 45224145Sdim#define GET_SUBTARGETINFO_TARGET_DESC 
46224145Sdim#define GET_SUBTARGETINFO_CTOR 47224145Sdim#include "ARMGenSubtargetInfo.inc" 48224145Sdim 49194710Sedstatic cl::opt<bool> 50243830SdimUseFusedMulOps("arm-use-mulops", 51243830Sdim cl::init(true), cl::Hidden); 52243830Sdim 53261991Sdimenum ITMode { 54261991Sdim DefaultIT, 55261991Sdim RestrictedIT, 56261991Sdim NoRestrictedIT 57261991Sdim}; 58261991Sdim 59261991Sdimstatic cl::opt<ITMode> 60261991SdimIT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), 61261991Sdim cl::ZeroOrMore, 62261991Sdim cl::values(clEnumValN(DefaultIT, "arm-default-it", 63261991Sdim "Generate IT block based on arch"), 64261991Sdim clEnumValN(RestrictedIT, "arm-restrict-it", 65261991Sdim "Disallow deprecated IT based on ARMv8"), 66261991Sdim clEnumValN(NoRestrictedIT, "arm-no-restrict-it", 67314564Sdim "Allow IT blocks based on ARMv7"))); 68261991Sdim 69296417Sdim/// ForceFastISel - Use the fast-isel, even for subtargets where it is not 70296417Sdim/// currently supported (for testing only). 71296417Sdimstatic cl::opt<bool> 72296417SdimForceFastISel("arm-force-fast-isel", 73296417Sdim cl::init(false), cl::Hidden); 74296417Sdim 75360784Sdimstatic cl::opt<bool> EnableSubRegLiveness("arm-enable-subreg-liveness", 76360784Sdim cl::init(false), cl::Hidden); 77360784Sdim 78276479Sdim/// initializeSubtargetDependencies - Initializes using a CPU and feature string 79276479Sdim/// so that we can use initializer lists for subtarget initialization. 
ARMSubtarget &ARMSubtarget::initializeSubtargetDependencies(StringRef CPU,
                                                            StringRef FS) {
  // Order matters: the environment flags must be set before the feature
  // string is parsed, since initSubtargetFeatures queries them.
  initializeEnvironment();
  initSubtargetFeatures(CPU, FS);
  return *this;
}

/// Create the frame-lowering object for this subtarget. As a side effect this
/// runs initializeSubtargetDependencies, so later members of the constructor's
/// initializer list may query feature bits directly.
ARMFrameLowering *ARMSubtarget::initializeFrameLowering(StringRef CPU,
                                                        StringRef FS) {
  ARMSubtarget &STI = initializeSubtargetDependencies(CPU, FS);
  if (STI.isThumb1Only())
    return (ARMFrameLowering *)new Thumb1FrameLowering(STI);

  return new ARMFrameLowering(STI);
}

ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU,
                           const std::string &FS,
                           const ARMBaseTargetMachine &TM, bool IsLittle,
                           bool MinSize)
    : ARMGenSubtargetInfo(TT, CPU, FS), UseMulOps(UseFusedMulOps),
      CPUString(CPU), OptMinSize(MinSize), IsLittle(IsLittle),
      TargetTriple(TT), Options(TM.Options), TM(TM),
      FrameLowering(initializeFrameLowering(CPU, FS)),
      // At this point initializeSubtargetDependencies has been called so
      // we can query directly.
      InstrInfo(isThumb1Only()
                    ? (ARMBaseInstrInfo *)new Thumb1InstrInfo(*this)
                    : !isThumb()
                          ? (ARMBaseInstrInfo *)new ARMInstrInfo(*this)
                          : (ARMBaseInstrInfo *)new Thumb2InstrInfo(*this)),
      TLInfo(TM, *this) {

  // Set up the GlobalISel pipeline objects. The register-bank info is shared
  // between the instruction selector and this subtarget.
  CallLoweringInfo.reset(new ARMCallLowering(*getTargetLowering()));
  Legalizer.reset(new ARMLegalizerInfo(*this));

  auto *RBI = new ARMRegisterBankInfo(*getRegisterInfo());

  // FIXME: At this point, we can't rely on Subtarget having RBI.
  // It's awkward to mix passing RBI and the Subtarget; should we pass
  // TII/TRI as well?
  InstSelector.reset(createARMInstructionSelector(
      *static_cast<const ARMBaseTargetMachine *>(&TM), *this, *RBI));

  RegBankInfo.reset(RBI);
}

const CallLowering *ARMSubtarget::getCallLowering() const {
  return CallLoweringInfo.get();
}

InstructionSelector *ARMSubtarget::getInstructionSelector() const {
  return InstSelector.get();
}

const LegalizerInfo *ARMSubtarget::getLegalizerInfo() const {
  return Legalizer.get();
}

const RegisterBankInfo *ARMSubtarget::getRegBankInfo() const {
  return RegBankInfo.get();
}

bool ARMSubtarget::isXRaySupported() const {
  // We don't currently support Thumb, but Windows requires Thumb.
  return hasV6Ops() && hasARMOps() && !isTargetWindows();
}

void ARMSubtarget::initializeEnvironment() {
  // MCAsmInfo isn't always present (e.g. in opt) so we can't initialize this
  // directly from it, but we can try to make sure they're consistent when both
  // available.
  UseSjLjEH = (isTargetDarwin() && !isTargetWatchABI() &&
               Options.ExceptionModel == ExceptionHandling::None) ||
              Options.ExceptionModel == ExceptionHandling::SjLj;
  assert((!TM.getMCAsmInfo() ||
          (TM.getMCAsmInfo()->getExceptionHandlingType() ==
           ExceptionHandling::SjLj) == UseSjLjEH) &&
         "inconsistent sjlj choice between CodeGen and MC");
}

void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
  // Pick a default CPU when none was supplied. On Darwin, armv7s/armv7k
  // triples imply specific cores.
  if (CPUString.empty()) {
    CPUString = "generic";

    if (isTargetDarwin()) {
      StringRef ArchName = TargetTriple.getArchName();
      ARM::ArchKind AK = ARM::parseArch(ArchName);
      if (AK == ARM::ArchKind::ARMV7S)
        // Default to the Swift CPU when targeting armv7s/thumbv7s.
        CPUString = "swift";
      else if (AK == ARM::ArchKind::ARMV7K)
        // Default to the Cortex-a7 CPU when targeting armv7k/thumbv7k.
        // ARMv7k does not use SjLj exception handling.
        CPUString = "cortex-a7";
    }
  }

  // Insert the architecture feature derived from the target triple into the
  // feature string. This is important for setting features that are implied
  // based on the architecture version.
  std::string ArchFS = ARM_MC::ParseARMTriple(TargetTriple, CPUString);
  if (!FS.empty()) {
    if (!ArchFS.empty())
      ArchFS = (Twine(ArchFS) + "," + FS).str();
    else
      ArchFS = FS;
  }
  ParseSubtargetFeatures(CPUString, ArchFS);

  // FIXME: This used to enable V6T2 support implicitly for Thumb2 mode.
  // Assert this for now to make the change obvious.
  assert(hasV6T2Ops() || !hasThumb2());

  // Execute only support requires movt support
  if (genExecuteOnly()) {
    NoMovt = false;
    assert(hasV8MBaselineOps() && "Cannot generate execute-only code for this target");
  }

  // Keep a pointer to static instruction cost data for the specified CPU.
  SchedModel = getSchedModelForCPU(CPUString);

  // Initialize scheduling itinerary for the specified CPU.
  InstrItins = getInstrItineraryForCPU(CPUString);

  // FIXME: this is invalid for WindowsCE
  if (isTargetWindows())
    NoARM = true;

  // AAPCS requires 8-byte stack alignment; NaCl and AAPCS16 require 16.
  // Note the second check deliberately overrides the first when both apply.
  if (isAAPCS_ABI())
    stackAlignment = Align(8);
  if (isTargetNaCl() || isAAPCS16_ABI())
    stackAlignment = Align(16);

  // FIXME: Completely disable sibcall for Thumb1 since ThumbRegisterInfo::
  // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as
  // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation
  // support in the assembler and linker to be used. This would need to be
  // fixed to fully support tail calls in Thumb1.
  //
  // For ARMv8-M, we /do/ implement tail calls. Doing this is tricky for v8-M
  // baseline, since the LDM/POP instruction on Thumb doesn't take LR. This
  // means if we need to reload LR, it takes extra instructions, which outweighs
  // the value of the tail call; but here we don't know yet whether LR is going
  // to be used. We take the optimistic approach of generating the tail call and
  // perhaps taking a hit if we need to restore the LR.

  // Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
  // but we need to make sure there are enough registers; the only valid
  // registers are the 4 used for parameters. We don't currently do this
  // case.

  SupportsTailCall = !isThumb() || hasV8MBaselineOps();

  // Pre-iOS-5 targets lack the runtime/linker support for tail calls.
  if (isTargetMachO() && isTargetIOS() && getTargetTriple().isOSVersionLT(5, 0))
    SupportsTailCall = false;

  // Resolve the -arm-*-it command-line choice; by default v8 restricts the
  // deprecated multi-instruction IT forms.
  switch (IT) {
  case DefaultIT:
    RestrictIT = hasV8Ops();
    break;
  case RestrictedIT:
    RestrictIT = true;
    break;
  case NoRestrictedIT:
    RestrictIT = false;
    break;
  }

  // NEON f32 ops are non-IEEE 754 compliant. Darwin is ok with it by default.
  const FeatureBitset &Bits = getFeatureBits();
  if ((Bits[ARM::ProcA5] || Bits[ARM::ProcA8]) && // Where this matters
      (Options.UnsafeFPMath || isTargetDarwin()))
    UseNEONForSinglePrecisionFP = true;

  // Read-write position independence reserves r9 as the static base register.
  if (isRWPI())
    ReserveR9 = true;

  // If MVEVectorCostFactor is still 0 (has not been set to anything else), default it to 2
  if (MVEVectorCostFactor == 0)
    MVEVectorCostFactor = 2;

  // Per-core scheduling/vectorization tuning knobs.
  // FIXME: Teach TableGen to deal with these instead of doing it manually here.
  switch (ARMProcFamily) {
  case Others:
  case CortexA5:
    break;
  case CortexA7:
    LdStMultipleTiming = DoubleIssue;
    break;
  case CortexA8:
    LdStMultipleTiming = DoubleIssue;
    break;
  case CortexA9:
    LdStMultipleTiming = DoubleIssueCheckUnalignedAccess;
    PreISelOperandLatencyAdjustment = 1;
    break;
  case CortexA12:
    break;
  case CortexA15:
    MaxInterleaveFactor = 2;
    PreISelOperandLatencyAdjustment = 1;
    PartialUpdateClearance = 12;
    break;
  case CortexA17:
  case CortexA32:
  case CortexA35:
  case CortexA53:
  case CortexA55:
  case CortexA57:
  case CortexA72:
  case CortexA73:
  case CortexA75:
  case CortexA76:
  case CortexR4:
  case CortexR4F:
  case CortexR5:
  case CortexR7:
  case CortexM3:
  case CortexR52:
    break;
  case Exynos:
    LdStMultipleTiming = SingleIssuePlusExtras;
    MaxInterleaveFactor = 4;
    if (!isThumb())
      PrefLoopLogAlignment = 3;
    break;
  case Kryo:
    break;
  case Krait:
    PreISelOperandLatencyAdjustment = 1;
    break;
  case NeoverseN1:
    break;
  case Swift:
    MaxInterleaveFactor = 2;
    LdStMultipleTiming = SingleIssuePlusExtras;
    PreISelOperandLatencyAdjustment = 1;
    PartialUpdateClearance = 12;
    break;
  }
}

bool ARMSubtarget::isTargetHardFloat() const { return TM.isTargetHardFloat(); }

bool ARMSubtarget::isAPCS_ABI() const {
  assert(TM.TargetABI != ARMBaseTargetMachine::ARM_ABI_UNKNOWN);
  return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_APCS;
}
bool ARMSubtarget::isAAPCS_ABI() const {
  // AAPCS16 is treated as a flavour of AAPCS here.
  assert(TM.TargetABI != ARMBaseTargetMachine::ARM_ABI_UNKNOWN);
  return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS ||
         TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16;
}
bool ARMSubtarget::isAAPCS16_ABI() const {
  assert(TM.TargetABI != ARMBaseTargetMachine::ARM_ABI_UNKNOWN);
  return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16;
}

// Read-only / read-write position independence, including the combined mode.
bool ARMSubtarget::isROPI() const {
  return TM.getRelocationModel() == Reloc::ROPI ||
         TM.getRelocationModel() == Reloc::ROPI_RWPI;
}
bool ARMSubtarget::isRWPI() const {
  return TM.getRelocationModel() == Reloc::RWPI ||
         TM.getRelocationModel() == Reloc::ROPI_RWPI;
}

/// Return true if GV must be accessed via an indirection (e.g. a GOT-style
/// load) rather than referenced directly.
bool ARMSubtarget::isGVIndirectSymbol(const GlobalValue *GV) const {
  if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
    return true;

  // 32 bit macho has no relocation for a-b if a is undefined, even if b is in
  // the section that is being relocated. This means we have to use a load even
  // for GVs that are known to be local to the dso.
  if (isTargetMachO() && TM.isPositionIndependent() &&
      (GV->isDeclarationForLinker() || GV->hasCommonLinkage()))
    return true;

  return false;
}

bool ARMSubtarget::isGVInGOT(const GlobalValue *GV) const {
  return isTargetELF() && TM.isPositionIndependent() &&
         !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
}

unsigned ARMSubtarget::getMispredictionPenalty() const {
  return SchedModel.MispredictPenalty;
}

bool ARMSubtarget::enableMachineScheduler() const {
  // The MachineScheduler can increase register usage, so we use more high
  // registers and end up with more T2 instructions that cannot be converted to
  // T1 instructions. At least until we do better at converting to thumb1
  // instructions, on cortex-m at Oz where we are size-paranoid, don't use the
  // Machine scheduler, relying on the DAG register pressure scheduler instead.
  if (isMClass() && hasMinSize())
    return false;
  // Enable the MachineScheduler before register allocation for subtargets
  // with the use-misched feature.
  return useMachineScheduler();
}

bool ARMSubtarget::enableSubRegLiveness() const { return EnableSubRegLiveness; }

// This overrides the PostRAScheduler bit in the SchedModel for any CPU.
388288943Sdimbool ARMSubtarget::enablePostRAScheduler() const { 389360784Sdim if (enableMachineScheduler()) 390360784Sdim return false; 391327952Sdim if (disablePostRAScheduler()) 392296417Sdim return false; 393360784Sdim // Thumb1 cores will generally not benefit from post-ra scheduling 394327952Sdim return !isThumb1Only(); 395199481Srdivacky} 396276479Sdim 397360784Sdimbool ARMSubtarget::enablePostRAMachineScheduler() const { 398360784Sdim if (!enableMachineScheduler()) 399360784Sdim return false; 400360784Sdim if (disablePostRAScheduler()) 401360784Sdim return false; 402360784Sdim return !isThumb1Only(); 403360784Sdim} 404360784Sdim 405314564Sdimbool ARMSubtarget::enableAtomicExpand() const { return hasAnyDataBarrier(); } 406276479Sdim 407353358Sdimbool ARMSubtarget::useStride4VFPs() const { 408296417Sdim // For general targets, the prologue can grow when VFPs are allocated with 409296417Sdim // stride 4 (more vpush instructions). But WatchOS uses a compact unwind 410296417Sdim // format which it's more important to get right. 411344779Sdim return isTargetWatchABI() || 412353358Sdim (useWideStrideVFP() && !OptMinSize); 413296417Sdim} 414296417Sdim 415353358Sdimbool ARMSubtarget::useMovt() const { 416276479Sdim // NOTE Windows on ARM needs to use mov.w/mov.t pairs to materialise 32-bit 417276479Sdim // immediates as it is inherently position independent, and may be out of 418276479Sdim // range otherwise. 419309124Sdim return !NoMovt && hasV8MBaselineOps() && 420353358Sdim (isTargetWindows() || !OptMinSize || genExecuteOnly()); 421276479Sdim} 422288943Sdim 423288943Sdimbool ARMSubtarget::useFastISel() const { 424296417Sdim // Enable fast-isel for any target, for testing only. 425296417Sdim if (ForceFastISel) 426296417Sdim return true; 427296417Sdim 428296417Sdim // Limit fast-isel to the targets that are or have been tested. 
429296417Sdim if (!hasV6Ops()) 430296417Sdim return false; 431296417Sdim 432288943Sdim // Thumb2 support on iOS; ARM support on iOS, Linux and NaCl. 433288943Sdim return TM.Options.EnableFastISel && 434288943Sdim ((isTargetMachO() && !isThumb1Only()) || 435288943Sdim (isTargetLinux() && !isThumb()) || (isTargetNaCl() && !isThumb())); 436288943Sdim} 437353358Sdim 438353358Sdimunsigned ARMSubtarget::getGPRAllocationOrder(const MachineFunction &MF) const { 439353358Sdim // The GPR register class has multiple possible allocation orders, with 440353358Sdim // tradeoffs preferred by different sub-architectures and optimisation goals. 441353358Sdim // The allocation orders are: 442353358Sdim // 0: (the default tablegen order, not used) 443353358Sdim // 1: r14, r0-r13 444353358Sdim // 2: r0-r7 445353358Sdim // 3: r0-r7, r12, lr, r8-r11 446353358Sdim // Note that the register allocator will change this order so that 447353358Sdim // callee-saved registers are used later, as they require extra work in the 448353358Sdim // prologue/epilogue (though we sometimes override that). 449353358Sdim 450353358Sdim // For thumb1-only targets, only the low registers are allocatable. 451353358Sdim if (isThumb1Only()) 452353358Sdim return 2; 453353358Sdim 454353358Sdim // Allocate low registers first, so we can select more 16-bit instructions. 455353358Sdim // We also (in ignoreCSRForAllocationOrder) override the default behaviour 456353358Sdim // with regards to callee-saved registers, because pushing extra registers is 457353358Sdim // much cheaper (in terms of code size) than using high registers. After 458353358Sdim // that, we allocate r12 (doesn't need to be saved), lr (saving it means we 459353358Sdim // can return with the pop, don't need an extra "bx lr") and then the rest of 460353358Sdim // the high registers. 
461353358Sdim if (isThumb2() && MF.getFunction().hasMinSize()) 462353358Sdim return 3; 463353358Sdim 464353358Sdim // Otherwise, allocate in the default order, using LR first because saving it 465353358Sdim // allows a shorter epilogue sequence. 466353358Sdim return 1; 467353358Sdim} 468353358Sdim 469353358Sdimbool ARMSubtarget::ignoreCSRForAllocationOrder(const MachineFunction &MF, 470353358Sdim unsigned PhysReg) const { 471353358Sdim // To minimize code size in Thumb2, we prefer the usage of low regs (lower 472353358Sdim // cost per use) so we can use narrow encoding. By default, caller-saved 473353358Sdim // registers (e.g. lr, r12) are always allocated first, regardless of 474353358Sdim // their cost per use. When optForMinSize, we prefer the low regs even if 475353358Sdim // they are CSR because usually push/pop can be folded into existing ones. 476353358Sdim return isThumb2() && MF.getFunction().hasMinSize() && 477353358Sdim ARM::GPRRegClass.contains(PhysReg); 478353358Sdim} 479