1234353Sdim//===-- ARMSubtarget.cpp - ARM Subtarget Information ----------------------===//
2193323Sed//
3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4353358Sdim// See https://llvm.org/LICENSE.txt for license information.
5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6193323Sed//
7193323Sed//===----------------------------------------------------------------------===//
8193323Sed//
9224145Sdim// This file implements the ARM specific subclass of TargetSubtargetInfo.
10193323Sed//
11193323Sed//===----------------------------------------------------------------------===//
12193323Sed
13321369Sdim#include "ARM.h"
14321369Sdim
15321369Sdim#include "ARMCallLowering.h"
16321369Sdim#include "ARMLegalizerInfo.h"
17321369Sdim#include "ARMRegisterBankInfo.h"
18193323Sed#include "ARMSubtarget.h"
19276479Sdim#include "ARMFrameLowering.h"
20276479Sdim#include "ARMInstrInfo.h"
21276479Sdim#include "ARMSubtarget.h"
22280031Sdim#include "ARMTargetMachine.h"
23321369Sdim#include "MCTargetDesc/ARMMCTargetDesc.h"
24276479Sdim#include "Thumb1FrameLowering.h"
25276479Sdim#include "Thumb1InstrInfo.h"
26276479Sdim#include "Thumb2InstrInfo.h"
27321369Sdim#include "llvm/ADT/StringRef.h"
28321369Sdim#include "llvm/ADT/Triple.h"
29321369Sdim#include "llvm/ADT/Twine.h"
30321369Sdim#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
31321369Sdim#include "llvm/CodeGen/MachineFunction.h"
32276479Sdim#include "llvm/IR/Function.h"
33249423Sdim#include "llvm/IR/GlobalValue.h"
34296417Sdim#include "llvm/MC/MCAsmInfo.h"
35321369Sdim#include "llvm/MC/MCTargetOptions.h"
36321369Sdim#include "llvm/Support/CodeGen.h"
37249423Sdim#include "llvm/Support/CommandLine.h"
38321369Sdim#include "llvm/Support/TargetParser.h"
39249423Sdim#include "llvm/Target/TargetOptions.h"
40224145Sdim
41276479Sdimusing namespace llvm;
42276479Sdim
43276479Sdim#define DEBUG_TYPE "arm-subtarget"
44276479Sdim
45224145Sdim#define GET_SUBTARGETINFO_TARGET_DESC
46224145Sdim#define GET_SUBTARGETINFO_CTOR
47224145Sdim#include "ARMGenSubtargetInfo.inc"
48224145Sdim
49194710Sedstatic cl::opt<bool>
50243830SdimUseFusedMulOps("arm-use-mulops",
51243830Sdim               cl::init(true), cl::Hidden);
52243830Sdim
/// Policy values for the IT-block command-line option. They are consumed by
/// ARMSubtarget::initSubtargetFeatures() to compute RestrictIT.
enum ITMode {
  DefaultIT,      // RestrictIT follows hasV8Ops().
  RestrictedIT,   // RestrictIT is forced on.
  NoRestrictedIT  // RestrictIT is forced off.
};
58261991Sdim
59261991Sdimstatic cl::opt<ITMode>
60261991SdimIT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT),
61261991Sdim   cl::ZeroOrMore,
62261991Sdim   cl::values(clEnumValN(DefaultIT, "arm-default-it",
63261991Sdim                         "Generate IT block based on arch"),
64261991Sdim              clEnumValN(RestrictedIT, "arm-restrict-it",
65261991Sdim                         "Disallow deprecated IT based on ARMv8"),
66261991Sdim              clEnumValN(NoRestrictedIT, "arm-no-restrict-it",
67314564Sdim                         "Allow IT blocks based on ARMv7")));
68261991Sdim
69296417Sdim/// ForceFastISel - Use the fast-isel, even for subtargets where it is not
70296417Sdim/// currently supported (for testing only).
71296417Sdimstatic cl::opt<bool>
72296417SdimForceFastISel("arm-force-fast-isel",
73296417Sdim               cl::init(false), cl::Hidden);
74296417Sdim
75360784Sdimstatic cl::opt<bool> EnableSubRegLiveness("arm-enable-subreg-liveness",
76360784Sdim                                          cl::init(false), cl::Hidden);
77360784Sdim
78276479Sdim/// initializeSubtargetDependencies - Initializes using a CPU and feature string
79276479Sdim/// so that we can use initializer lists for subtarget initialization.
80276479SdimARMSubtarget &ARMSubtarget::initializeSubtargetDependencies(StringRef CPU,
81276479Sdim                                                            StringRef FS) {
82249423Sdim  initializeEnvironment();
83280031Sdim  initSubtargetFeatures(CPU, FS);
84276479Sdim  return *this;
85249423Sdim}
86249423Sdim
87288943SdimARMFrameLowering *ARMSubtarget::initializeFrameLowering(StringRef CPU,
88288943Sdim                                                        StringRef FS) {
89288943Sdim  ARMSubtarget &STI = initializeSubtargetDependencies(CPU, FS);
90288943Sdim  if (STI.isThumb1Only())
91288943Sdim    return (ARMFrameLowering *)new Thumb1FrameLowering(STI);
92288943Sdim
93288943Sdim  return new ARMFrameLowering(STI);
94288943Sdim}
95288943Sdim
96288943SdimARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU,
97288943Sdim                           const std::string &FS,
98353358Sdim                           const ARMBaseTargetMachine &TM, bool IsLittle,
99353358Sdim                           bool MinSize)
100309124Sdim    : ARMGenSubtargetInfo(TT, CPU, FS), UseMulOps(UseFusedMulOps),
101353358Sdim      CPUString(CPU), OptMinSize(MinSize), IsLittle(IsLittle),
102353358Sdim      TargetTriple(TT), Options(TM.Options), TM(TM),
103353358Sdim      FrameLowering(initializeFrameLowering(CPU, FS)),
104288943Sdim      // At this point initializeSubtargetDependencies has been called so
105288943Sdim      // we can query directly.
106276479Sdim      InstrInfo(isThumb1Only()
107276479Sdim                    ? (ARMBaseInstrInfo *)new Thumb1InstrInfo(*this)
108276479Sdim                    : !isThumb()
109276479Sdim                          ? (ARMBaseInstrInfo *)new ARMInstrInfo(*this)
110276479Sdim                          : (ARMBaseInstrInfo *)new Thumb2InstrInfo(*this)),
111321369Sdim      TLInfo(TM, *this) {
112276479Sdim
113327952Sdim  CallLoweringInfo.reset(new ARMCallLowering(*getTargetLowering()));
114327952Sdim  Legalizer.reset(new ARMLegalizerInfo(*this));
115321369Sdim
116321369Sdim  auto *RBI = new ARMRegisterBankInfo(*getRegisterInfo());
117321369Sdim
118321369Sdim  // FIXME: At this point, we can't rely on Subtarget having RBI.
119321369Sdim  // It's awkward to mix passing RBI and the Subtarget; should we pass
120321369Sdim  // TII/TRI as well?
121327952Sdim  InstSelector.reset(createARMInstructionSelector(
122321369Sdim      *static_cast<const ARMBaseTargetMachine *>(&TM), *this, *RBI));
123321369Sdim
124327952Sdim  RegBankInfo.reset(RBI);
125321369Sdim}
126321369Sdim
127314564Sdimconst CallLowering *ARMSubtarget::getCallLowering() const {
128327952Sdim  return CallLoweringInfo.get();
129314564Sdim}
130314564Sdim
131360784SdimInstructionSelector *ARMSubtarget::getInstructionSelector() const {
132327952Sdim  return InstSelector.get();
133314564Sdim}
134314564Sdim
135314564Sdimconst LegalizerInfo *ARMSubtarget::getLegalizerInfo() const {
136327952Sdim  return Legalizer.get();
137314564Sdim}
138314564Sdim
139314564Sdimconst RegisterBankInfo *ARMSubtarget::getRegBankInfo() const {
140327952Sdim  return RegBankInfo.get();
141314564Sdim}
142314564Sdim
143314564Sdimbool ARMSubtarget::isXRaySupported() const {
144314564Sdim  // We don't currently suppport Thumb, but Windows requires Thumb.
145314564Sdim  return hasV6Ops() && hasARMOps() && !isTargetWindows();
146314564Sdim}
147314564Sdim
148249423Sdimvoid ARMSubtarget::initializeEnvironment() {
149296417Sdim  // MCAsmInfo isn't always present (e.g. in opt) so we can't initialize this
150296417Sdim  // directly from it, but we can try to make sure they're consistent when both
151296417Sdim  // available.
152327952Sdim  UseSjLjEH = (isTargetDarwin() && !isTargetWatchABI() &&
153327952Sdim               Options.ExceptionModel == ExceptionHandling::None) ||
154327952Sdim              Options.ExceptionModel == ExceptionHandling::SjLj;
155296417Sdim  assert((!TM.getMCAsmInfo() ||
156296417Sdim          (TM.getMCAsmInfo()->getExceptionHandlingType() ==
157296417Sdim           ExceptionHandling::SjLj) == UseSjLjEH) &&
158296417Sdim         "inconsistent sjlj choice between CodeGen and MC");
159249423Sdim}
160249423Sdim
161280031Sdimvoid ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
162261991Sdim  if (CPUString.empty()) {
163296417Sdim    CPUString = "generic";
164296417Sdim
165296417Sdim    if (isTargetDarwin()) {
166296417Sdim      StringRef ArchName = TargetTriple.getArchName();
167327952Sdim      ARM::ArchKind AK = ARM::parseArch(ArchName);
168327952Sdim      if (AK == ARM::ArchKind::ARMV7S)
169296417Sdim        // Default to the Swift CPU when targeting armv7s/thumbv7s.
170296417Sdim        CPUString = "swift";
171327952Sdim      else if (AK == ARM::ArchKind::ARMV7K)
172296417Sdim        // Default to the Cortex-a7 CPU when targeting armv7k/thumbv7k.
173296417Sdim        // ARMv7k does not use SjLj exception handling.
174296417Sdim        CPUString = "cortex-a7";
175296417Sdim    }
176261991Sdim  }
177193323Sed
178224145Sdim  // Insert the architecture feature derived from the target triple into the
179224145Sdim  // feature string. This is important for setting features that are implied
180224145Sdim  // based on the architecture version.
181288943Sdim  std::string ArchFS = ARM_MC::ParseARMTriple(TargetTriple, CPUString);
182224145Sdim  if (!FS.empty()) {
183224145Sdim    if (!ArchFS.empty())
184288943Sdim      ArchFS = (Twine(ArchFS) + "," + FS).str();
185224145Sdim    else
186224145Sdim      ArchFS = FS;
187218893Sdim  }
188224145Sdim  ParseSubtargetFeatures(CPUString, ArchFS);
189204961Srdivacky
190276479Sdim  // FIXME: This used enable V6T2 support implicitly for Thumb2 mode.
191276479Sdim  // Assert this for now to make the change obvious.
192276479Sdim  assert(hasV6T2Ops() || !hasThumb2());
193193323Sed
194314564Sdim  // Execute only support requires movt support
195344779Sdim  if (genExecuteOnly()) {
196344779Sdim    NoMovt = false;
197344779Sdim    assert(hasV8MBaselineOps() && "Cannot generate execute-only code for this target");
198344779Sdim  }
199314564Sdim
200239462Sdim  // Keep a pointer to static instruction cost data for the specified CPU.
201239462Sdim  SchedModel = getSchedModelForCPU(CPUString);
202239462Sdim
203224145Sdim  // Initialize scheduling itinerary for the specified CPU.
204224145Sdim  InstrItins = getInstrItineraryForCPU(CPUString);
205193323Sed
206276479Sdim  // FIXME: this is invalid for WindowsCE
207280031Sdim  if (isTargetWindows())
208276479Sdim    NoARM = true;
209198090Srdivacky
210193323Sed  if (isAAPCS_ABI())
211360784Sdim    stackAlignment = Align(8);
212296417Sdim  if (isTargetNaCl() || isAAPCS16_ABI())
213360784Sdim    stackAlignment = Align(16);
214193323Sed
215296417Sdim  // FIXME: Completely disable sibcall for Thumb1 since ThumbRegisterInfo::
216296417Sdim  // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as
217296417Sdim  // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation
218296417Sdim  // support in the assembler and linker to be used. This would need to be
219296417Sdim  // fixed to fully support tail calls in Thumb1.
220296417Sdim  //
221321369Sdim  // For ARMv8-M, we /do/ implement tail calls.  Doing this is tricky for v8-M
222321369Sdim  // baseline, since the LDM/POP instruction on Thumb doesn't take LR.  This
223321369Sdim  // means if we need to reload LR, it takes extra instructions, which outweighs
224321369Sdim  // the value of the tail call; but here we don't know yet whether LR is going
225327952Sdim  // to be used. We take the optimistic approach of generating the tail call and
226327952Sdim  // perhaps taking a hit if we need to restore the LR.
227261991Sdim
228296417Sdim  // Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
229296417Sdim  // but we need to make sure there are enough registers; the only valid
230296417Sdim  // registers are the 4 used for parameters.  We don't currently do this
231296417Sdim  // case.
232198090Srdivacky
233309124Sdim  SupportsTailCall = !isThumb() || hasV8MBaselineOps();
234249423Sdim
235296417Sdim  if (isTargetMachO() && isTargetIOS() && getTargetTriple().isOSVersionLT(5, 0))
236296417Sdim    SupportsTailCall = false;
237280031Sdim
238261991Sdim  switch (IT) {
239261991Sdim  case DefaultIT:
240288943Sdim    RestrictIT = hasV8Ops();
241261991Sdim    break;
242261991Sdim  case RestrictedIT:
243261991Sdim    RestrictIT = true;
244261991Sdim    break;
245261991Sdim  case NoRestrictedIT:
246261991Sdim    RestrictIT = false;
247261991Sdim    break;
248261991Sdim  }
249261991Sdim
250249423Sdim  // NEON f32 ops are non-IEEE 754 compliant. Darwin is ok with it by default.
251288943Sdim  const FeatureBitset &Bits = getFeatureBits();
252288943Sdim  if ((Bits[ARM::ProcA5] || Bits[ARM::ProcA8]) && // Where this matters
253249423Sdim      (Options.UnsafeFPMath || isTargetDarwin()))
254249423Sdim    UseNEONForSinglePrecisionFP = true;
255309124Sdim
256314564Sdim  if (isRWPI())
257314564Sdim    ReserveR9 = true;
258314564Sdim
259360784Sdim  // If MVEVectorCostFactor is still 0 (has not been set to anything else), default it to 2
260360784Sdim  if (MVEVectorCostFactor == 0)
261360784Sdim    MVEVectorCostFactor = 2;
262360784Sdim
263309124Sdim  // FIXME: Teach TableGen to deal with these instead of doing it manually here.
264309124Sdim  switch (ARMProcFamily) {
265309124Sdim  case Others:
266309124Sdim  case CortexA5:
267309124Sdim    break;
268309124Sdim  case CortexA7:
269309124Sdim    LdStMultipleTiming = DoubleIssue;
270309124Sdim    break;
271309124Sdim  case CortexA8:
272309124Sdim    LdStMultipleTiming = DoubleIssue;
273309124Sdim    break;
274309124Sdim  case CortexA9:
275309124Sdim    LdStMultipleTiming = DoubleIssueCheckUnalignedAccess;
276309124Sdim    PreISelOperandLatencyAdjustment = 1;
277309124Sdim    break;
278309124Sdim  case CortexA12:
279309124Sdim    break;
280309124Sdim  case CortexA15:
281309124Sdim    MaxInterleaveFactor = 2;
282309124Sdim    PreISelOperandLatencyAdjustment = 1;
283309124Sdim    PartialUpdateClearance = 12;
284309124Sdim    break;
285309124Sdim  case CortexA17:
286309124Sdim  case CortexA32:
287309124Sdim  case CortexA35:
288309124Sdim  case CortexA53:
289327952Sdim  case CortexA55:
290309124Sdim  case CortexA57:
291309124Sdim  case CortexA72:
292309124Sdim  case CortexA73:
293327952Sdim  case CortexA75:
294353358Sdim  case CortexA76:
295309124Sdim  case CortexR4:
296309124Sdim  case CortexR4F:
297309124Sdim  case CortexR5:
298309124Sdim  case CortexR7:
299309124Sdim  case CortexM3:
300327952Sdim  case CortexR52:
301344779Sdim    break;
302344779Sdim  case Exynos:
303344779Sdim    LdStMultipleTiming = SingleIssuePlusExtras;
304344779Sdim    MaxInterleaveFactor = 4;
305344779Sdim    if (!isThumb())
306360784Sdim      PrefLoopLogAlignment = 3;
307344779Sdim    break;
308321369Sdim  case Kryo:
309309124Sdim    break;
310309124Sdim  case Krait:
311309124Sdim    PreISelOperandLatencyAdjustment = 1;
312309124Sdim    break;
313360784Sdim  case NeoverseN1:
314360784Sdim    break;
315309124Sdim  case Swift:
316309124Sdim    MaxInterleaveFactor = 2;
317309124Sdim    LdStMultipleTiming = SingleIssuePlusExtras;
318309124Sdim    PreISelOperandLatencyAdjustment = 1;
319309124Sdim    PartialUpdateClearance = 12;
320309124Sdim    break;
321309124Sdim  }
322193323Sed}
323198090Srdivacky
324341825Sdimbool ARMSubtarget::isTargetHardFloat() const { return TM.isTargetHardFloat(); }
325341825Sdim
326280031Sdimbool ARMSubtarget::isAPCS_ABI() const {
327280031Sdim  assert(TM.TargetABI != ARMBaseTargetMachine::ARM_ABI_UNKNOWN);
328280031Sdim  return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_APCS;
329280031Sdim}
330280031Sdimbool ARMSubtarget::isAAPCS_ABI() const {
331280031Sdim  assert(TM.TargetABI != ARMBaseTargetMachine::ARM_ABI_UNKNOWN);
332296417Sdim  return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS ||
333296417Sdim         TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16;
334280031Sdim}
335296417Sdimbool ARMSubtarget::isAAPCS16_ABI() const {
336296417Sdim  assert(TM.TargetABI != ARMBaseTargetMachine::ARM_ABI_UNKNOWN);
337296417Sdim  return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16;
338296417Sdim}
339280031Sdim
340314564Sdimbool ARMSubtarget::isROPI() const {
341314564Sdim  return TM.getRelocationModel() == Reloc::ROPI ||
342314564Sdim         TM.getRelocationModel() == Reloc::ROPI_RWPI;
343314564Sdim}
344314564Sdimbool ARMSubtarget::isRWPI() const {
345314564Sdim  return TM.getRelocationModel() == Reloc::RWPI ||
346314564Sdim         TM.getRelocationModel() == Reloc::ROPI_RWPI;
347314564Sdim}
348314564Sdim
349309124Sdimbool ARMSubtarget::isGVIndirectSymbol(const GlobalValue *GV) const {
350309124Sdim  if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
351309124Sdim    return true;
352296417Sdim
353309124Sdim  // 32 bit macho has no relocation for a-b if a is undefined, even if b is in
354309124Sdim  // the section that is being relocated. This means we have to use o load even
355309124Sdim  // for GVs that are known to be local to the dso.
356314564Sdim  if (isTargetMachO() && TM.isPositionIndependent() &&
357309124Sdim      (GV->isDeclarationForLinker() || GV->hasCommonLinkage()))
358198090Srdivacky    return true;
359198090Srdivacky
360198090Srdivacky  return false;
361198090Srdivacky}
362199481Srdivacky
363327952Sdimbool ARMSubtarget::isGVInGOT(const GlobalValue *GV) const {
364327952Sdim  return isTargetELF() && TM.isPositionIndependent() &&
365327952Sdim         !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
366327952Sdim}
367327952Sdim
368218893Sdimunsigned ARMSubtarget::getMispredictionPenalty() const {
369280031Sdim  return SchedModel.MispredictPenalty;
370218893Sdim}
371218893Sdim
372296417Sdimbool ARMSubtarget::enableMachineScheduler() const {
373353358Sdim  // The MachineScheduler can increase register usage, so we use more high
374353358Sdim  // registers and end up with more T2 instructions that cannot be converted to
375353358Sdim  // T1 instructions. At least until we do better at converting to thumb1
376353358Sdim  // instructions, on cortex-m at Oz where we are size-paranoid, don't use the
377353358Sdim  // Machine scheduler, relying on the DAG register pressure scheduler instead.
378353358Sdim  if (isMClass() && hasMinSize())
379353358Sdim    return false;
380327952Sdim  // Enable the MachineScheduler before register allocation for subtargets
381327952Sdim  // with the use-misched feature.
382327952Sdim  return useMachineScheduler();
383296417Sdim}
384296417Sdim
385360784Sdimbool ARMSubtarget::enableSubRegLiveness() const { return EnableSubRegLiveness; }
386360784Sdim
387276479Sdim// This overrides the PostRAScheduler bit in the SchedModel for any CPU.
388288943Sdimbool ARMSubtarget::enablePostRAScheduler() const {
389360784Sdim  if (enableMachineScheduler())
390360784Sdim    return false;
391327952Sdim  if (disablePostRAScheduler())
392296417Sdim    return false;
393360784Sdim  // Thumb1 cores will generally not benefit from post-ra scheduling
394327952Sdim  return !isThumb1Only();
395199481Srdivacky}
396276479Sdim
397360784Sdimbool ARMSubtarget::enablePostRAMachineScheduler() const {
398360784Sdim  if (!enableMachineScheduler())
399360784Sdim    return false;
400360784Sdim  if (disablePostRAScheduler())
401360784Sdim    return false;
402360784Sdim  return !isThumb1Only();
403360784Sdim}
404360784Sdim
405314564Sdimbool ARMSubtarget::enableAtomicExpand() const { return hasAnyDataBarrier(); }
406276479Sdim
407353358Sdimbool ARMSubtarget::useStride4VFPs() const {
408296417Sdim  // For general targets, the prologue can grow when VFPs are allocated with
409296417Sdim  // stride 4 (more vpush instructions). But WatchOS uses a compact unwind
410296417Sdim  // format which it's more important to get right.
411344779Sdim  return isTargetWatchABI() ||
412353358Sdim         (useWideStrideVFP() && !OptMinSize);
413296417Sdim}
414296417Sdim
415353358Sdimbool ARMSubtarget::useMovt() const {
416276479Sdim  // NOTE Windows on ARM needs to use mov.w/mov.t pairs to materialise 32-bit
417276479Sdim  // immediates as it is inherently position independent, and may be out of
418276479Sdim  // range otherwise.
419309124Sdim  return !NoMovt && hasV8MBaselineOps() &&
420353358Sdim         (isTargetWindows() || !OptMinSize || genExecuteOnly());
421276479Sdim}
422288943Sdim
423288943Sdimbool ARMSubtarget::useFastISel() const {
424296417Sdim  // Enable fast-isel for any target, for testing only.
425296417Sdim  if (ForceFastISel)
426296417Sdim    return true;
427296417Sdim
428296417Sdim  // Limit fast-isel to the targets that are or have been tested.
429296417Sdim  if (!hasV6Ops())
430296417Sdim    return false;
431296417Sdim
432288943Sdim  // Thumb2 support on iOS; ARM support on iOS, Linux and NaCl.
433288943Sdim  return TM.Options.EnableFastISel &&
434288943Sdim         ((isTargetMachO() && !isThumb1Only()) ||
435288943Sdim          (isTargetLinux() && !isThumb()) || (isTargetNaCl() && !isThumb()));
436288943Sdim}
437353358Sdim
438353358Sdimunsigned ARMSubtarget::getGPRAllocationOrder(const MachineFunction &MF) const {
439353358Sdim  // The GPR register class has multiple possible allocation orders, with
440353358Sdim  // tradeoffs preferred by different sub-architectures and optimisation goals.
441353358Sdim  // The allocation orders are:
442353358Sdim  // 0: (the default tablegen order, not used)
443353358Sdim  // 1: r14, r0-r13
444353358Sdim  // 2: r0-r7
445353358Sdim  // 3: r0-r7, r12, lr, r8-r11
446353358Sdim  // Note that the register allocator will change this order so that
447353358Sdim  // callee-saved registers are used later, as they require extra work in the
448353358Sdim  // prologue/epilogue (though we sometimes override that).
449353358Sdim
450353358Sdim  // For thumb1-only targets, only the low registers are allocatable.
451353358Sdim  if (isThumb1Only())
452353358Sdim    return 2;
453353358Sdim
454353358Sdim  // Allocate low registers first, so we can select more 16-bit instructions.
455353358Sdim  // We also (in ignoreCSRForAllocationOrder) override  the default behaviour
456353358Sdim  // with regards to callee-saved registers, because pushing extra registers is
457353358Sdim  // much cheaper (in terms of code size) than using high registers. After
458353358Sdim  // that, we allocate r12 (doesn't need to be saved), lr (saving it means we
459353358Sdim  // can return with the pop, don't need an extra "bx lr") and then the rest of
460353358Sdim  // the high registers.
461353358Sdim  if (isThumb2() && MF.getFunction().hasMinSize())
462353358Sdim    return 3;
463353358Sdim
464353358Sdim  // Otherwise, allocate in the default order, using LR first because saving it
465353358Sdim  // allows a shorter epilogue sequence.
466353358Sdim  return 1;
467353358Sdim}
468353358Sdim
469353358Sdimbool ARMSubtarget::ignoreCSRForAllocationOrder(const MachineFunction &MF,
470353358Sdim                                               unsigned PhysReg) const {
471353358Sdim  // To minimize code size in Thumb2, we prefer the usage of low regs (lower
472353358Sdim  // cost per use) so we can  use narrow encoding. By default, caller-saved
473353358Sdim  // registers (e.g. lr, r12) are always  allocated first, regardless of
474353358Sdim  // their cost per use. When optForMinSize, we prefer the low regs even if
475353358Sdim  // they are CSR because usually push/pop can be folded into existing ones.
476353358Sdim  return isThumb2() && MF.getFunction().hasMinSize() &&
477353358Sdim         ARM::GPRRegClass.contains(PhysReg);
478353358Sdim}
479