ARMTargetMachine.cpp revision 360784
//===-- ARMTargetMachine.cpp - Define TargetMachine for ARM ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//
//===----------------------------------------------------------------------===//

#include "ARMTargetMachine.h"
#include "ARM.h"
#include "ARMMacroFusion.h"
#include "ARMSubtarget.h"
#include "ARMTargetObjectFile.h"
#include "ARMTargetTransformInfo.h"
#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "TargetInfo/ARMTargetInfo.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/ExecutionDomainFix.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/CFGuard.h"
#include "llvm/Transforms/Scalar.h"
#include <cassert>
#include <memory>
#include <string>

using namespace llvm;

static cl::opt<bool>
DisableA15SDOptimization("disable-a15-sd-optimization", cl::Hidden,
                   cl::desc("Inhibit optimization of S->D register accesses on A15"),
                   cl::init(false));

static cl::opt<bool>
EnableAtomicTidy("arm-atomic-cfg-tidy", cl::Hidden,
                 cl::desc("Run SimplifyCFG after expanding atomic operations"
                          " to make use of cmpxchg flow-based information"),
                 cl::init(true));

static cl::opt<bool>
EnableARMLoadStoreOpt("arm-load-store-opt", cl::Hidden,
                      cl::desc("Enable ARM load/store optimization pass"),
                      cl::init(true));

// FIXME: Unify control over GlobalMerge.
static cl::opt<cl::boolOrDefault>
EnableGlobalMerge("arm-global-merge", cl::Hidden,
                  cl::desc("Enable the global merge pass"));
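// The flags above are hidden cl::opt options: they are omitted from --help
// but can still be set explicitly, e.g. "llc -arm-global-merge=true" or
// "clang -mllvm -arm-atomic-cfg-tidy=0".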

namespace llvm {
  void initializeARMExecutionDomainFixPass(PassRegistry&);
}

extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeARMTarget() {
  // Register the target.
  RegisterTargetMachine<ARMLETargetMachine> X(getTheARMLETarget());
  RegisterTargetMachine<ARMLETargetMachine> A(getTheThumbLETarget());
  RegisterTargetMachine<ARMBETargetMachine> Y(getTheARMBETarget());
  RegisterTargetMachine<ARMBETargetMachine> B(getTheThumbBETarget());

  PassRegistry &Registry = *PassRegistry::getPassRegistry();
  initializeGlobalISel(Registry);
  initializeARMLoadStoreOptPass(Registry);
  initializeARMPreAllocLoadStoreOptPass(Registry);
  initializeARMParallelDSPPass(Registry);
  initializeARMConstantIslandsPass(Registry);
  initializeARMExecutionDomainFixPass(Registry);
  initializeARMExpandPseudoPass(Registry);
  initializeThumb2SizeReducePass(Registry);
  initializeMVEVPTBlockPass(Registry);
  initializeMVETailPredicationPass(Registry);
  initializeARMLowOverheadLoopsPass(Registry);
  initializeMVEGatherScatterLoweringPass(Registry);
}

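// Pick the TargetLoweringObjectFile implementation from the triple's object
// file format: Mach-O, COFF on Windows, and the ARM ELF variant otherwise.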
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
  if (TT.isOSBinFormatMachO())
    return std::make_unique<TargetLoweringObjectFileMachO>();
  if (TT.isOSWindows())
    return std::make_unique<TargetLoweringObjectFileCOFF>();
  return std::make_unique<ARMElfTargetObjectFile>();
}

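// Map the requested ABI name (or the default computed from the triple and
// CPU) onto the internal ARMABI enum. Recognised names are "aapcs16" and
// anything starting with "aapcs" or "apcs".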
static ARMBaseTargetMachine::ARMABI
computeTargetABI(const Triple &TT, StringRef CPU,
                 const TargetOptions &Options) {
  StringRef ABIName = Options.MCOptions.getABIName();

  if (ABIName.empty())
    ABIName = ARM::computeDefaultTargetABI(TT, CPU);

  if (ABIName == "aapcs16")
    return ARMBaseTargetMachine::ARM_ABI_AAPCS16;
  else if (ABIName.startswith("aapcs"))
    return ARMBaseTargetMachine::ARM_ABI_AAPCS;
  else if (ABIName.startswith("apcs"))
    return ARMBaseTargetMachine::ARM_ABI_APCS;

  llvm_unreachable("Unhandled/unknown ABI Name!");
  return ARMBaseTargetMachine::ARM_ABI_UNKNOWN;
}

static std::string computeDataLayout(const Triple &TT, StringRef CPU,
                                     const TargetOptions &Options,
                                     bool isLittle) {
  auto ABI = computeTargetABI(TT, CPU, Options);
  std::string Ret;

  if (isLittle)
    // Little endian.
    Ret += "e";
  else
    // Big endian.
    Ret += "E";

  Ret += DataLayout::getManglingComponent(TT);

  // Pointers are 32 bits and aligned to 32 bits.
  Ret += "-p:32:32";

  // Function pointers are aligned to 8 bits (because the LSB stores the
  // ARM/Thumb state).
  Ret += "-Fi8";

  // ABIs other than APCS have 64 bit integers with natural alignment.
  if (ABI != ARMBaseTargetMachine::ARM_ABI_APCS)
    Ret += "-i64:64";

  // We have 64 bit floats. The APCS ABI requires them to be aligned to 32
  // bits, others to 64 bits. We always try to align to 64 bits.
  if (ABI == ARMBaseTargetMachine::ARM_ABI_APCS)
    Ret += "-f64:32:64";

  // We have 128 and 64 bit vectors. The APCS ABI aligns them to 32 bits, others
  // to 64. We always try to give them natural alignment.
  if (ABI == ARMBaseTargetMachine::ARM_ABI_APCS)
    Ret += "-v64:32:64-v128:32:128";
  else if (ABI != ARMBaseTargetMachine::ARM_ABI_AAPCS16)
    Ret += "-v128:64:128";

  // Try to align aggregates to 32 bits (the default is 64 bits, which has no
  // particular hardware support on 32-bit ARM).
  Ret += "-a:0:32";

  // Integer registers are 32 bits.
  Ret += "-n32";

  // The stack is 128 bit aligned on NaCl and AAPCS16, 64 bit aligned on AAPCS
  // and 32 bit aligned everywhere else.
  if (TT.isOSNaCl() || ABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16)
    Ret += "-S128";
  else if (ABI == ARMBaseTargetMachine::ARM_ABI_AAPCS)
    Ret += "-S64";
  else
    Ret += "-S32";

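  // For example, a little-endian AAPCS ELF target ends up with the layout
  // string "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64".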
  return Ret;
}

static Reloc::Model getEffectiveRelocModel(const Triple &TT,
                                           Optional<Reloc::Model> RM) {
  if (!RM.hasValue())
    // Default relocation model on Darwin is PIC.
    return TT.isOSBinFormatMachO() ? Reloc::PIC_ : Reloc::Static;

  if (*RM == Reloc::ROPI || *RM == Reloc::RWPI || *RM == Reloc::ROPI_RWPI)
    assert(TT.isOSBinFormatELF() &&
           "ROPI/RWPI currently only supported for ELF");

  // DynamicNoPIC is only used on darwin.
  if (*RM == Reloc::DynamicNoPIC && !TT.isOSDarwin())
    return Reloc::Static;

  return *RM;
}

/// Create an ARM architecture model.
///
ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT,
                                           StringRef CPU, StringRef FS,
                                           const TargetOptions &Options,
                                           Optional<Reloc::Model> RM,
                                           Optional<CodeModel::Model> CM,
                                           CodeGenOpt::Level OL, bool isLittle)
    : LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options, isLittle), TT,
                        CPU, FS, Options, getEffectiveRelocModel(TT, RM),
                        getEffectiveCodeModel(CM, CodeModel::Small), OL),
      TargetABI(computeTargetABI(TT, CPU, Options)),
      TLOF(createTLOF(getTargetTriple())), isLittle(isLittle) {

  // Default to triple-appropriate float ABI
  if (Options.FloatABIType == FloatABI::Default) {
    if (isTargetHardFloat())
      this->Options.FloatABIType = FloatABI::Hard;
    else
      this->Options.FloatABIType = FloatABI::Soft;
  }

  // Default to triple-appropriate EABI
  if (Options.EABIVersion == EABI::Default ||
      Options.EABIVersion == EABI::Unknown) {
    // musl is compatible with glibc with regard to EABI version
    if ((TargetTriple.getEnvironment() == Triple::GNUEABI ||
         TargetTriple.getEnvironment() == Triple::GNUEABIHF ||
         TargetTriple.getEnvironment() == Triple::MuslEABI ||
         TargetTriple.getEnvironment() == Triple::MuslEABIHF) &&
        !(TargetTriple.isOSWindows() || TargetTriple.isOSDarwin()))
      this->Options.EABIVersion = EABI::GNU;
    else
      this->Options.EABIVersion = EABI::EABI5;
  }

  if (TT.isOSBinFormatMachO()) {
    this->Options.TrapUnreachable = true;
    this->Options.NoTrapAfterNoreturn = true;
  }

  initAsmInfo();
}

ARMBaseTargetMachine::~ARMBaseTargetMachine() = default;

const ARMSubtarget *
ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const {
  Attribute CPUAttr = F.getFnAttribute("target-cpu");
  Attribute FSAttr = F.getFnAttribute("target-features");

  std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
                        ? CPUAttr.getValueAsString().str()
                        : TargetCPU;
  std::string FS = !FSAttr.hasAttribute(Attribute::None)
                       ? FSAttr.getValueAsString().str()
                       : TargetFS;

  // FIXME: This is related to the code below to reset the target options;
  // we need to know whether or not the soft float flag is set on the
  // function before we can generate a subtarget. We also need to use
  // it as a key for the subtarget since that can be the only difference
  // between two functions.
  bool SoftFloat =
      F.getFnAttribute("use-soft-float").getValueAsString() == "true";
  // If the soft float attribute is set on the function, turn on the soft float
  // subtarget feature.
  if (SoftFloat)
    FS += FS.empty() ? "+soft-float" : ",+soft-float";

  // Use the minsize attribute to identify the subtarget, but don't use it in
  // the feature string.
  std::string Key = CPU + FS;
  if (F.hasMinSize())
    Key += "+minsize";
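  // e.g. a minsize function with "target-cpu"="cortex-a8", an empty feature
  // string, and "use-soft-float"="true" ends up with the key
  // "cortex-a8+soft-float+minsize".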

  auto &I = SubtargetMap[Key];
  if (!I) {
    // This needs to be done before we create a new subtarget since any
    // creation will depend on the TM and the code generation flags on the
    // function that reside in TargetOptions.
    resetTargetOptions(F);
    I = std::make_unique<ARMSubtarget>(TargetTriple, CPU, FS, *this, isLittle,
                                       F.hasMinSize());

    if (!I->isThumb() && !I->hasARMOps())
      F.getContext().emitError("Function '" + F.getName() + "' uses ARM "
          "instructions, but the target does not support ARM mode execution.");
  }

  return I.get();
}

TargetTransformInfo
ARMBaseTargetMachine::getTargetTransformInfo(const Function &F) {
  return TargetTransformInfo(ARMTTIImpl(this, F));
}

ARMLETargetMachine::ARMLETargetMachine(const Target &T, const Triple &TT,
                                       StringRef CPU, StringRef FS,
                                       const TargetOptions &Options,
                                       Optional<Reloc::Model> RM,
                                       Optional<CodeModel::Model> CM,
                                       CodeGenOpt::Level OL, bool JIT)
    : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}

ARMBETargetMachine::ARMBETargetMachine(const Target &T, const Triple &TT,
                                       StringRef CPU, StringRef FS,
                                       const TargetOptions &Options,
                                       Optional<Reloc::Model> RM,
                                       Optional<CodeModel::Model> CM,
                                       CodeGenOpt::Level OL, bool JIT)
    : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}

namespace {

/// ARM Code Generator Pass Configuration Options.
class ARMPassConfig : public TargetPassConfig {
public:
  ARMPassConfig(ARMBaseTargetMachine &TM, PassManagerBase &PM)
      : TargetPassConfig(TM, PM) {}

  ARMBaseTargetMachine &getARMTargetMachine() const {
    return getTM<ARMBaseTargetMachine>();
  }

  ScheduleDAGInstrs *
  createMachineScheduler(MachineSchedContext *C) const override {
    ScheduleDAGMILive *DAG = createGenericSchedLive(C);
    // add DAG Mutations here.
    const ARMSubtarget &ST = C->MF->getSubtarget<ARMSubtarget>();
    if (ST.hasFusion())
      DAG->addMutation(createARMMacroFusionDAGMutation());
    return DAG;
  }

  ScheduleDAGInstrs *
  createPostMachineScheduler(MachineSchedContext *C) const override {
    ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
    // add DAG Mutations here.
    const ARMSubtarget &ST = C->MF->getSubtarget<ARMSubtarget>();
    if (ST.hasFusion())
      DAG->addMutation(createARMMacroFusionDAGMutation());
    return DAG;
  }

  void addIRPasses() override;
  void addCodeGenPrepare() override;
  bool addPreISel() override;
  bool addInstSelector() override;
  bool addIRTranslator() override;
  bool addLegalizeMachineIR() override;
  bool addRegBankSelect() override;
  bool addGlobalInstructionSelect() override;
  void addPreRegAlloc() override;
  void addPreSched2() override;
  void addPreEmitPass() override;

  std::unique_ptr<CSEConfigBase> getCSEConfig() const override;
};

class ARMExecutionDomainFix : public ExecutionDomainFix {
public:
  static char ID;
  ARMExecutionDomainFix() : ExecutionDomainFix(ID, ARM::DPRRegClass) {}
  StringRef getPassName() const override {
    return "ARM Execution Domain Fix";
  }
};
char ARMExecutionDomainFix::ID;

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(ARMExecutionDomainFix, "arm-execution-domain-fix",
  "ARM Execution Domain Fix", false, false)
INITIALIZE_PASS_DEPENDENCY(ReachingDefAnalysis)
INITIALIZE_PASS_END(ARMExecutionDomainFix, "arm-execution-domain-fix",
  "ARM Execution Domain Fix", false, false)

TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) {
  return new ARMPassConfig(*this, PM);
}

std::unique_ptr<CSEConfigBase> ARMPassConfig::getCSEConfig() const {
  return getStandardCSEConfigForOpt(TM->getOptLevel());
}

void ARMPassConfig::addIRPasses() {
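  // With a single-threaded model, atomic operations can simply be lowered to
  // their non-atomic equivalents; otherwise they are expanded, typically into
  // ldrex/strex-based loops or atomic libcalls on ARM.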
  if (TM->Options.ThreadModel == ThreadModel::Single)
    addPass(createLowerAtomicPass());
  else
    addPass(createAtomicExpandPass());

  // Cmpxchg instructions are often used with a subsequent comparison to
  // determine whether they succeeded. We can exploit existing control-flow in
  // ldrex/strex loops to simplify this, but it needs tidying up.
  if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
    addPass(createCFGSimplificationPass(
        1, false, false, true, true, [this](const Function &F) {
          const auto &ST = this->TM->getSubtarget<ARMSubtarget>(F);
          return ST.hasAnyDataBarrier() && !ST.isThumb1Only();
        }));

  addPass(createMVEGatherScatterLoweringPass());

  TargetPassConfig::addIRPasses();

  // Run the parallel DSP pass.
  if (getOptLevel() == CodeGenOpt::Aggressive)
    addPass(createARMParallelDSPPass());

  // Match interleaved memory accesses to ldN/stN intrinsics.
  if (TM->getOptLevel() != CodeGenOpt::None)
    addPass(createInterleavedAccessPass());

  // Add Control Flow Guard checks.
  if (TM->getTargetTriple().isOSWindows())
    addPass(createCFGuardCheckPass());
}

void ARMPassConfig::addCodeGenPrepare() {
  if (getOptLevel() != CodeGenOpt::None)
    addPass(createTypePromotionPass());
  TargetPassConfig::addCodeGenPrepare();
}

bool ARMPassConfig::addPreISel() {
  if ((TM->getOptLevel() != CodeGenOpt::None &&
       EnableGlobalMerge == cl::BOU_UNSET) ||
      EnableGlobalMerge == cl::BOU_TRUE) {
    // FIXME: This is using the thumb1 only constant value for
    // maximal global offset for merging globals. We may want
    // to look into using the old value for non-thumb1 code of
    // 4095 based on the TargetMachine, but this starts to become
    // tricky when doing code gen per function.
    bool OnlyOptimizeForSize = (TM->getOptLevel() < CodeGenOpt::Aggressive) &&
                               (EnableGlobalMerge == cl::BOU_UNSET);
    // Merging of extern globals is enabled by default on non-Mach-O as we
    // expect it to be generally either beneficial or harmless. On Mach-O it
    // is disabled as we emit the .subsections_via_symbols directive which
    // means that merging extern globals is not safe.
    bool MergeExternalByDefault = !TM->getTargetTriple().isOSBinFormatMachO();
    addPass(createGlobalMergePass(TM, 127, OnlyOptimizeForSize,
                                  MergeExternalByDefault));
  }

  if (TM->getOptLevel() != CodeGenOpt::None) {
    addPass(createHardwareLoopsPass());
    addPass(createMVETailPredicationPass());
  }

  return false;
}

bool ARMPassConfig::addInstSelector() {
  addPass(createARMISelDag(getARMTargetMachine(), getOptLevel()));
  return false;
}

bool ARMPassConfig::addIRTranslator() {
  addPass(new IRTranslator());
  return false;
}

bool ARMPassConfig::addLegalizeMachineIR() {
  addPass(new Legalizer());
  return false;
}

bool ARMPassConfig::addRegBankSelect() {
  addPass(new RegBankSelect());
  return false;
}

bool ARMPassConfig::addGlobalInstructionSelect() {
  addPass(new InstructionSelect());
  return false;
}

void ARMPassConfig::addPreRegAlloc() {
  if (getOptLevel() != CodeGenOpt::None) {
    addPass(createMLxExpansionPass());

    if (EnableARMLoadStoreOpt)
      addPass(createARMLoadStoreOptimizationPass(/* pre-register alloc */ true));

    if (!DisableA15SDOptimization)
      addPass(createA15SDOptimizerPass());
  }
}

void ARMPassConfig::addPreSched2() {
  if (getOptLevel() != CodeGenOpt::None) {
    if (EnableARMLoadStoreOpt)
      addPass(createARMLoadStoreOptimizationPass());

    addPass(new ARMExecutionDomainFix());
    addPass(createBreakFalseDeps());
  }

  // Expand some pseudo instructions into multiple instructions to allow
  // proper scheduling.
  addPass(createARMExpandPseudoPass());

  if (getOptLevel() != CodeGenOpt::None) {
    // in v8, IfConversion depends on Thumb instruction widths
    addPass(createThumb2SizeReductionPass([this](const Function &F) {
      return this->TM->getSubtarget<ARMSubtarget>(F).restrictIT();
    }));

    addPass(createIfConverter([](const MachineFunction &MF) {
      return !MF.getSubtarget<ARMSubtarget>().isThumb1Only();
    }));
  }
  addPass(createMVEVPTBlockPass());
  addPass(createThumb2ITBlockPass());

  // Add both scheduling passes to give the subtarget an opportunity to pick
  // between them.
  if (getOptLevel() != CodeGenOpt::None) {
    addPass(&PostMachineSchedulerID);
    addPass(&PostRASchedulerID);
  }
}

void ARMPassConfig::addPreEmitPass() {
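  // Shrink eligible 32-bit Thumb2 instructions to their 16-bit encodings
  // before constant islands are laid out.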
  addPass(createThumb2SizeReductionPass());

  // The constant island pass works on unbundled instructions.
  addPass(createUnpackMachineBundles([](const MachineFunction &MF) {
    return MF.getSubtarget<ARMSubtarget>().isThumb2();
  }));

  // Don't optimize barriers at -O0.
  if (getOptLevel() != CodeGenOpt::None)
    addPass(createARMOptimizeBarriersPass());

  addPass(createARMConstantIslandPass());
  addPass(createARMLowOverheadLoopsPass());

  // Identify valid longjmp targets for Windows Control Flow Guard.
  if (TM->getTargetTriple().isOSWindows())
    addPass(createCFGuardLongjmpPass());
}
549