1//===-- PPCTargetMachine.cpp - Define TargetMachine for PowerPC -----------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Top-level implementation for the PowerPC target.
11//
12//===----------------------------------------------------------------------===//
13
14#include "PPCTargetMachine.h"
15#include "PPC.h"
16#include "PPCTargetObjectFile.h"
17#include "PPCTargetTransformInfo.h"
18#include "llvm/CodeGen/Passes.h"
19#include "llvm/IR/Function.h"
20#include "llvm/IR/LegacyPassManager.h"
21#include "llvm/MC/MCStreamer.h"
22#include "llvm/Support/CommandLine.h"
23#include "llvm/Support/FormattedStream.h"
24#include "llvm/Support/TargetRegistry.h"
25#include "llvm/Target/TargetOptions.h"
26#include "llvm/Transforms/Scalar.h"
27using namespace llvm;
28
29static cl::
30opt<bool> DisableCTRLoops("disable-ppc-ctrloops", cl::Hidden,
31                        cl::desc("Disable CTR loops for PPC"));
32
33static cl::
34opt<bool> DisablePreIncPrep("disable-ppc-preinc-prep", cl::Hidden,
35                            cl::desc("Disable PPC loop preinc prep"));
36
37static cl::opt<bool>
38VSXFMAMutateEarly("schedule-ppc-vsx-fma-mutation-early",
39  cl::Hidden, cl::desc("Schedule VSX FMA instruction mutation early"));
40
41static cl::
42opt<bool> DisableVSXSwapRemoval("disable-ppc-vsx-swap-removal", cl::Hidden,
43                                cl::desc("Disable VSX Swap Removal for PPC"));
44
45static cl::
46opt<bool> DisableMIPeephole("disable-ppc-peephole", cl::Hidden,
47                            cl::desc("Disable machine peepholes for PPC"));
48
49static cl::opt<bool>
50EnableGEPOpt("ppc-gep-opt", cl::Hidden,
51             cl::desc("Enable optimizations on complex GEPs"),
52             cl::init(true));
53
54static cl::opt<bool>
55EnablePrefetch("enable-ppc-prefetching",
56                  cl::desc("disable software prefetching on PPC"),
57                  cl::init(false), cl::Hidden);
58
59static cl::opt<bool>
60EnableExtraTOCRegDeps("enable-ppc-extra-toc-reg-deps",
61                      cl::desc("Add extra TOC register dependencies"),
62                      cl::init(true), cl::Hidden);
63
64static cl::opt<bool>
65EnableMachineCombinerPass("ppc-machine-combiner",
66                          cl::desc("Enable the machine combiner pass"),
67                          cl::init(true), cl::Hidden);
68
69extern "C" void LLVMInitializePowerPCTarget() {
70  // Register the targets
71  RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target);
72  RegisterTargetMachine<PPC64TargetMachine> B(ThePPC64Target);
73  RegisterTargetMachine<PPC64TargetMachine> C(ThePPC64LETarget);
74
75  PassRegistry &PR = *PassRegistry::getPassRegistry();
76  initializePPCBoolRetToIntPass(PR);
77}
78
79/// Return the datalayout string of a subtarget.
80static std::string getDataLayoutString(const Triple &T) {
81  bool is64Bit = T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le;
82  std::string Ret;
83
84  // Most PPC* platforms are big endian, PPC64LE is little endian.
85  if (T.getArch() == Triple::ppc64le)
86    Ret = "e";
87  else
88    Ret = "E";
89
90  Ret += DataLayout::getManglingComponent(T);
91
92  // PPC32 has 32 bit pointers. The PS3 (OS Lv2) is a PPC64 machine with 32 bit
93  // pointers.
94  if (!is64Bit || T.getOS() == Triple::Lv2)
95    Ret += "-p:32:32";
96
97  // Note, the alignment values for f64 and i64 on ppc64 in Darwin
98  // documentation are wrong; these are correct (i.e. "what gcc does").
99  if (is64Bit || !T.isOSDarwin())
100    Ret += "-i64:64";
101  else
102    Ret += "-f64:32:64";
103
104  // PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones.
105  if (is64Bit)
106    Ret += "-n32:64";
107  else
108    Ret += "-n32";
109
110  return Ret;
111}
112
113static std::string computeFSAdditions(StringRef FS, CodeGenOpt::Level OL,
114                                      const Triple &TT) {
115  std::string FullFS = FS;
116
117  // Make sure 64-bit features are available when CPUname is generic
118  if (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le) {
119    if (!FullFS.empty())
120      FullFS = "+64bit," + FullFS;
121    else
122      FullFS = "+64bit";
123  }
124
125  if (OL >= CodeGenOpt::Default) {
126    if (!FullFS.empty())
127      FullFS = "+crbits," + FullFS;
128    else
129      FullFS = "+crbits";
130  }
131
132  if (OL != CodeGenOpt::None) {
133    if (!FullFS.empty())
134      FullFS = "+invariant-function-descriptors," + FullFS;
135    else
136      FullFS = "+invariant-function-descriptors";
137  }
138
139  return FullFS;
140}
141
142static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
143  // If it isn't a Mach-O file then it's going to be a linux ELF
144  // object file.
145  if (TT.isOSDarwin())
146    return make_unique<TargetLoweringObjectFileMachO>();
147
148  return make_unique<PPC64LinuxTargetObjectFile>();
149}
150
151static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT,
152                                                 const TargetOptions &Options) {
153  if (Options.MCOptions.getABIName().startswith("elfv1"))
154    return PPCTargetMachine::PPC_ABI_ELFv1;
155  else if (Options.MCOptions.getABIName().startswith("elfv2"))
156    return PPCTargetMachine::PPC_ABI_ELFv2;
157
158  assert(Options.MCOptions.getABIName().empty() &&
159         "Unknown target-abi option!");
160
161  if (!TT.isMacOSX()) {
162    switch (TT.getArch()) {
163    case Triple::ppc64le:
164      return PPCTargetMachine::PPC_ABI_ELFv2;
165    case Triple::ppc64:
166      return PPCTargetMachine::PPC_ABI_ELFv1;
167    default:
168      // Fallthrough.
169      ;
170    }
171  }
172  return PPCTargetMachine::PPC_ABI_UNKNOWN;
173}
174
175// The FeatureString here is a little subtle. We are modifying the feature
176// string with what are (currently) non-function specific overrides as it goes
177// into the LLVMTargetMachine constructor and then using the stored value in the
178// Subtarget constructor below it.
179PPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT,
180                                   StringRef CPU, StringRef FS,
181                                   const TargetOptions &Options,
182                                   Reloc::Model RM, CodeModel::Model CM,
183                                   CodeGenOpt::Level OL)
184    : LLVMTargetMachine(T, getDataLayoutString(TT), TT, CPU,
185                        computeFSAdditions(FS, OL, TT), Options, RM, CM, OL),
186      TLOF(createTLOF(getTargetTriple())),
187      TargetABI(computeTargetABI(TT, Options)),
188      Subtarget(TargetTriple, CPU, computeFSAdditions(FS, OL, TT), *this) {
189
190  // For the estimates, convergence is quadratic, so we essentially double the
191  // number of digits correct after every iteration. For both FRE and FRSQRTE,
192  // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
193  // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
194  unsigned RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3,
195           RefinementSteps64 = RefinementSteps + 1;
196
197  this->Options.Reciprocals.setDefaults("sqrtf", true, RefinementSteps);
198  this->Options.Reciprocals.setDefaults("vec-sqrtf", true, RefinementSteps);
199  this->Options.Reciprocals.setDefaults("divf", true, RefinementSteps);
200  this->Options.Reciprocals.setDefaults("vec-divf", true, RefinementSteps);
201
202  this->Options.Reciprocals.setDefaults("sqrtd", true, RefinementSteps64);
203  this->Options.Reciprocals.setDefaults("vec-sqrtd", true, RefinementSteps64);
204  this->Options.Reciprocals.setDefaults("divd", true, RefinementSteps64);
205  this->Options.Reciprocals.setDefaults("vec-divd", true, RefinementSteps64);
206
207  initAsmInfo();
208}
209
210PPCTargetMachine::~PPCTargetMachine() {}
211
212void PPC32TargetMachine::anchor() { }
213
214PPC32TargetMachine::PPC32TargetMachine(const Target &T, const Triple &TT,
215                                       StringRef CPU, StringRef FS,
216                                       const TargetOptions &Options,
217                                       Reloc::Model RM, CodeModel::Model CM,
218                                       CodeGenOpt::Level OL)
219    : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
220
221void PPC64TargetMachine::anchor() { }
222
223PPC64TargetMachine::PPC64TargetMachine(const Target &T, const Triple &TT,
224                                       StringRef CPU, StringRef FS,
225                                       const TargetOptions &Options,
226                                       Reloc::Model RM, CodeModel::Model CM,
227                                       CodeGenOpt::Level OL)
228    : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
229
230const PPCSubtarget *
231PPCTargetMachine::getSubtargetImpl(const Function &F) const {
232  Attribute CPUAttr = F.getFnAttribute("target-cpu");
233  Attribute FSAttr = F.getFnAttribute("target-features");
234
235  std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
236                        ? CPUAttr.getValueAsString().str()
237                        : TargetCPU;
238  std::string FS = !FSAttr.hasAttribute(Attribute::None)
239                       ? FSAttr.getValueAsString().str()
240                       : TargetFS;
241
242  // FIXME: This is related to the code below to reset the target options,
243  // we need to know whether or not the soft float flag is set on the
244  // function before we can generate a subtarget. We also need to use
245  // it as a key for the subtarget since that can be the only difference
246  // between two functions.
247  bool SoftFloat =
248    F.hasFnAttribute("use-soft-float") &&
249    F.getFnAttribute("use-soft-float").getValueAsString() == "true";
250  // If the soft float attribute is set on the function turn on the soft float
251  // subtarget feature.
252  if (SoftFloat)
253    FS += FS.empty() ? "+soft-float" : ",+soft-float";
254
255  auto &I = SubtargetMap[CPU + FS];
256  if (!I) {
257    // This needs to be done before we create a new subtarget since any
258    // creation will depend on the TM and the code generation flags on the
259    // function that reside in TargetOptions.
260    resetTargetOptions(F);
261    I = llvm::make_unique<PPCSubtarget>(
262        TargetTriple, CPU,
263        // FIXME: It would be good to have the subtarget additions here
264        // not necessary. Anything that turns them on/off (overrides) ends
265        // up being put at the end of the feature string, but the defaults
266        // shouldn't require adding them. Fixing this means pulling Feature64Bit
267        // out of most of the target cpus in the .td file and making it set only
268        // as part of initialization via the TargetTriple.
269        computeFSAdditions(FS, getOptLevel(), getTargetTriple()), *this);
270  }
271  return I.get();
272}
273
274//===----------------------------------------------------------------------===//
275// Pass Pipeline Configuration
276//===----------------------------------------------------------------------===//
277
278namespace {
279/// PPC Code Generator Pass Configuration Options.
280class PPCPassConfig : public TargetPassConfig {
281public:
282  PPCPassConfig(PPCTargetMachine *TM, PassManagerBase &PM)
283    : TargetPassConfig(TM, PM) {}
284
285  PPCTargetMachine &getPPCTargetMachine() const {
286    return getTM<PPCTargetMachine>();
287  }
288
289  void addIRPasses() override;
290  bool addPreISel() override;
291  bool addILPOpts() override;
292  bool addInstSelector() override;
293  void addMachineSSAOptimization() override;
294  void addPreRegAlloc() override;
295  void addPreSched2() override;
296  void addPreEmitPass() override;
297};
298} // namespace
299
300TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) {
301  return new PPCPassConfig(this, PM);
302}
303
304void PPCPassConfig::addIRPasses() {
305  if (TM->getOptLevel() != CodeGenOpt::None)
306    addPass(createPPCBoolRetToIntPass());
307  addPass(createAtomicExpandPass(&getPPCTargetMachine()));
308
309  // For the BG/Q (or if explicitly requested), add explicit data prefetch
310  // intrinsics.
311  bool UsePrefetching = TM->getTargetTriple().getVendor() == Triple::BGQ &&
312                        getOptLevel() != CodeGenOpt::None;
313  if (EnablePrefetch.getNumOccurrences() > 0)
314    UsePrefetching = EnablePrefetch;
315  if (UsePrefetching)
316    addPass(createPPCLoopDataPrefetchPass());
317
318  if (TM->getOptLevel() == CodeGenOpt::Aggressive && EnableGEPOpt) {
319    // Call SeparateConstOffsetFromGEP pass to extract constants within indices
320    // and lower a GEP with multiple indices to either arithmetic operations or
321    // multiple GEPs with single index.
322    addPass(createSeparateConstOffsetFromGEPPass(TM, true));
323    // Call EarlyCSE pass to find and remove subexpressions in the lowered
324    // result.
325    addPass(createEarlyCSEPass());
326    // Do loop invariant code motion in case part of the lowered result is
327    // invariant.
328    addPass(createLICMPass());
329  }
330
331  TargetPassConfig::addIRPasses();
332}
333
334bool PPCPassConfig::addPreISel() {
335  if (!DisablePreIncPrep && getOptLevel() != CodeGenOpt::None)
336    addPass(createPPCLoopPreIncPrepPass(getPPCTargetMachine()));
337
338  if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
339    addPass(createPPCCTRLoops(getPPCTargetMachine()));
340
341  return false;
342}
343
344bool PPCPassConfig::addILPOpts() {
345  addPass(&EarlyIfConverterID);
346
347  if (EnableMachineCombinerPass)
348    addPass(&MachineCombinerID);
349
350  return true;
351}
352
353bool PPCPassConfig::addInstSelector() {
354  // Install an instruction selector.
355  addPass(createPPCISelDag(getPPCTargetMachine()));
356
357#ifndef NDEBUG
358  if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
359    addPass(createPPCCTRLoopsVerify());
360#endif
361
362  addPass(createPPCVSXCopyPass());
363  return false;
364}
365
366void PPCPassConfig::addMachineSSAOptimization() {
367  TargetPassConfig::addMachineSSAOptimization();
368  // For little endian, remove where possible the vector swap instructions
369  // introduced at code generation to normalize vector element order.
370  if (TM->getTargetTriple().getArch() == Triple::ppc64le &&
371      !DisableVSXSwapRemoval)
372    addPass(createPPCVSXSwapRemovalPass());
373  // Target-specific peephole cleanups performed after instruction
374  // selection.
375  if (!DisableMIPeephole) {
376    addPass(createPPCMIPeepholePass());
377    addPass(&DeadMachineInstructionElimID);
378  }
379}
380
381void PPCPassConfig::addPreRegAlloc() {
382  initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry());
383  insertPass(VSXFMAMutateEarly ? &RegisterCoalescerID : &MachineSchedulerID,
384             &PPCVSXFMAMutateID);
385  if (getPPCTargetMachine().getRelocationModel() == Reloc::PIC_)
386    addPass(createPPCTLSDynamicCallPass());
387  if (EnableExtraTOCRegDeps)
388    addPass(createPPCTOCRegDepsPass());
389}
390
391void PPCPassConfig::addPreSched2() {
392  if (getOptLevel() != CodeGenOpt::None)
393    addPass(&IfConverterID);
394}
395
396void PPCPassConfig::addPreEmitPass() {
397  if (getOptLevel() != CodeGenOpt::None)
398    addPass(createPPCEarlyReturnPass(), false);
399  // Must run branch selection immediately preceding the asm printer.
400  addPass(createPPCBranchSelectionPass(), false);
401}
402
403TargetIRAnalysis PPCTargetMachine::getTargetIRAnalysis() {
404  return TargetIRAnalysis([this](const Function &F) {
405    return TargetTransformInfo(PPCTTIImpl(this, F));
406  });
407}
408