1193323Sed//===-- PPCTargetMachine.cpp - Define TargetMachine for PowerPC -----------===// 2193323Sed// 3193323Sed// The LLVM Compiler Infrastructure 4193323Sed// 5193323Sed// This file is distributed under the University of Illinois Open Source 6193323Sed// License. See LICENSE.TXT for details. 7193323Sed// 8193323Sed//===----------------------------------------------------------------------===// 9193323Sed// 10193323Sed// Top-level implementation for the PowerPC target. 11193323Sed// 12193323Sed//===----------------------------------------------------------------------===// 13193323Sed 14234353Sdim#include "PPCTargetMachine.h" 15193323Sed#include "PPC.h" 16280031Sdim#include "PPCTargetObjectFile.h" 17288943Sdim#include "PPCTargetTransformInfo.h" 18249423Sdim#include "llvm/CodeGen/Passes.h" 19280031Sdim#include "llvm/IR/Function.h" 20288943Sdim#include "llvm/IR/LegacyPassManager.h" 21249423Sdim#include "llvm/MC/MCStreamer.h" 22239462Sdim#include "llvm/Support/CommandLine.h" 23198090Srdivacky#include "llvm/Support/FormattedStream.h" 24226633Sdim#include "llvm/Support/TargetRegistry.h" 25249423Sdim#include "llvm/Target/TargetOptions.h" 26280031Sdim#include "llvm/Transforms/Scalar.h" 27193323Sedusing namespace llvm; 28193323Sed 29239462Sdimstatic cl:: 30239462Sdimopt<bool> DisableCTRLoops("disable-ppc-ctrloops", cl::Hidden, 31239462Sdim cl::desc("Disable CTR loops for PPC")); 32239462Sdim 33288943Sdimstatic cl:: 34288943Sdimopt<bool> DisablePreIncPrep("disable-ppc-preinc-prep", cl::Hidden, 35288943Sdim cl::desc("Disable PPC loop preinc prep")); 36288943Sdim 37276479Sdimstatic cl::opt<bool> 38276479SdimVSXFMAMutateEarly("schedule-ppc-vsx-fma-mutation-early", 39276479Sdim cl::Hidden, cl::desc("Schedule VSX FMA instruction mutation early")); 40276479Sdim 41288943Sdimstatic cl:: 42288943Sdimopt<bool> DisableVSXSwapRemoval("disable-ppc-vsx-swap-removal", cl::Hidden, 43288943Sdim cl::desc("Disable VSX Swap Removal for PPC")); 44288943Sdim 45296417Sdimstatic cl:: 46296417Sdimopt<bool> DisableMIPeephole("disable-ppc-peephole", cl::Hidden, 47296417Sdim cl::desc("Disable machine peepholes for PPC")); 48296417Sdim 49280031Sdimstatic cl::opt<bool> 50280031SdimEnableGEPOpt("ppc-gep-opt", cl::Hidden, 51280031Sdim cl::desc("Enable optimizations on complex GEPs"), 52280031Sdim cl::init(true)); 53280031Sdim 54288943Sdimstatic cl::opt<bool> 55288943SdimEnablePrefetch("enable-ppc-prefetching", 56288943Sdim cl::desc("disable software prefetching on PPC"), 57288943Sdim cl::init(false), cl::Hidden); 58288943Sdim 59288943Sdimstatic cl::opt<bool> 60288943SdimEnableExtraTOCRegDeps("enable-ppc-extra-toc-reg-deps", 61288943Sdim cl::desc("Add extra TOC register dependencies"), 62288943Sdim cl::init(true), cl::Hidden); 63288943Sdim 64296417Sdimstatic cl::opt<bool> 65296417SdimEnableMachineCombinerPass("ppc-machine-combiner", 66296417Sdim cl::desc("Enable the machine combiner pass"), 67296417Sdim cl::init(true), cl::Hidden); 68296417Sdim 69198090Srdivackyextern "C" void LLVMInitializePowerPCTarget() { 70198090Srdivacky // Register the targets 71234353Sdim RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target); 72198090Srdivacky RegisterTargetMachine<PPC64TargetMachine> B(ThePPC64Target); 73261991Sdim RegisterTargetMachine<PPC64TargetMachine> C(ThePPC64LETarget); 74296417Sdim 75296417Sdim PassRegistry &PR = *PassRegistry::getPassRegistry(); 76296417Sdim initializePPCBoolRetToIntPass(PR); 77193323Sed} 78193323Sed 79288943Sdim/// Return the datalayout string of a subtarget. 80288943Sdimstatic std::string getDataLayoutString(const Triple &T) { 81288943Sdim bool is64Bit = T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le; 82288943Sdim std::string Ret; 83288943Sdim 84288943Sdim // Most PPC* platforms are big endian, PPC64LE is little endian. 85288943Sdim if (T.getArch() == Triple::ppc64le) 86288943Sdim Ret = "e"; 87288943Sdim else 88288943Sdim Ret = "E"; 89288943Sdim 90288943Sdim Ret += DataLayout::getManglingComponent(T); 91288943Sdim 92288943Sdim // PPC32 has 32 bit pointers. The PS3 (OS Lv2) is a PPC64 machine with 32 bit 93288943Sdim // pointers. 94288943Sdim if (!is64Bit || T.getOS() == Triple::Lv2) 95288943Sdim Ret += "-p:32:32"; 96288943Sdim 97288943Sdim // Note, the alignment values for f64 and i64 on ppc64 in Darwin 98288943Sdim // documentation are wrong; these are correct (i.e. "what gcc does"). 99288943Sdim if (is64Bit || !T.isOSDarwin()) 100288943Sdim Ret += "-i64:64"; 101288943Sdim else 102288943Sdim Ret += "-f64:32:64"; 103288943Sdim 104288943Sdim // PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones. 105288943Sdim if (is64Bit) 106288943Sdim Ret += "-n32:64"; 107288943Sdim else 108288943Sdim Ret += "-n32"; 109288943Sdim 110288943Sdim return Ret; 111288943Sdim} 112288943Sdim 113288943Sdimstatic std::string computeFSAdditions(StringRef FS, CodeGenOpt::Level OL, 114288943Sdim const Triple &TT) { 115280031Sdim std::string FullFS = FS; 116280031Sdim 117280031Sdim // Make sure 64-bit features are available when CPUname is generic 118288943Sdim if (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le) { 119280031Sdim if (!FullFS.empty()) 120280031Sdim FullFS = "+64bit," + FullFS; 121280031Sdim else 122280031Sdim FullFS = "+64bit"; 123280031Sdim } 124280031Sdim 125280031Sdim if (OL >= CodeGenOpt::Default) { 126280031Sdim if (!FullFS.empty()) 127280031Sdim FullFS = "+crbits," + FullFS; 128280031Sdim else 129280031Sdim FullFS = "+crbits"; 130280031Sdim } 131288943Sdim 132288943Sdim if (OL != CodeGenOpt::None) { 133296417Sdim if (!FullFS.empty()) 134288943Sdim FullFS = "+invariant-function-descriptors," + FullFS; 135288943Sdim else 136288943Sdim FullFS = "+invariant-function-descriptors"; 137288943Sdim } 138288943Sdim 139280031Sdim return FullFS; 140280031Sdim} 141280031Sdim 142280031Sdimstatic std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { 143280031Sdim // If it isn't a Mach-O file then it's going to be a linux ELF 144280031Sdim // object file. 145280031Sdim if (TT.isOSDarwin()) 146280031Sdim return make_unique<TargetLoweringObjectFileMachO>(); 147280031Sdim 148280031Sdim return make_unique<PPC64LinuxTargetObjectFile>(); 149280031Sdim} 150280031Sdim 151288943Sdimstatic PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT, 152288943Sdim const TargetOptions &Options) { 153288943Sdim if (Options.MCOptions.getABIName().startswith("elfv1")) 154288943Sdim return PPCTargetMachine::PPC_ABI_ELFv1; 155288943Sdim else if (Options.MCOptions.getABIName().startswith("elfv2")) 156288943Sdim return PPCTargetMachine::PPC_ABI_ELFv2; 157288943Sdim 158288943Sdim assert(Options.MCOptions.getABIName().empty() && 159296417Sdim "Unknown target-abi option!"); 160288943Sdim 161288943Sdim if (!TT.isMacOSX()) { 162288943Sdim switch (TT.getArch()) { 163288943Sdim case Triple::ppc64le: 164288943Sdim return PPCTargetMachine::PPC_ABI_ELFv2; 165288943Sdim case Triple::ppc64: 166288943Sdim return PPCTargetMachine::PPC_ABI_ELFv1; 167288943Sdim default: 168288943Sdim // Fallthrough. 169288943Sdim ; 170288943Sdim } 171288943Sdim } 172288943Sdim return PPCTargetMachine::PPC_ABI_UNKNOWN; 173288943Sdim} 174288943Sdim 175296417Sdim// The FeatureString here is a little subtle. We are modifying the feature 176296417Sdim// string with what are (currently) non-function specific overrides as it goes 177296417Sdim// into the LLVMTargetMachine constructor and then using the stored value in the 178280031Sdim// Subtarget constructor below it. 179288943SdimPPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT, 180288943Sdim StringRef CPU, StringRef FS, 181288943Sdim const TargetOptions &Options, 182226633Sdim Reloc::Model RM, CodeModel::Model CM, 183280031Sdim CodeGenOpt::Level OL) 184288943Sdim : LLVMTargetMachine(T, getDataLayoutString(TT), TT, CPU, 185288943Sdim computeFSAdditions(FS, OL, TT), Options, RM, CM, OL), 186288943Sdim TLOF(createTLOF(getTargetTriple())), 187288943Sdim TargetABI(computeTargetABI(TT, Options)), 188288943Sdim Subtarget(TargetTriple, CPU, computeFSAdditions(FS, OL, TT), *this) { 189288943Sdim 190288943Sdim // For the estimates, convergence is quadratic, so we essentially double the 191288943Sdim // number of digits correct after every iteration. For both FRE and FRSQRTE, 192288943Sdim // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(), 193288943Sdim // this is 2^-14. IEEE float has 23 digits and double has 52 digits. 194288943Sdim unsigned RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3, 195288943Sdim RefinementSteps64 = RefinementSteps + 1; 196288943Sdim 197288943Sdim this->Options.Reciprocals.setDefaults("sqrtf", true, RefinementSteps); 198288943Sdim this->Options.Reciprocals.setDefaults("vec-sqrtf", true, RefinementSteps); 199288943Sdim this->Options.Reciprocals.setDefaults("divf", true, RefinementSteps); 200288943Sdim this->Options.Reciprocals.setDefaults("vec-divf", true, RefinementSteps); 201288943Sdim 202288943Sdim this->Options.Reciprocals.setDefaults("sqrtd", true, RefinementSteps64); 203288943Sdim this->Options.Reciprocals.setDefaults("vec-sqrtd", true, RefinementSteps64); 204288943Sdim this->Options.Reciprocals.setDefaults("divd", true, RefinementSteps64); 205288943Sdim this->Options.Reciprocals.setDefaults("vec-divd", true, RefinementSteps64); 206288943Sdim 207261991Sdim initAsmInfo(); 208193323Sed} 209193323Sed 210280031SdimPPCTargetMachine::~PPCTargetMachine() {} 211280031Sdim 212234353Sdimvoid PPC32TargetMachine::anchor() { } 213193323Sed 214288943SdimPPC32TargetMachine::PPC32TargetMachine(const Target &T, const Triple &TT, 215226633Sdim StringRef CPU, StringRef FS, 216234353Sdim const TargetOptions &Options, 217234353Sdim Reloc::Model RM, CodeModel::Model CM, 218234353Sdim CodeGenOpt::Level OL) 219288943Sdim : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {} 220193323Sed 221234353Sdimvoid PPC64TargetMachine::anchor() { } 222193323Sed 223288943SdimPPC64TargetMachine::PPC64TargetMachine(const Target &T, const Triple &TT, 224288943Sdim StringRef CPU, StringRef FS, 225234353Sdim const TargetOptions &Options, 226234353Sdim Reloc::Model RM, CodeModel::Model CM, 227234353Sdim CodeGenOpt::Level OL) 228288943Sdim : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {} 229193323Sed 230280031Sdimconst PPCSubtarget * 231280031SdimPPCTargetMachine::getSubtargetImpl(const Function &F) const { 232288943Sdim Attribute CPUAttr = F.getFnAttribute("target-cpu"); 233288943Sdim Attribute FSAttr = F.getFnAttribute("target-features"); 234193323Sed 235280031Sdim std::string CPU = !CPUAttr.hasAttribute(Attribute::None) 236280031Sdim ? CPUAttr.getValueAsString().str() 237280031Sdim : TargetCPU; 238280031Sdim std::string FS = !FSAttr.hasAttribute(Attribute::None) 239280031Sdim ? FSAttr.getValueAsString().str() 240280031Sdim : TargetFS; 241280031Sdim 242296417Sdim // FIXME: This is related to the code below to reset the target options, 243296417Sdim // we need to know whether or not the soft float flag is set on the 244296417Sdim // function before we can generate a subtarget. We also need to use 245296417Sdim // it as a key for the subtarget since that can be the only difference 246296417Sdim // between two functions. 247296417Sdim bool SoftFloat = 248296417Sdim F.hasFnAttribute("use-soft-float") && 249296417Sdim F.getFnAttribute("use-soft-float").getValueAsString() == "true"; 250296417Sdim // If the soft float attribute is set on the function turn on the soft float 251296417Sdim // subtarget feature. 252296417Sdim if (SoftFloat) 253296417Sdim FS += FS.empty() ? "+soft-float" : ",+soft-float"; 254296417Sdim 255280031Sdim auto &I = SubtargetMap[CPU + FS]; 256280031Sdim if (!I) { 257280031Sdim // This needs to be done before we create a new subtarget since any 258280031Sdim // creation will depend on the TM and the code generation flags on the 259280031Sdim // function that reside in TargetOptions. 260280031Sdim resetTargetOptions(F); 261288943Sdim I = llvm::make_unique<PPCSubtarget>( 262288943Sdim TargetTriple, CPU, 263288943Sdim // FIXME: It would be good to have the subtarget additions here 264288943Sdim // not necessary. Anything that turns them on/off (overrides) ends 265288943Sdim // up being put at the end of the feature string, but the defaults 266288943Sdim // shouldn't require adding them. Fixing this means pulling Feature64Bit 267288943Sdim // out of most of the target cpus in the .td file and making it set only 268288943Sdim // as part of initialization via the TargetTriple. 269288943Sdim computeFSAdditions(FS, getOptLevel(), getTargetTriple()), *this); 270280031Sdim } 271280031Sdim return I.get(); 272280031Sdim} 273280031Sdim 274193323Sed//===----------------------------------------------------------------------===// 275193323Sed// Pass Pipeline Configuration 276193323Sed//===----------------------------------------------------------------------===// 277193323Sed 278234353Sdimnamespace { 279234353Sdim/// PPC Code Generator Pass Configuration Options. 280234353Sdimclass PPCPassConfig : public TargetPassConfig { 281234353Sdimpublic: 282234353Sdim PPCPassConfig(PPCTargetMachine *TM, PassManagerBase &PM) 283234353Sdim : TargetPassConfig(TM, PM) {} 284234353Sdim 285234353Sdim PPCTargetMachine &getPPCTargetMachine() const { 286234353Sdim return getTM<PPCTargetMachine>(); 287234353Sdim } 288234353Sdim 289280031Sdim void addIRPasses() override; 290276479Sdim bool addPreISel() override; 291276479Sdim bool addILPOpts() override; 292276479Sdim bool addInstSelector() override; 293288943Sdim void addMachineSSAOptimization() override; 294280031Sdim void addPreRegAlloc() override; 295280031Sdim void addPreSched2() override; 296280031Sdim void addPreEmitPass() override; 297234353Sdim}; 298234353Sdim} // namespace 299234353Sdim 300234353SdimTargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) { 301239462Sdim return new PPCPassConfig(this, PM); 302239462Sdim} 303234353Sdim 304280031Sdimvoid PPCPassConfig::addIRPasses() { 305296417Sdim if (TM->getOptLevel() != CodeGenOpt::None) 306296417Sdim addPass(createPPCBoolRetToIntPass()); 307280031Sdim addPass(createAtomicExpandPass(&getPPCTargetMachine())); 308280031Sdim 309288943Sdim // For the BG/Q (or if explicitly requested), add explicit data prefetch 310288943Sdim // intrinsics. 311288943Sdim bool UsePrefetching = TM->getTargetTriple().getVendor() == Triple::BGQ && 312288943Sdim getOptLevel() != CodeGenOpt::None; 313288943Sdim if (EnablePrefetch.getNumOccurrences() > 0) 314288943Sdim UsePrefetching = EnablePrefetch; 315288943Sdim if (UsePrefetching) 316288943Sdim addPass(createPPCLoopDataPrefetchPass()); 317288943Sdim 318280031Sdim if (TM->getOptLevel() == CodeGenOpt::Aggressive && EnableGEPOpt) { 319280031Sdim // Call SeparateConstOffsetFromGEP pass to extract constants within indices 320280031Sdim // and lower a GEP with multiple indices to either arithmetic operations or 321280031Sdim // multiple GEPs with single index. 322280031Sdim addPass(createSeparateConstOffsetFromGEPPass(TM, true)); 323280031Sdim // Call EarlyCSE pass to find and remove subexpressions in the lowered 324280031Sdim // result. 325280031Sdim addPass(createEarlyCSEPass()); 326280031Sdim // Do loop invariant code motion in case part of the lowered result is 327280031Sdim // invariant. 328280031Sdim addPass(createLICMPass()); 329280031Sdim } 330280031Sdim 331280031Sdim TargetPassConfig::addIRPasses(); 332280031Sdim} 333280031Sdim 334261991Sdimbool PPCPassConfig::addPreISel() { 335288943Sdim if (!DisablePreIncPrep && getOptLevel() != CodeGenOpt::None) 336288943Sdim addPass(createPPCLoopPreIncPrepPass(getPPCTargetMachine())); 337288943Sdim 338239462Sdim if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None) 339261991Sdim addPass(createPPCCTRLoops(getPPCTargetMachine())); 340234353Sdim 341239462Sdim return false; 342234353Sdim} 343234353Sdim 344251662Sdimbool PPCPassConfig::addILPOpts() { 345276479Sdim addPass(&EarlyIfConverterID); 346296417Sdim 347296417Sdim if (EnableMachineCombinerPass) 348296417Sdim addPass(&MachineCombinerID); 349296417Sdim 350276479Sdim return true; 351251662Sdim} 352251662Sdim 353234353Sdimbool PPCPassConfig::addInstSelector() { 354193323Sed // Install an instruction selector. 355239462Sdim addPass(createPPCISelDag(getPPCTargetMachine())); 356261991Sdim 357261991Sdim#ifndef NDEBUG 358261991Sdim if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None) 359261991Sdim addPass(createPPCCTRLoopsVerify()); 360261991Sdim#endif 361261991Sdim 362276479Sdim addPass(createPPCVSXCopyPass()); 363193323Sed return false; 364193323Sed} 365193323Sed 366288943Sdimvoid PPCPassConfig::addMachineSSAOptimization() { 367288943Sdim TargetPassConfig::addMachineSSAOptimization(); 368288943Sdim // For little endian, remove where possible the vector swap instructions 369288943Sdim // introduced at code generation to normalize vector element order. 370288943Sdim if (TM->getTargetTriple().getArch() == Triple::ppc64le && 371288943Sdim !DisableVSXSwapRemoval) 372288943Sdim addPass(createPPCVSXSwapRemovalPass()); 373296417Sdim // Target-specific peephole cleanups performed after instruction 374296417Sdim // selection. 375296417Sdim if (!DisableMIPeephole) { 376296417Sdim addPass(createPPCMIPeepholePass()); 377296417Sdim addPass(&DeadMachineInstructionElimID); 378296417Sdim } 379288943Sdim} 380288943Sdim 381280031Sdimvoid PPCPassConfig::addPreRegAlloc() { 382276479Sdim initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry()); 383276479Sdim insertPass(VSXFMAMutateEarly ? &RegisterCoalescerID : &MachineSchedulerID, 384276479Sdim &PPCVSXFMAMutateID); 385288943Sdim if (getPPCTargetMachine().getRelocationModel() == Reloc::PIC_) 386288943Sdim addPass(createPPCTLSDynamicCallPass()); 387288943Sdim if (EnableExtraTOCRegDeps) 388288943Sdim addPass(createPPCTOCRegDepsPass()); 389276479Sdim} 390276479Sdim 391280031Sdimvoid PPCPassConfig::addPreSched2() { 392251662Sdim if (getOptLevel() != CodeGenOpt::None) 393251662Sdim addPass(&IfConverterID); 394251662Sdim} 395251662Sdim 396280031Sdimvoid PPCPassConfig::addPreEmitPass() { 397251662Sdim if (getOptLevel() != CodeGenOpt::None) 398280031Sdim addPass(createPPCEarlyReturnPass(), false); 399193323Sed // Must run branch selection immediately preceding the asm printer. 400280031Sdim addPass(createPPCBranchSelectionPass(), false); 401193323Sed} 402193323Sed 403288943SdimTargetIRAnalysis PPCTargetMachine::getTargetIRAnalysis() { 404296417Sdim return TargetIRAnalysis([this](const Function &F) { 405296417Sdim return TargetTransformInfo(PPCTTIImpl(this, F)); 406296417Sdim }); 407249423Sdim} 408