NVPTXTargetMachine.cpp revision 288943
1//===-- NVPTXTargetMachine.cpp - Define TargetMachine for NVPTX -----------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// Top-level implementation for the NVPTX target. 11// 12//===----------------------------------------------------------------------===// 13 14#include "NVPTXTargetMachine.h" 15#include "MCTargetDesc/NVPTXMCAsmInfo.h" 16#include "NVPTX.h" 17#include "NVPTXAllocaHoisting.h" 18#include "NVPTXLowerAggrCopies.h" 19#include "NVPTXTargetObjectFile.h" 20#include "NVPTXTargetTransformInfo.h" 21#include "llvm/Analysis/Passes.h" 22#include "llvm/CodeGen/AsmPrinter.h" 23#include "llvm/CodeGen/MachineFunctionAnalysis.h" 24#include "llvm/CodeGen/MachineModuleInfo.h" 25#include "llvm/CodeGen/Passes.h" 26#include "llvm/IR/DataLayout.h" 27#include "llvm/IR/IRPrintingPasses.h" 28#include "llvm/IR/LegacyPassManager.h" 29#include "llvm/IR/Verifier.h" 30#include "llvm/MC/MCAsmInfo.h" 31#include "llvm/MC/MCInstrInfo.h" 32#include "llvm/MC/MCStreamer.h" 33#include "llvm/MC/MCSubtargetInfo.h" 34#include "llvm/Support/CommandLine.h" 35#include "llvm/Support/Debug.h" 36#include "llvm/Support/FormattedStream.h" 37#include "llvm/Support/TargetRegistry.h" 38#include "llvm/Support/raw_ostream.h" 39#include "llvm/Target/TargetInstrInfo.h" 40#include "llvm/Target/TargetLowering.h" 41#include "llvm/Target/TargetLoweringObjectFile.h" 42#include "llvm/Target/TargetMachine.h" 43#include "llvm/Target/TargetOptions.h" 44#include "llvm/Target/TargetRegisterInfo.h" 45#include "llvm/Target/TargetSubtargetInfo.h" 46#include "llvm/Transforms/Scalar.h" 47 48using namespace llvm; 49 50namespace llvm { 51void initializeNVVMReflectPass(PassRegistry&); 52void initializeGenericToNVVMPass(PassRegistry&); 53void initializeNVPTXAllocaHoistingPass(PassRegistry &); 54void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&); 55void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &); 56void initializeNVPTXLowerKernelArgsPass(PassRegistry &); 57void initializeNVPTXLowerAllocaPass(PassRegistry &); 58} 59 60extern "C" void LLVMInitializeNVPTXTarget() { 61 // Register the target. 62 RegisterTargetMachine<NVPTXTargetMachine32> X(TheNVPTXTarget32); 63 RegisterTargetMachine<NVPTXTargetMachine64> Y(TheNVPTXTarget64); 64 65 // FIXME: This pass is really intended to be invoked during IR optimization, 66 // but it's very NVPTX-specific. 67 initializeNVVMReflectPass(*PassRegistry::getPassRegistry()); 68 initializeGenericToNVVMPass(*PassRegistry::getPassRegistry()); 69 initializeNVPTXAllocaHoistingPass(*PassRegistry::getPassRegistry()); 70 initializeNVPTXAssignValidGlobalNamesPass(*PassRegistry::getPassRegistry()); 71 initializeNVPTXFavorNonGenericAddrSpacesPass( 72 *PassRegistry::getPassRegistry()); 73 initializeNVPTXLowerKernelArgsPass(*PassRegistry::getPassRegistry()); 74 initializeNVPTXLowerAllocaPass(*PassRegistry::getPassRegistry()); 75} 76 77static std::string computeDataLayout(bool is64Bit) { 78 std::string Ret = "e"; 79 80 if (!is64Bit) 81 Ret += "-p:32:32"; 82 83 Ret += "-i64:64-v16:16-v32:32-n16:32:64"; 84 85 return Ret; 86} 87 88NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT, 89 StringRef CPU, StringRef FS, 90 const TargetOptions &Options, 91 Reloc::Model RM, CodeModel::Model CM, 92 CodeGenOpt::Level OL, bool is64bit) 93 : LLVMTargetMachine(T, computeDataLayout(is64bit), TT, CPU, FS, Options, RM, 94 CM, OL), 95 is64bit(is64bit), TLOF(make_unique<NVPTXTargetObjectFile>()), 96 Subtarget(TT, CPU, FS, *this) { 97 if (TT.getOS() == Triple::NVCL) 98 drvInterface = NVPTX::NVCL; 99 else 100 drvInterface = NVPTX::CUDA; 101 initAsmInfo(); 102} 103 104NVPTXTargetMachine::~NVPTXTargetMachine() {} 105 106void NVPTXTargetMachine32::anchor() {} 107 108NVPTXTargetMachine32::NVPTXTargetMachine32(const Target &T, const Triple &TT, 109 StringRef CPU, StringRef FS, 110 const TargetOptions &Options, 111 Reloc::Model RM, CodeModel::Model CM, 112 CodeGenOpt::Level OL) 113 : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} 114 115void NVPTXTargetMachine64::anchor() {} 116 117NVPTXTargetMachine64::NVPTXTargetMachine64(const Target &T, const Triple &TT, 118 StringRef CPU, StringRef FS, 119 const TargetOptions &Options, 120 Reloc::Model RM, CodeModel::Model CM, 121 CodeGenOpt::Level OL) 122 : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} 123 124namespace { 125class NVPTXPassConfig : public TargetPassConfig { 126public: 127 NVPTXPassConfig(NVPTXTargetMachine *TM, PassManagerBase &PM) 128 : TargetPassConfig(TM, PM) {} 129 130 NVPTXTargetMachine &getNVPTXTargetMachine() const { 131 return getTM<NVPTXTargetMachine>(); 132 } 133 134 void addIRPasses() override; 135 bool addInstSelector() override; 136 void addPostRegAlloc() override; 137 void addMachineSSAOptimization() override; 138 139 FunctionPass *createTargetRegisterAllocator(bool) override; 140 void addFastRegAlloc(FunctionPass *RegAllocPass) override; 141 void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override; 142}; 143} // end anonymous namespace 144 145TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) { 146 NVPTXPassConfig *PassConfig = new NVPTXPassConfig(this, PM); 147 return PassConfig; 148} 149 150TargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() { 151 return TargetIRAnalysis([this](Function &F) { 152 return TargetTransformInfo(NVPTXTTIImpl(this, F)); 153 }); 154} 155 156void NVPTXPassConfig::addIRPasses() { 157 // The following passes are known to not play well with virtual regs hanging 158 // around after register allocation (which in our case, is *all* registers). 159 // We explicitly disable them here. We do, however, need some functionality 160 // of the PrologEpilogCodeInserter pass, so we emulate that behavior in the 161 // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp). 162 disablePass(&PrologEpilogCodeInserterID); 163 disablePass(&MachineCopyPropagationID); 164 disablePass(&BranchFolderPassID); 165 disablePass(&TailDuplicateID); 166 167 addPass(createNVPTXImageOptimizerPass()); 168 TargetPassConfig::addIRPasses(); 169 addPass(createNVPTXAssignValidGlobalNamesPass()); 170 addPass(createGenericToNVVMPass()); 171 addPass(createNVPTXLowerKernelArgsPass(&getNVPTXTargetMachine())); 172 // NVPTXLowerKernelArgs emits alloca for byval parameters which can often 173 // be eliminated by SROA. 174 addPass(createSROAPass()); 175 addPass(createNVPTXLowerAllocaPass()); 176 addPass(createNVPTXFavorNonGenericAddrSpacesPass()); 177 // FavorNonGenericAddrSpaces shortcuts unnecessary addrspacecasts, and leave 178 // them unused. We could remove dead code in an ad-hoc manner, but that 179 // requires manual work and might be error-prone. 180 addPass(createDeadCodeEliminationPass()); 181 addPass(createSeparateConstOffsetFromGEPPass()); 182 // ReassociateGEPs exposes more opportunites for SLSR. See 183 // the example in reassociate-geps-and-slsr.ll. 184 addPass(createStraightLineStrengthReducePass()); 185 // SeparateConstOffsetFromGEP and SLSR creates common expressions which GVN or 186 // EarlyCSE can reuse. GVN generates significantly better code than EarlyCSE 187 // for some of our benchmarks. 188 if (getOptLevel() == CodeGenOpt::Aggressive) 189 addPass(createGVNPass()); 190 else 191 addPass(createEarlyCSEPass()); 192 // Run NaryReassociate after EarlyCSE/GVN to be more effective. 193 addPass(createNaryReassociatePass()); 194 // NaryReassociate on GEPs creates redundant common expressions, so run 195 // EarlyCSE after it. 196 addPass(createEarlyCSEPass()); 197} 198 199bool NVPTXPassConfig::addInstSelector() { 200 const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl(); 201 202 addPass(createLowerAggrCopies()); 203 addPass(createAllocaHoisting()); 204 addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel())); 205 206 if (!ST.hasImageHandles()) 207 addPass(createNVPTXReplaceImageHandlesPass()); 208 209 return false; 210} 211 212void NVPTXPassConfig::addPostRegAlloc() { 213 addPass(createNVPTXPrologEpilogPass(), false); 214 // NVPTXPrologEpilogPass calculates frame object offset and replace frame 215 // index with VRFrame register. NVPTXPeephole need to be run after that and 216 // will replace VRFrame with VRFrameLocal when possible. 217 addPass(createNVPTXPeephole()); 218} 219 220FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) { 221 return nullptr; // No reg alloc 222} 223 224void NVPTXPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) { 225 assert(!RegAllocPass && "NVPTX uses no regalloc!"); 226 addPass(&PHIEliminationID); 227 addPass(&TwoAddressInstructionPassID); 228} 229 230void NVPTXPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { 231 assert(!RegAllocPass && "NVPTX uses no regalloc!"); 232 233 addPass(&ProcessImplicitDefsID); 234 addPass(&LiveVariablesID); 235 addPass(&MachineLoopInfoID); 236 addPass(&PHIEliminationID); 237 238 addPass(&TwoAddressInstructionPassID); 239 addPass(&RegisterCoalescerID); 240 241 // PreRA instruction scheduling. 242 if (addPass(&MachineSchedulerID)) 243 printAndVerify("After Machine Scheduling"); 244 245 246 addPass(&StackSlotColoringID); 247 248 // FIXME: Needs physical registers 249 //addPass(&PostRAMachineLICMID); 250 251 printAndVerify("After StackSlotColoring"); 252} 253 254void NVPTXPassConfig::addMachineSSAOptimization() { 255 // Pre-ra tail duplication. 256 if (addPass(&EarlyTailDuplicateID)) 257 printAndVerify("After Pre-RegAlloc TailDuplicate"); 258 259 // Optimize PHIs before DCE: removing dead PHI cycles may make more 260 // instructions dead. 261 addPass(&OptimizePHIsID); 262 263 // This pass merges large allocas. StackSlotColoring is a different pass 264 // which merges spill slots. 265 addPass(&StackColoringID); 266 267 // If the target requests it, assign local variables to stack slots relative 268 // to one another and simplify frame index references where possible. 269 addPass(&LocalStackSlotAllocationID); 270 271 // With optimization, dead code should already be eliminated. However 272 // there is one known exception: lowered code for arguments that are only 273 // used by tail calls, where the tail calls reuse the incoming stack 274 // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll). 275 addPass(&DeadMachineInstructionElimID); 276 printAndVerify("After codegen DCE pass"); 277 278 // Allow targets to insert passes that improve instruction level parallelism, 279 // like if-conversion. Such passes will typically need dominator trees and 280 // loop info, just like LICM and CSE below. 281 if (addILPOpts()) 282 printAndVerify("After ILP optimizations"); 283 284 addPass(&MachineLICMID); 285 addPass(&MachineCSEID); 286 287 addPass(&MachineSinkingID); 288 printAndVerify("After Machine LICM, CSE and Sinking passes"); 289 290 addPass(&PeepholeOptimizerID); 291 printAndVerify("After codegen peephole optimization pass"); 292} 293