// NVPTXTargetMachine.cpp revision 280031
1//===-- NVPTXTargetMachine.cpp - Define TargetMachine for NVPTX -----------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// Top-level implementation for the NVPTX target. 11// 12//===----------------------------------------------------------------------===// 13 14#include "NVPTXTargetMachine.h" 15#include "MCTargetDesc/NVPTXMCAsmInfo.h" 16#include "NVPTX.h" 17#include "NVPTXAllocaHoisting.h" 18#include "NVPTXLowerAggrCopies.h" 19#include "NVPTXTargetObjectFile.h" 20#include "llvm/Analysis/Passes.h" 21#include "llvm/CodeGen/AsmPrinter.h" 22#include "llvm/CodeGen/MachineFunctionAnalysis.h" 23#include "llvm/CodeGen/MachineModuleInfo.h" 24#include "llvm/CodeGen/Passes.h" 25#include "llvm/IR/DataLayout.h" 26#include "llvm/IR/IRPrintingPasses.h" 27#include "llvm/IR/Verifier.h" 28#include "llvm/MC/MCAsmInfo.h" 29#include "llvm/MC/MCInstrInfo.h" 30#include "llvm/MC/MCStreamer.h" 31#include "llvm/MC/MCSubtargetInfo.h" 32#include "llvm/PassManager.h" 33#include "llvm/Support/CommandLine.h" 34#include "llvm/Support/Debug.h" 35#include "llvm/Support/FormattedStream.h" 36#include "llvm/Support/TargetRegistry.h" 37#include "llvm/Support/raw_ostream.h" 38#include "llvm/Target/TargetInstrInfo.h" 39#include "llvm/Target/TargetLowering.h" 40#include "llvm/Target/TargetLoweringObjectFile.h" 41#include "llvm/Target/TargetMachine.h" 42#include "llvm/Target/TargetOptions.h" 43#include "llvm/Target/TargetRegisterInfo.h" 44#include "llvm/Target/TargetSubtargetInfo.h" 45#include "llvm/Transforms/Scalar.h" 46 47using namespace llvm; 48 49namespace llvm { 50void initializeNVVMReflectPass(PassRegistry&); 51void initializeGenericToNVVMPass(PassRegistry&); 52void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&); 53void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry 
&); 54void initializeNVPTXLowerStructArgsPass(PassRegistry &); 55} 56 57extern "C" void LLVMInitializeNVPTXTarget() { 58 // Register the target. 59 RegisterTargetMachine<NVPTXTargetMachine32> X(TheNVPTXTarget32); 60 RegisterTargetMachine<NVPTXTargetMachine64> Y(TheNVPTXTarget64); 61 62 // FIXME: This pass is really intended to be invoked during IR optimization, 63 // but it's very NVPTX-specific. 64 initializeNVVMReflectPass(*PassRegistry::getPassRegistry()); 65 initializeGenericToNVVMPass(*PassRegistry::getPassRegistry()); 66 initializeNVPTXAssignValidGlobalNamesPass(*PassRegistry::getPassRegistry()); 67 initializeNVPTXFavorNonGenericAddrSpacesPass( 68 *PassRegistry::getPassRegistry()); 69 initializeNVPTXLowerStructArgsPass(*PassRegistry::getPassRegistry()); 70} 71 72NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, StringRef TT, 73 StringRef CPU, StringRef FS, 74 const TargetOptions &Options, 75 Reloc::Model RM, CodeModel::Model CM, 76 CodeGenOpt::Level OL, bool is64bit) 77 : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), 78 TLOF(make_unique<NVPTXTargetObjectFile>()), 79 Subtarget(TT, CPU, FS, *this, is64bit) { 80 initAsmInfo(); 81} 82 83NVPTXTargetMachine::~NVPTXTargetMachine() {} 84 85void NVPTXTargetMachine32::anchor() {} 86 87NVPTXTargetMachine32::NVPTXTargetMachine32( 88 const Target &T, StringRef TT, StringRef CPU, StringRef FS, 89 const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, 90 CodeGenOpt::Level OL) 91 : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} 92 93void NVPTXTargetMachine64::anchor() {} 94 95NVPTXTargetMachine64::NVPTXTargetMachine64( 96 const Target &T, StringRef TT, StringRef CPU, StringRef FS, 97 const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, 98 CodeGenOpt::Level OL) 99 : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} 100 101namespace { 102class NVPTXPassConfig : public TargetPassConfig { 103public: 104 NVPTXPassConfig(NVPTXTargetMachine *TM, 
PassManagerBase &PM) 105 : TargetPassConfig(TM, PM) {} 106 107 NVPTXTargetMachine &getNVPTXTargetMachine() const { 108 return getTM<NVPTXTargetMachine>(); 109 } 110 111 void addIRPasses() override; 112 bool addInstSelector() override; 113 void addPostRegAlloc() override; 114 void addMachineSSAOptimization() override; 115 116 FunctionPass *createTargetRegisterAllocator(bool) override; 117 void addFastRegAlloc(FunctionPass *RegAllocPass) override; 118 void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override; 119}; 120} // end anonymous namespace 121 122TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) { 123 NVPTXPassConfig *PassConfig = new NVPTXPassConfig(this, PM); 124 return PassConfig; 125} 126 127void NVPTXTargetMachine::addAnalysisPasses(PassManagerBase &PM) { 128 // Add first the target-independent BasicTTI pass, then our NVPTX pass. This 129 // allows the NVPTX pass to delegate to the target independent layer when 130 // appropriate. 131 PM.add(createBasicTargetTransformInfoPass(this)); 132 PM.add(createNVPTXTargetTransformInfoPass(this)); 133} 134 135void NVPTXPassConfig::addIRPasses() { 136 // The following passes are known to not play well with virtual regs hanging 137 // around after register allocation (which in our case, is *all* registers). 138 // We explicitly disable them here. We do, however, need some functionality 139 // of the PrologEpilogCodeInserter pass, so we emulate that behavior in the 140 // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp). 
141 disablePass(&PrologEpilogCodeInserterID); 142 disablePass(&MachineCopyPropagationID); 143 disablePass(&BranchFolderPassID); 144 disablePass(&TailDuplicateID); 145 146 addPass(createNVPTXImageOptimizerPass()); 147 TargetPassConfig::addIRPasses(); 148 addPass(createNVPTXAssignValidGlobalNamesPass()); 149 addPass(createGenericToNVVMPass()); 150 addPass(createNVPTXFavorNonGenericAddrSpacesPass()); 151 addPass(createSeparateConstOffsetFromGEPPass()); 152 // The SeparateConstOffsetFromGEP pass creates variadic bases that can be used 153 // by multiple GEPs. Run GVN or EarlyCSE to really reuse them. GVN generates 154 // significantly better code than EarlyCSE for some of our benchmarks. 155 if (getOptLevel() == CodeGenOpt::Aggressive) 156 addPass(createGVNPass()); 157 else 158 addPass(createEarlyCSEPass()); 159 // Both FavorNonGenericAddrSpaces and SeparateConstOffsetFromGEP may leave 160 // some dead code. We could remove dead code in an ad-hoc manner, but that 161 // requires manual work and might be error-prone. 162 // 163 // The FavorNonGenericAddrSpaces pass shortcuts unnecessary addrspacecasts, 164 // and leave them unused. 165 // 166 // SeparateConstOffsetFromGEP rebuilds a new index from the old index, and the 167 // old index and some of its intermediate results may become unused. 
168 addPass(createDeadCodeEliminationPass()); 169} 170 171bool NVPTXPassConfig::addInstSelector() { 172 const NVPTXSubtarget &ST = 173 getTM<NVPTXTargetMachine>().getSubtarget<NVPTXSubtarget>(); 174 175 addPass(createLowerAggrCopies()); 176 addPass(createAllocaHoisting()); 177 addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel())); 178 179 if (!ST.hasImageHandles()) 180 addPass(createNVPTXReplaceImageHandlesPass()); 181 182 return false; 183} 184 185void NVPTXPassConfig::addPostRegAlloc() { 186 addPass(createNVPTXPrologEpilogPass(), false); 187} 188 189FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) { 190 return nullptr; // No reg alloc 191} 192 193void NVPTXPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) { 194 assert(!RegAllocPass && "NVPTX uses no regalloc!"); 195 addPass(&PHIEliminationID); 196 addPass(&TwoAddressInstructionPassID); 197} 198 199void NVPTXPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { 200 assert(!RegAllocPass && "NVPTX uses no regalloc!"); 201 202 addPass(&ProcessImplicitDefsID); 203 addPass(&LiveVariablesID); 204 addPass(&MachineLoopInfoID); 205 addPass(&PHIEliminationID); 206 207 addPass(&TwoAddressInstructionPassID); 208 addPass(&RegisterCoalescerID); 209 210 // PreRA instruction scheduling. 211 if (addPass(&MachineSchedulerID)) 212 printAndVerify("After Machine Scheduling"); 213 214 215 addPass(&StackSlotColoringID); 216 217 // FIXME: Needs physical registers 218 //addPass(&PostRAMachineLICMID); 219 220 printAndVerify("After StackSlotColoring"); 221} 222 223void NVPTXPassConfig::addMachineSSAOptimization() { 224 // Pre-ra tail duplication. 225 if (addPass(&EarlyTailDuplicateID)) 226 printAndVerify("After Pre-RegAlloc TailDuplicate"); 227 228 // Optimize PHIs before DCE: removing dead PHI cycles may make more 229 // instructions dead. 230 addPass(&OptimizePHIsID); 231 232 // This pass merges large allocas. StackSlotColoring is a different pass 233 // which merges spill slots. 
234 addPass(&StackColoringID); 235 236 // If the target requests it, assign local variables to stack slots relative 237 // to one another and simplify frame index references where possible. 238 addPass(&LocalStackSlotAllocationID); 239 240 // With optimization, dead code should already be eliminated. However 241 // there is one known exception: lowered code for arguments that are only 242 // used by tail calls, where the tail calls reuse the incoming stack 243 // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll). 244 addPass(&DeadMachineInstructionElimID); 245 printAndVerify("After codegen DCE pass"); 246 247 // Allow targets to insert passes that improve instruction level parallelism, 248 // like if-conversion. Such passes will typically need dominator trees and 249 // loop info, just like LICM and CSE below. 250 if (addILPOpts()) 251 printAndVerify("After ILP optimizations"); 252 253 addPass(&MachineLICMID); 254 addPass(&MachineCSEID); 255 256 addPass(&MachineSinkingID); 257 printAndVerify("After Machine LICM, CSE and Sinking passes"); 258 259 addPass(&PeepholeOptimizerID); 260 printAndVerify("After codegen peephole optimization pass"); 261} 262