1//===-- NVPTXTargetMachine.cpp - Define TargetMachine for NVPTX -----------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Top-level implementation for the NVPTX target.
11//
12//===----------------------------------------------------------------------===//
13
14#include "NVPTXTargetMachine.h"
15#include "MCTargetDesc/NVPTXMCAsmInfo.h"
16#include "NVPTX.h"
17#include "NVPTXAllocaHoisting.h"
18#include "NVPTXLowerAggrCopies.h"
19#include "NVPTXTargetObjectFile.h"
20#include "NVPTXTargetTransformInfo.h"
21#include "llvm/Analysis/Passes.h"
22#include "llvm/CodeGen/AsmPrinter.h"
23#include "llvm/CodeGen/MachineFunctionAnalysis.h"
24#include "llvm/CodeGen/MachineModuleInfo.h"
25#include "llvm/CodeGen/Passes.h"
26#include "llvm/IR/DataLayout.h"
27#include "llvm/IR/IRPrintingPasses.h"
28#include "llvm/IR/LegacyPassManager.h"
29#include "llvm/IR/Verifier.h"
30#include "llvm/MC/MCAsmInfo.h"
31#include "llvm/MC/MCInstrInfo.h"
32#include "llvm/MC/MCStreamer.h"
33#include "llvm/MC/MCSubtargetInfo.h"
34#include "llvm/Support/CommandLine.h"
35#include "llvm/Support/Debug.h"
36#include "llvm/Support/FormattedStream.h"
37#include "llvm/Support/TargetRegistry.h"
38#include "llvm/Support/raw_ostream.h"
39#include "llvm/Target/TargetInstrInfo.h"
40#include "llvm/Target/TargetLowering.h"
41#include "llvm/Target/TargetLoweringObjectFile.h"
42#include "llvm/Target/TargetMachine.h"
43#include "llvm/Target/TargetOptions.h"
44#include "llvm/Target/TargetRegisterInfo.h"
45#include "llvm/Target/TargetSubtargetInfo.h"
46#include "llvm/Transforms/Scalar.h"
47
48using namespace llvm;
49
50namespace llvm {
51void initializeNVVMReflectPass(PassRegistry&);
52void initializeGenericToNVVMPass(PassRegistry&);
53void initializeNVPTXAllocaHoistingPass(PassRegistry &);
54void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
55void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &);
56void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
57void initializeNVPTXLowerKernelArgsPass(PassRegistry &);
58void initializeNVPTXLowerAllocaPass(PassRegistry &);
59}
60
61extern "C" void LLVMInitializeNVPTXTarget() {
62  // Register the target.
63  RegisterTargetMachine<NVPTXTargetMachine32> X(TheNVPTXTarget32);
64  RegisterTargetMachine<NVPTXTargetMachine64> Y(TheNVPTXTarget64);
65
66  // FIXME: This pass is really intended to be invoked during IR optimization,
67  // but it's very NVPTX-specific.
68  PassRegistry &PR = *PassRegistry::getPassRegistry();
69  initializeNVVMReflectPass(PR);
70  initializeGenericToNVVMPass(PR);
71  initializeNVPTXAllocaHoistingPass(PR);
72  initializeNVPTXAssignValidGlobalNamesPass(PR);
73  initializeNVPTXFavorNonGenericAddrSpacesPass(PR);
74  initializeNVPTXLowerKernelArgsPass(PR);
75  initializeNVPTXLowerAllocaPass(PR);
76  initializeNVPTXLowerAggrCopiesPass(PR);
77}
78
// Returns the data layout string handed to LLVMTargetMachine.
//
// The string always starts with "e" and ends with the shared
// "-i64:64-v16:16-v32:32-n16:32:64" suffix. Only the 32-bit flavor inserts an
// explicit "-p:32:32" pointer specification between the two; the 64-bit flavor
// adds no pointer component at all.
static std::string computeDataLayout(bool is64Bit) {
  const char *PointerSpec = is64Bit ? "" : "-p:32:32";
  return std::string("e") + PointerSpec + "-i64:64-v16:16-v32:32-n16:32:64";
}
89
90NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT,
91                                       StringRef CPU, StringRef FS,
92                                       const TargetOptions &Options,
93                                       Reloc::Model RM, CodeModel::Model CM,
94                                       CodeGenOpt::Level OL, bool is64bit)
95    : LLVMTargetMachine(T, computeDataLayout(is64bit), TT, CPU, FS, Options, RM,
96                        CM, OL),
97      is64bit(is64bit), TLOF(make_unique<NVPTXTargetObjectFile>()),
98      Subtarget(TT, CPU, FS, *this) {
99  if (TT.getOS() == Triple::NVCL)
100    drvInterface = NVPTX::NVCL;
101  else
102    drvInterface = NVPTX::CUDA;
103  initAsmInfo();
104}
105
106NVPTXTargetMachine::~NVPTXTargetMachine() {}
107
108void NVPTXTargetMachine32::anchor() {}
109
110NVPTXTargetMachine32::NVPTXTargetMachine32(const Target &T, const Triple &TT,
111                                           StringRef CPU, StringRef FS,
112                                           const TargetOptions &Options,
113                                           Reloc::Model RM, CodeModel::Model CM,
114                                           CodeGenOpt::Level OL)
115    : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
116
117void NVPTXTargetMachine64::anchor() {}
118
119NVPTXTargetMachine64::NVPTXTargetMachine64(const Target &T, const Triple &TT,
120                                           StringRef CPU, StringRef FS,
121                                           const TargetOptions &Options,
122                                           Reloc::Model RM, CodeModel::Model CM,
123                                           CodeGenOpt::Level OL)
124    : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
125
126namespace {
127class NVPTXPassConfig : public TargetPassConfig {
128public:
129  NVPTXPassConfig(NVPTXTargetMachine *TM, PassManagerBase &PM)
130      : TargetPassConfig(TM, PM) {}
131
132  NVPTXTargetMachine &getNVPTXTargetMachine() const {
133    return getTM<NVPTXTargetMachine>();
134  }
135
136  void addIRPasses() override;
137  bool addInstSelector() override;
138  void addPostRegAlloc() override;
139  void addMachineSSAOptimization() override;
140
141  FunctionPass *createTargetRegisterAllocator(bool) override;
142  void addFastRegAlloc(FunctionPass *RegAllocPass) override;
143  void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
144
145private:
146  // if the opt level is aggressive, add GVN; otherwise, add EarlyCSE.
147  void addEarlyCSEOrGVNPass();
148};
149} // end anonymous namespace
150
151TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
152  NVPTXPassConfig *PassConfig = new NVPTXPassConfig(this, PM);
153  return PassConfig;
154}
155
156TargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() {
157  return TargetIRAnalysis([this](const Function &F) {
158    return TargetTransformInfo(NVPTXTTIImpl(this, F));
159  });
160}
161
162void NVPTXPassConfig::addEarlyCSEOrGVNPass() {
163  if (getOptLevel() == CodeGenOpt::Aggressive)
164    addPass(createGVNPass());
165  else
166    addPass(createEarlyCSEPass());
167}
168
// Builds the IR-level part of the NVPTX codegen pipeline. In order, it:
//   1. disables three generic passes,
//   2. adds the NVPTX-specific IR passes and address-space propagation,
//   3. adds the straight-line scalar optimizations,
//   4. adds the generic TargetPassConfig IR passes, and
//   5. adds a final EarlyCSE/GVN cleanup.
// The comments below rely on passes running in the order they are added, so
// the statement order in this function is significant.
void NVPTXPassConfig::addIRPasses() {
  // The following passes are known to not play well with virtual regs hanging
  // around after register allocation (which in our case, is *all* registers).
  // We explicitly disable them here.  We do, however, need some functionality
  // of the PrologEpilogCodeInserter pass, so we emulate that behavior in the
  // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp).
  disablePass(&PrologEpilogCodeInserterID);
  disablePass(&MachineCopyPropagationID);
  disablePass(&TailDuplicateID);

  // NVPTX-specific IR passes that run before address-space propagation.
  addPass(createNVVMReflectPass());
  addPass(createNVPTXImageOptimizerPass());
  addPass(createNVPTXAssignValidGlobalNamesPass());
  addPass(createGenericToNVVMPass());

  // === Propagate special address spaces ===
  addPass(createNVPTXLowerKernelArgsPass(&getNVPTXTargetMachine()));
  // NVPTXLowerKernelArgs emits alloca for byval parameters which can often
  // be eliminated by SROA.
  addPass(createSROAPass());
  addPass(createNVPTXLowerAllocaPass());
  addPass(createNVPTXFavorNonGenericAddrSpacesPass());
  // FavorNonGenericAddrSpaces shortcuts unnecessary addrspacecasts and leaves
  // them unused. We could remove dead code in an ad-hoc manner, but that
  // requires manual work and might be error-prone, so run DCE instead.
  addPass(createDeadCodeEliminationPass());

  // === Straight-line scalar optimizations ===
  addPass(createSeparateConstOffsetFromGEPPass());
  addPass(createSpeculativeExecutionPass());
  // ReassociateGEPs exposes more opportunities for SLSR. See
  // the example in reassociate-geps-and-slsr.ll.
  addPass(createStraightLineStrengthReducePass());
  // SeparateConstOffsetFromGEP and SLSR create common expressions which GVN or
  // EarlyCSE can reuse. GVN generates significantly better code than EarlyCSE
  // for some of our benchmarks.
  addEarlyCSEOrGVNPass();
  // Run NaryReassociate after EarlyCSE/GVN to be more effective.
  addPass(createNaryReassociatePass());
  // NaryReassociate on GEPs creates redundant common expressions, so run
  // EarlyCSE after it.
  addPass(createEarlyCSEPass());

  // === LSR and other generic IR passes ===
  TargetPassConfig::addIRPasses();
  // EarlyCSE is not always strong enough to clean up what LSR produces. For
  // example, GVN can combine
  //
  //   %0 = add %a, %b
  //   %1 = add %b, %a
  //
  // and
  //
  //   %0 = shl nsw %a, 2
  //   %1 = shl %a, 2
  //
  // but EarlyCSE can do neither of them.
  addEarlyCSEOrGVNPass();
}
228
229bool NVPTXPassConfig::addInstSelector() {
230  const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl();
231
232  addPass(createLowerAggrCopies());
233  addPass(createAllocaHoisting());
234  addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel()));
235
236  if (!ST.hasImageHandles())
237    addPass(createNVPTXReplaceImageHandlesPass());
238
239  return false;
240}
241
242void NVPTXPassConfig::addPostRegAlloc() {
243  addPass(createNVPTXPrologEpilogPass(), false);
244  // NVPTXPrologEpilogPass calculates frame object offset and replace frame
245  // index with VRFrame register. NVPTXPeephole need to be run after that and
246  // will replace VRFrame with VRFrameLocal when possible.
247  addPass(createNVPTXPeephole());
248}
249
// NVPTX uses no register allocator, so this never creates a pass: it ignores
// its flag argument and always returns nullptr. addFastRegAlloc and
// addOptimizedRegAlloc assert that they are handed a null pass.
FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) {
  return nullptr; // No reg alloc
}
253
// "Fast" register-allocation stage for NVPTX. No allocator is run;
// RegAllocPass must be null (asserted). Only PHI elimination and the
// two-address instruction pass are added, in that order.
void NVPTXPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
  assert(!RegAllocPass && "NVPTX uses no regalloc!");
  addPass(&PHIEliminationID);
  addPass(&TwoAddressInstructionPassID);
}
259
// "Optimized" register-allocation stage for NVPTX. No allocator is run;
// RegAllocPass must be null (asserted). The passes that surround allocation
// are still added, in the order written below.
void NVPTXPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
  assert(!RegAllocPass && "NVPTX uses no regalloc!");

  // Analyses and PHI elimination.
  addPass(&ProcessImplicitDefsID);
  addPass(&LiveVariablesID);
  addPass(&MachineLoopInfoID);
  addPass(&PHIEliminationID);

  addPass(&TwoAddressInstructionPassID);
  addPass(&RegisterCoalescerID);

  // PreRA instruction scheduling.
  // The banner is only emitted when addPass() returns a non-null value.
  // NOTE(review): presumably null means the scheduler pass was disabled and
  // therefore not added -- confirm against TargetPassConfig::addPass.
  if (addPass(&MachineSchedulerID))
    printAndVerify("After Machine Scheduling");


  addPass(&StackSlotColoringID);

  // FIXME: Needs physical registers
  //addPass(&PostRAMachineLICMID);

  printAndVerify("After StackSlotColoring");
}
283
// Builds the machine-SSA optimization pipeline for NVPTX by adding the passes
// below in order. printAndVerify() is called with a banner string after
// selected stages.
// NOTE(review): printAndVerify presumably inserts an optional machine-code
// dump/verifier step labelled with the banner -- confirm against
// TargetPassConfig.
void NVPTXPassConfig::addMachineSSAOptimization() {
  // Pre-ra tail duplication.
  // The banner is only emitted when addPass() returns a non-null value.
  if (addPass(&EarlyTailDuplicateID))
    printAndVerify("After Pre-RegAlloc TailDuplicate");

  // Optimize PHIs before DCE: removing dead PHI cycles may make more
  // instructions dead.
  addPass(&OptimizePHIsID);

  // This pass merges large allocas. StackSlotColoring is a different pass
  // which merges spill slots.
  addPass(&StackColoringID);

  // If the target requests it, assign local variables to stack slots relative
  // to one another and simplify frame index references where possible.
  addPass(&LocalStackSlotAllocationID);

  // With optimization, dead code should already be eliminated. However
  // there is one known exception: lowered code for arguments that are only
  // used by tail calls, where the tail calls reuse the incoming stack
  // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
  addPass(&DeadMachineInstructionElimID);
  printAndVerify("After codegen DCE pass");

  // Allow targets to insert passes that improve instruction level parallelism,
  // like if-conversion. Such passes will typically need dominator trees and
  // loop info, just like LICM and CSE below.
  if (addILPOpts())
    printAndVerify("After ILP optimizations");

  // Loop-invariant code motion, common-subexpression elimination, and sinking
  // share a single banner after all three have been added.
  addPass(&MachineLICMID);
  addPass(&MachineCSEID);

  addPass(&MachineSinkingID);
  printAndVerify("After Machine LICM, CSE and Sinking passes");

  addPass(&PeepholeOptimizerID);
  printAndVerify("After codegen peephole optimization pass");
}
323