NVPTXAsmPrinter.cpp revision 252723
1//===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains a printer that converts from our internal representation
11// of machine-dependent LLVM code to NVPTX assembly language.
12//
13//===----------------------------------------------------------------------===//
14
15#include "NVPTXAsmPrinter.h"
16#include "MCTargetDesc/NVPTXMCAsmInfo.h"
17#include "NVPTX.h"
18#include "NVPTXInstrInfo.h"
19#include "NVPTXNumRegisters.h"
20#include "NVPTXRegisterInfo.h"
21#include "NVPTXTargetMachine.h"
22#include "NVPTXUtilities.h"
23#include "cl_common_defines.h"
24#include "llvm/ADT/StringExtras.h"
25#include "llvm/Analysis/ConstantFolding.h"
26#include "llvm/Assembly/Writer.h"
27#include "llvm/CodeGen/Analysis.h"
28#include "llvm/CodeGen/MachineFrameInfo.h"
29#include "llvm/CodeGen/MachineModuleInfo.h"
30#include "llvm/CodeGen/MachineRegisterInfo.h"
31#include "llvm/DebugInfo.h"
32#include "llvm/IR/DerivedTypes.h"
33#include "llvm/IR/Function.h"
34#include "llvm/IR/GlobalVariable.h"
35#include "llvm/IR/Module.h"
36#include "llvm/IR/Operator.h"
37#include "llvm/MC/MCStreamer.h"
38#include "llvm/MC/MCSymbol.h"
39#include "llvm/Support/CommandLine.h"
40#include "llvm/Support/ErrorHandling.h"
41#include "llvm/Support/FormattedStream.h"
42#include "llvm/Support/Path.h"
43#include "llvm/Support/TargetRegistry.h"
44#include "llvm/Support/TimeValue.h"
45#include "llvm/Target/Mangler.h"
46#include "llvm/Target/TargetLoweringObjectFile.h"
47#include <sstream>
48using namespace llvm;
49
50#include "NVPTXGenAsmWriter.inc"
51
// Global flag, initialized to true. It is not read anywhere in this part of
// the file. NOTE(review): presumably consulted by other NVPTX code -- confirm.
bool RegAllocNilUsed = true;

// Name prefix printed for the NVPTX::VRDepot register operand; printOperand()
// appends the current function number to it.
#define DEPOTNAME "__local_depot"

// -nvptx-emit-line-numbers (default: true). When false,
// emitLineNumberAsDotLoc() returns without emitting anything.
static cl::opt<bool>
EmitLineNumbers("nvptx-emit-line-numbers",
                cl::desc("NVPTX Specific: Emit Line numbers even without -G"),
                cl::init(true));

// Backing storage for the -nvptx-emit-src option declared below
// (default: false).
namespace llvm { bool InterleaveSrcInPtx = false; }

// -nvptx-emit-src. The parsed value is stored in llvm::InterleaveSrcInPtx;
// when it is set, emitLineNumberAsDotLoc() calls emitSrcInText() before
// printing the .loc directive.
static cl::opt<bool, true>
InterleaveSrc("nvptx-emit-src", cl::ZeroOrMore,
              cl::desc("NVPTX Specific: Emit source line in ptx file"),
              cl::location(llvm::InterleaveSrcInPtx));
67
68namespace {
69/// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
70/// depends.
71void DiscoverDependentGlobals(const Value *V,
72                              DenseSet<const GlobalVariable *> &Globals) {
73  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
74    Globals.insert(GV);
75  else {
76    if (const User *U = dyn_cast<User>(V)) {
77      for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) {
78        DiscoverDependentGlobals(U->getOperand(i), Globals);
79      }
80    }
81  }
82}
83
84/// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
85/// instances to be emitted, but only after any dependents have been added
86/// first.
87void VisitGlobalVariableForEmission(
88    const GlobalVariable *GV, SmallVectorImpl<const GlobalVariable *> &Order,
89    DenseSet<const GlobalVariable *> &Visited,
90    DenseSet<const GlobalVariable *> &Visiting) {
91  // Have we already visited this one?
92  if (Visited.count(GV))
93    return;
94
95  // Do we have a circular dependency?
96  if (Visiting.count(GV))
97    report_fatal_error("Circular dependency found in global variable set");
98
99  // Start visiting this global
100  Visiting.insert(GV);
101
102  // Make sure we visit all dependents first
103  DenseSet<const GlobalVariable *> Others;
104  for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
105    DiscoverDependentGlobals(GV->getOperand(i), Others);
106
107  for (DenseSet<const GlobalVariable *>::iterator I = Others.begin(),
108                                                  E = Others.end();
109       I != E; ++I)
110    VisitGlobalVariableForEmission(*I, Order, Visited, Visiting);
111
112  // Now we can visit ourself
113  Order.push_back(GV);
114  Visited.insert(GV);
115  Visiting.erase(GV);
116}
117}
118
119// @TODO: This is a copy from AsmPrinter.cpp.  The function is static, so we
120// cannot just link to the existing version.
121/// LowerConstant - Lower the specified LLVM Constant to an MCExpr.
122///
123using namespace nvptx;
124const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
125  MCContext &Ctx = AP.OutContext;
126
127  if (CV->isNullValue() || isa<UndefValue>(CV))
128    return MCConstantExpr::Create(0, Ctx);
129
130  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
131    return MCConstantExpr::Create(CI->getZExtValue(), Ctx);
132
133  if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
134    return MCSymbolRefExpr::Create(AP.Mang->getSymbol(GV), Ctx);
135
136  if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV))
137    return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx);
138
139  const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
140  if (CE == 0)
141    llvm_unreachable("Unknown constant value to lower!");
142
143  switch (CE->getOpcode()) {
144  default:
145    // If the code isn't optimized, there may be outstanding folding
146    // opportunities. Attempt to fold the expression using DataLayout as a
147    // last resort before giving up.
148    if (Constant *C = ConstantFoldConstantExpression(CE, AP.TM.getDataLayout()))
149      if (C != CE)
150        return LowerConstant(C, AP);
151
152    // Otherwise report the problem to the user.
153    {
154      std::string S;
155      raw_string_ostream OS(S);
156      OS << "Unsupported expression in static initializer: ";
157      WriteAsOperand(OS, CE, /*PrintType=*/ false,
158                     !AP.MF ? 0 : AP.MF->getFunction()->getParent());
159      report_fatal_error(OS.str());
160    }
161  case Instruction::GetElementPtr: {
162    const DataLayout &TD = *AP.TM.getDataLayout();
163    // Generate a symbolic expression for the byte address
164    APInt OffsetAI(TD.getPointerSizeInBits(), 0);
165    cast<GEPOperator>(CE)->accumulateConstantOffset(TD, OffsetAI);
166
167    const MCExpr *Base = LowerConstant(CE->getOperand(0), AP);
168    if (!OffsetAI)
169      return Base;
170
171    int64_t Offset = OffsetAI.getSExtValue();
172    return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx),
173                                   Ctx);
174  }
175
176  case Instruction::Trunc:
177    // We emit the value and depend on the assembler to truncate the generated
178    // expression properly.  This is important for differences between
179    // blockaddress labels.  Since the two labels are in the same function, it
180    // is reasonable to treat their delta as a 32-bit value.
181  // FALL THROUGH.
182  case Instruction::BitCast:
183    return LowerConstant(CE->getOperand(0), AP);
184
185  case Instruction::IntToPtr: {
186    const DataLayout &TD = *AP.TM.getDataLayout();
187    // Handle casts to pointers by changing them into casts to the appropriate
188    // integer type.  This promotes constant folding and simplifies this code.
189    Constant *Op = CE->getOperand(0);
190    Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()),
191                                      false /*ZExt*/);
192    return LowerConstant(Op, AP);
193  }
194
195  case Instruction::PtrToInt: {
196    const DataLayout &TD = *AP.TM.getDataLayout();
197    // Support only foldable casts to/from pointers that can be eliminated by
198    // changing the pointer to the appropriately sized integer type.
199    Constant *Op = CE->getOperand(0);
200    Type *Ty = CE->getType();
201
202    const MCExpr *OpExpr = LowerConstant(Op, AP);
203
204    // We can emit the pointer value into this slot if the slot is an
205    // integer slot equal to the size of the pointer.
206    if (TD.getTypeAllocSize(Ty) == TD.getTypeAllocSize(Op->getType()))
207      return OpExpr;
208
209    // Otherwise the pointer is smaller than the resultant integer, mask off
210    // the high bits so we are sure to get a proper truncation if the input is
211    // a constant expr.
212    unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType());
213    const MCExpr *MaskExpr =
214        MCConstantExpr::Create(~0ULL >> (64 - InBits), Ctx);
215    return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx);
216  }
217
218    // The MC library also has a right-shift operator, but it isn't consistently
219  // signed or unsigned between different targets.
220  case Instruction::Add:
221  case Instruction::Sub:
222  case Instruction::Mul:
223  case Instruction::SDiv:
224  case Instruction::SRem:
225  case Instruction::Shl:
226  case Instruction::And:
227  case Instruction::Or:
228  case Instruction::Xor: {
229    const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP);
230    const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP);
231    switch (CE->getOpcode()) {
232    default:
233      llvm_unreachable("Unknown binary operator constant cast expr");
234    case Instruction::Add:
235      return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx);
236    case Instruction::Sub:
237      return MCBinaryExpr::CreateSub(LHS, RHS, Ctx);
238    case Instruction::Mul:
239      return MCBinaryExpr::CreateMul(LHS, RHS, Ctx);
240    case Instruction::SDiv:
241      return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx);
242    case Instruction::SRem:
243      return MCBinaryExpr::CreateMod(LHS, RHS, Ctx);
244    case Instruction::Shl:
245      return MCBinaryExpr::CreateShl(LHS, RHS, Ctx);
246    case Instruction::And:
247      return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx);
248    case Instruction::Or:
249      return MCBinaryExpr::CreateOr(LHS, RHS, Ctx);
250    case Instruction::Xor:
251      return MCBinaryExpr::CreateXor(LHS, RHS, Ctx);
252    }
253  }
254  }
255}
256
257void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) {
258  if (!EmitLineNumbers)
259    return;
260  if (ignoreLoc(MI))
261    return;
262
263  DebugLoc curLoc = MI.getDebugLoc();
264
265  if (prevDebugLoc.isUnknown() && curLoc.isUnknown())
266    return;
267
268  if (prevDebugLoc == curLoc)
269    return;
270
271  prevDebugLoc = curLoc;
272
273  if (curLoc.isUnknown())
274    return;
275
276  const MachineFunction *MF = MI.getParent()->getParent();
277  //const TargetMachine &TM = MF->getTarget();
278
279  const LLVMContext &ctx = MF->getFunction()->getContext();
280  DIScope Scope(curLoc.getScope(ctx));
281
282  if (!Scope.Verify())
283    return;
284
285  StringRef fileName(Scope.getFilename());
286  StringRef dirName(Scope.getDirectory());
287  SmallString<128> FullPathName = dirName;
288  if (!dirName.empty() && !sys::path::is_absolute(fileName)) {
289    sys::path::append(FullPathName, fileName);
290    fileName = FullPathName.str();
291  }
292
293  if (filenameMap.find(fileName.str()) == filenameMap.end())
294    return;
295
296  // Emit the line from the source file.
297  if (llvm::InterleaveSrcInPtx)
298    this->emitSrcInText(fileName.str(), curLoc.getLine());
299
300  std::stringstream temp;
301  temp << "\t.loc " << filenameMap[fileName.str()] << " " << curLoc.getLine()
302       << " " << curLoc.getCol();
303  OutStreamer.EmitRawText(Twine(temp.str().c_str()));
304}
305
306void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
307  SmallString<128> Str;
308  raw_svector_ostream OS(Str);
309  if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
310    emitLineNumberAsDotLoc(*MI);
311  printInstruction(MI, OS);
312  OutStreamer.EmitRawText(OS.str());
313}
314
315void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
316  const DataLayout *TD = TM.getDataLayout();
317  const TargetLowering *TLI = TM.getTargetLowering();
318
319  Type *Ty = F->getReturnType();
320
321  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
322
323  if (Ty->getTypeID() == Type::VoidTyID)
324    return;
325
326  O << " (";
327
328  if (isABI) {
329    if (Ty->isPrimitiveType() || Ty->isIntegerTy()) {
330      unsigned size = 0;
331      if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) {
332        size = ITy->getBitWidth();
333        if (size < 32)
334          size = 32;
335      } else {
336        assert(Ty->isFloatingPointTy() && "Floating point type expected here");
337        size = Ty->getPrimitiveSizeInBits();
338      }
339
340      O << ".param .b" << size << " func_retval0";
341    } else if (isa<PointerType>(Ty)) {
342      O << ".param .b" << TLI->getPointerTy().getSizeInBits()
343        << " func_retval0";
344    } else {
345      if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) {
346        SmallVector<EVT, 16> vtparts;
347        ComputeValueVTs(*TLI, Ty, vtparts);
348        unsigned totalsz = 0;
349        for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
350          unsigned elems = 1;
351          EVT elemtype = vtparts[i];
352          if (vtparts[i].isVector()) {
353            elems = vtparts[i].getVectorNumElements();
354            elemtype = vtparts[i].getVectorElementType();
355          }
356          for (unsigned j = 0, je = elems; j != je; ++j) {
357            unsigned sz = elemtype.getSizeInBits();
358            if (elemtype.isInteger() && (sz < 8))
359              sz = 8;
360            totalsz += sz / 8;
361          }
362        }
363        unsigned retAlignment = 0;
364        if (!llvm::getAlign(*F, 0, retAlignment))
365          retAlignment = TD->getABITypeAlignment(Ty);
366        O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
367          << "]";
368      } else
369        assert(false && "Unknown return type");
370    }
371  } else {
372    SmallVector<EVT, 16> vtparts;
373    ComputeValueVTs(*TLI, Ty, vtparts);
374    unsigned idx = 0;
375    for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
376      unsigned elems = 1;
377      EVT elemtype = vtparts[i];
378      if (vtparts[i].isVector()) {
379        elems = vtparts[i].getVectorNumElements();
380        elemtype = vtparts[i].getVectorElementType();
381      }
382
383      for (unsigned j = 0, je = elems; j != je; ++j) {
384        unsigned sz = elemtype.getSizeInBits();
385        if (elemtype.isInteger() && (sz < 32))
386          sz = 32;
387        O << ".reg .b" << sz << " func_retval" << idx;
388        if (j < je - 1)
389          O << ", ";
390        ++idx;
391      }
392      if (i < e - 1)
393        O << ", ";
394    }
395  }
396  O << ") ";
397  return;
398}
399
400void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
401                                        raw_ostream &O) {
402  const Function *F = MF.getFunction();
403  printReturnValStr(F, O);
404}
405
406void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
407  SmallString<128> Str;
408  raw_svector_ostream O(Str);
409
410  if (!GlobalsEmitted) {
411    emitGlobals(*MF->getFunction()->getParent());
412    GlobalsEmitted = true;
413  }
414
415  // Set up
416  MRI = &MF->getRegInfo();
417  F = MF->getFunction();
418  emitLinkageDirective(F, O);
419  if (llvm::isKernelFunction(*F))
420    O << ".entry ";
421  else {
422    O << ".func ";
423    printReturnValStr(*MF, O);
424  }
425
426  O << *CurrentFnSym;
427
428  emitFunctionParamList(*MF, O);
429
430  if (llvm::isKernelFunction(*F))
431    emitKernelFunctionDirectives(*F, O);
432
433  OutStreamer.EmitRawText(O.str());
434
435  prevDebugLoc = DebugLoc();
436}
437
438void NVPTXAsmPrinter::EmitFunctionBodyStart() {
439  const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
440  unsigned numRegClasses = TRI.getNumRegClasses();
441  VRidGlobal2LocalMap = new std::map<unsigned, unsigned>[numRegClasses + 1];
442  OutStreamer.EmitRawText(StringRef("{\n"));
443  setAndEmitFunctionVirtualRegisters(*MF);
444
445  SmallString<128> Str;
446  raw_svector_ostream O(Str);
447  emitDemotedVars(MF->getFunction(), O);
448  OutStreamer.EmitRawText(O.str());
449}
450
451void NVPTXAsmPrinter::EmitFunctionBodyEnd() {
452  OutStreamer.EmitRawText(StringRef("}\n"));
453  delete[] VRidGlobal2LocalMap;
454}
455
456void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
457                                                   raw_ostream &O) const {
458  // If the NVVM IR has some of reqntid* specified, then output
459  // the reqntid directive, and set the unspecified ones to 1.
460  // If none of reqntid* is specified, don't output reqntid directive.
461  unsigned reqntidx, reqntidy, reqntidz;
462  bool specified = false;
463  if (llvm::getReqNTIDx(F, reqntidx) == false)
464    reqntidx = 1;
465  else
466    specified = true;
467  if (llvm::getReqNTIDy(F, reqntidy) == false)
468    reqntidy = 1;
469  else
470    specified = true;
471  if (llvm::getReqNTIDz(F, reqntidz) == false)
472    reqntidz = 1;
473  else
474    specified = true;
475
476  if (specified)
477    O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz
478      << "\n";
479
480  // If the NVVM IR has some of maxntid* specified, then output
481  // the maxntid directive, and set the unspecified ones to 1.
482  // If none of maxntid* is specified, don't output maxntid directive.
483  unsigned maxntidx, maxntidy, maxntidz;
484  specified = false;
485  if (llvm::getMaxNTIDx(F, maxntidx) == false)
486    maxntidx = 1;
487  else
488    specified = true;
489  if (llvm::getMaxNTIDy(F, maxntidy) == false)
490    maxntidy = 1;
491  else
492    specified = true;
493  if (llvm::getMaxNTIDz(F, maxntidz) == false)
494    maxntidz = 1;
495  else
496    specified = true;
497
498  if (specified)
499    O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz
500      << "\n";
501
502  unsigned mincta;
503  if (llvm::getMinCTASm(F, mincta))
504    O << ".minnctapersm " << mincta << "\n";
505}
506
507void NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec,
508                                             raw_ostream &O) {
509  const TargetRegisterClass *RC = MRI->getRegClass(vr);
510  unsigned id = RC->getID();
511
512  std::map<unsigned, unsigned> &regmap = VRidGlobal2LocalMap[id];
513  unsigned mapped_vr = regmap[vr];
514
515  if (!isVec) {
516    O << getNVPTXRegClassStr(RC) << mapped_vr;
517    return;
518  }
519  report_fatal_error("Bad register!");
520}
521
522void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr, bool isVec,
523                                          raw_ostream &O) {
524  getVirtualRegisterName(vr, isVec, O);
525}
526
527void NVPTXAsmPrinter::printVecModifiedImmediate(
528    const MachineOperand &MO, const char *Modifier, raw_ostream &O) {
529  static const char vecelem[] = { '0', '1', '2', '3', '0', '1', '2', '3' };
530  int Imm = (int) MO.getImm();
531  if (0 == strcmp(Modifier, "vecelem"))
532    O << "_" << vecelem[Imm];
533  else if (0 == strcmp(Modifier, "vecv4comm1")) {
534    if ((Imm < 0) || (Imm > 3))
535      O << "//";
536  } else if (0 == strcmp(Modifier, "vecv4comm2")) {
537    if ((Imm < 4) || (Imm > 7))
538      O << "//";
539  } else if (0 == strcmp(Modifier, "vecv4pos")) {
540    if (Imm < 0)
541      Imm = 0;
542    O << "_" << vecelem[Imm % 4];
543  } else if (0 == strcmp(Modifier, "vecv2comm1")) {
544    if ((Imm < 0) || (Imm > 1))
545      O << "//";
546  } else if (0 == strcmp(Modifier, "vecv2comm2")) {
547    if ((Imm < 2) || (Imm > 3))
548      O << "//";
549  } else if (0 == strcmp(Modifier, "vecv2pos")) {
550    if (Imm < 0)
551      Imm = 0;
552    O << "_" << vecelem[Imm % 2];
553  } else
554    llvm_unreachable("Unknown Modifier on immediate operand");
555}
556
557void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
558                                   raw_ostream &O, const char *Modifier) {
559  const MachineOperand &MO = MI->getOperand(opNum);
560  switch (MO.getType()) {
561  case MachineOperand::MO_Register:
562    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
563      if (MO.getReg() == NVPTX::VRDepot)
564        O << DEPOTNAME << getFunctionNumber();
565      else
566        O << getRegisterName(MO.getReg());
567    } else {
568      if (!Modifier)
569        emitVirtualRegister(MO.getReg(), false, O);
570      else {
571        if (strcmp(Modifier, "vecfull") == 0)
572          emitVirtualRegister(MO.getReg(), true, O);
573        else
574          llvm_unreachable(
575              "Don't know how to handle the modifier on virtual register.");
576      }
577    }
578    return;
579
580  case MachineOperand::MO_Immediate:
581    if (!Modifier)
582      O << MO.getImm();
583    else if (strstr(Modifier, "vec") == Modifier)
584      printVecModifiedImmediate(MO, Modifier, O);
585    else
586      llvm_unreachable(
587          "Don't know how to handle modifier on immediate operand");
588    return;
589
590  case MachineOperand::MO_FPImmediate:
591    printFPConstant(MO.getFPImm(), O);
592    break;
593
594  case MachineOperand::MO_GlobalAddress:
595    O << *Mang->getSymbol(MO.getGlobal());
596    break;
597
598  case MachineOperand::MO_ExternalSymbol: {
599    const char *symbname = MO.getSymbolName();
600    if (strstr(symbname, ".PARAM") == symbname) {
601      unsigned index;
602      sscanf(symbname + 6, "%u[];", &index);
603      printParamName(index, O);
604    } else if (strstr(symbname, ".HLPPARAM") == symbname) {
605      unsigned index;
606      sscanf(symbname + 9, "%u[];", &index);
607      O << *CurrentFnSym << "_param_" << index << "_offset";
608    } else
609      O << symbname;
610    break;
611  }
612
613  case MachineOperand::MO_MachineBasicBlock:
614    O << *MO.getMBB()->getSymbol();
615    return;
616
617  default:
618    llvm_unreachable("Operand type not supported.");
619  }
620}
621
622void NVPTXAsmPrinter::printImplicitDef(const MachineInstr *MI,
623                                       raw_ostream &O) const {
624#ifndef __OPTIMIZE__
625  O << "\t// Implicit def :";
626  //printOperand(MI, 0);
627  O << "\n";
628#endif
629}
630
631void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
632                                      raw_ostream &O, const char *Modifier) {
633  printOperand(MI, opNum, O);
634
635  if (Modifier && !strcmp(Modifier, "add")) {
636    O << ", ";
637    printOperand(MI, opNum + 1, O);
638  } else {
639    if (MI->getOperand(opNum + 1).isImm() &&
640        MI->getOperand(opNum + 1).getImm() == 0)
641      return; // don't print ',0' or '+0'
642    O << "+";
643    printOperand(MI, opNum + 1, O);
644  }
645}
646
647void NVPTXAsmPrinter::printLdStCode(const MachineInstr *MI, int opNum,
648                                    raw_ostream &O, const char *Modifier) {
649  if (Modifier) {
650    const MachineOperand &MO = MI->getOperand(opNum);
651    int Imm = (int) MO.getImm();
652    if (!strcmp(Modifier, "volatile")) {
653      if (Imm)
654        O << ".volatile";
655    } else if (!strcmp(Modifier, "addsp")) {
656      switch (Imm) {
657      case NVPTX::PTXLdStInstCode::GLOBAL:
658        O << ".global";
659        break;
660      case NVPTX::PTXLdStInstCode::SHARED:
661        O << ".shared";
662        break;
663      case NVPTX::PTXLdStInstCode::LOCAL:
664        O << ".local";
665        break;
666      case NVPTX::PTXLdStInstCode::PARAM:
667        O << ".param";
668        break;
669      case NVPTX::PTXLdStInstCode::CONSTANT:
670        O << ".const";
671        break;
672      case NVPTX::PTXLdStInstCode::GENERIC:
673        if (!nvptxSubtarget.hasGenericLdSt())
674          O << ".global";
675        break;
676      default:
677        llvm_unreachable("Wrong Address Space");
678      }
679    } else if (!strcmp(Modifier, "sign")) {
680      if (Imm == NVPTX::PTXLdStInstCode::Signed)
681        O << "s";
682      else if (Imm == NVPTX::PTXLdStInstCode::Unsigned)
683        O << "u";
684      else
685        O << "f";
686    } else if (!strcmp(Modifier, "vec")) {
687      if (Imm == NVPTX::PTXLdStInstCode::V2)
688        O << ".v2";
689      else if (Imm == NVPTX::PTXLdStInstCode::V4)
690        O << ".v4";
691    } else
692      llvm_unreachable("Unknown Modifier");
693  } else
694    llvm_unreachable("Empty Modifier");
695}
696
697void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
698
699  emitLinkageDirective(F, O);
700  if (llvm::isKernelFunction(*F))
701    O << ".entry ";
702  else
703    O << ".func ";
704  printReturnValStr(F, O);
705  O << *Mang->getSymbol(F) << "\n";
706  emitFunctionParamList(F, O);
707  O << ";\n";
708}
709
710static bool usedInGlobalVarDef(const Constant *C) {
711  if (!C)
712    return false;
713
714  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
715    if (GV->getName().str() == "llvm.used")
716      return false;
717    return true;
718  }
719
720  for (Value::const_use_iterator ui = C->use_begin(), ue = C->use_end();
721       ui != ue; ++ui) {
722    const Constant *C = dyn_cast<Constant>(*ui);
723    if (usedInGlobalVarDef(C))
724      return true;
725  }
726  return false;
727}
728
729static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
730  if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
731    if (othergv->getName().str() == "llvm.used")
732      return true;
733  }
734
735  if (const Instruction *instr = dyn_cast<Instruction>(U)) {
736    if (instr->getParent() && instr->getParent()->getParent()) {
737      const Function *curFunc = instr->getParent()->getParent();
738      if (oneFunc && (curFunc != oneFunc))
739        return false;
740      oneFunc = curFunc;
741      return true;
742    } else
743      return false;
744  }
745
746  if (const MDNode *md = dyn_cast<MDNode>(U))
747    if (md->hasName() && ((md->getName().str() == "llvm.dbg.gv") ||
748                          (md->getName().str() == "llvm.dbg.sp")))
749      return true;
750
751  for (User::const_use_iterator ui = U->use_begin(), ue = U->use_end();
752       ui != ue; ++ui) {
753    if (usedInOneFunc(*ui, oneFunc) == false)
754      return false;
755  }
756  return true;
757}
758
759/* Find out if a global variable can be demoted to local scope.
760 * Currently, this is valid for CUDA shared variables, which have local
761 * scope and global lifetime. So the conditions to check are :
762 * 1. Is the global variable in shared address space?
763 * 2. Does it have internal linkage?
764 * 3. Is the global variable referenced only in one function?
765 */
766static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
767  if (gv->hasInternalLinkage() == false)
768    return false;
769  const PointerType *Pty = gv->getType();
770  if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED)
771    return false;
772
773  const Function *oneFunc = 0;
774
775  bool flag = usedInOneFunc(gv, oneFunc);
776  if (flag == false)
777    return false;
778  if (!oneFunc)
779    return false;
780  f = oneFunc;
781  return true;
782}
783
784static bool useFuncSeen(const Constant *C,
785                        llvm::DenseMap<const Function *, bool> &seenMap) {
786  for (Value::const_use_iterator ui = C->use_begin(), ue = C->use_end();
787       ui != ue; ++ui) {
788    if (const Constant *cu = dyn_cast<Constant>(*ui)) {
789      if (useFuncSeen(cu, seenMap))
790        return true;
791    } else if (const Instruction *I = dyn_cast<Instruction>(*ui)) {
792      const BasicBlock *bb = I->getParent();
793      if (!bb)
794        continue;
795      const Function *caller = bb->getParent();
796      if (!caller)
797        continue;
798      if (seenMap.find(caller) != seenMap.end())
799        return true;
800    }
801  }
802  return false;
803}
804
805void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
806  llvm::DenseMap<const Function *, bool> seenMap;
807  for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
808    const Function *F = FI;
809
810    if (F->isDeclaration()) {
811      if (F->use_empty())
812        continue;
813      if (F->getIntrinsicID())
814        continue;
815      emitDeclaration(F, O);
816      continue;
817    }
818    for (Value::const_use_iterator iter = F->use_begin(),
819                                   iterEnd = F->use_end();
820         iter != iterEnd; ++iter) {
821      if (const Constant *C = dyn_cast<Constant>(*iter)) {
822        if (usedInGlobalVarDef(C)) {
823          // The use is in the initialization of a global variable
824          // that is a function pointer, so print a declaration
825          // for the original function
826          emitDeclaration(F, O);
827          break;
828        }
829        // Emit a declaration of this function if the function that
830        // uses this constant expr has already been seen.
831        if (useFuncSeen(C, seenMap)) {
832          emitDeclaration(F, O);
833          break;
834        }
835      }
836
837      if (!isa<Instruction>(*iter))
838        continue;
839      const Instruction *instr = cast<Instruction>(*iter);
840      const BasicBlock *bb = instr->getParent();
841      if (!bb)
842        continue;
843      const Function *caller = bb->getParent();
844      if (!caller)
845        continue;
846
847      // If a caller has already been seen, then the caller is
848      // appearing in the module before the callee. so print out
849      // a declaration for the callee.
850      if (seenMap.find(caller) != seenMap.end()) {
851        emitDeclaration(F, O);
852        break;
853      }
854    }
855    seenMap[F] = true;
856  }
857}
858
859void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) {
860  DebugInfoFinder DbgFinder;
861  DbgFinder.processModule(M);
862
863  unsigned i = 1;
864  for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
865                                 E = DbgFinder.compile_unit_end();
866       I != E; ++I) {
867    DICompileUnit DIUnit(*I);
868    StringRef Filename(DIUnit.getFilename());
869    StringRef Dirname(DIUnit.getDirectory());
870    SmallString<128> FullPathName = Dirname;
871    if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
872      sys::path::append(FullPathName, Filename);
873      Filename = FullPathName.str();
874    }
875    if (filenameMap.find(Filename.str()) != filenameMap.end())
876      continue;
877    filenameMap[Filename.str()] = i;
878    OutStreamer.EmitDwarfFileDirective(i, "", Filename.str());
879    ++i;
880  }
881
882  for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(),
883                                 E = DbgFinder.subprogram_end();
884       I != E; ++I) {
885    DISubprogram SP(*I);
886    StringRef Filename(SP.getFilename());
887    StringRef Dirname(SP.getDirectory());
888    SmallString<128> FullPathName = Dirname;
889    if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
890      sys::path::append(FullPathName, Filename);
891      Filename = FullPathName.str();
892    }
893    if (filenameMap.find(Filename.str()) != filenameMap.end())
894      continue;
895    filenameMap[Filename.str()] = i;
896    ++i;
897  }
898}
899
900bool NVPTXAsmPrinter::doInitialization(Module &M) {
901
902  SmallString<128> Str1;
903  raw_svector_ostream OS1(Str1);
904
905  MMI = getAnalysisIfAvailable<MachineModuleInfo>();
906  MMI->AnalyzeModule(M);
907
908  // We need to call the parent's one explicitly.
909  //bool Result = AsmPrinter::doInitialization(M);
910
911  // Initialize TargetLoweringObjectFile.
912  const_cast<TargetLoweringObjectFile &>(getObjFileLowering())
913      .Initialize(OutContext, TM);
914
915  Mang = new Mangler(OutContext, *TM.getDataLayout());
916
917  // Emit header before any dwarf directives are emitted below.
918  emitHeader(M, OS1);
919  OutStreamer.EmitRawText(OS1.str());
920
921  // Already commented out
922  //bool Result = AsmPrinter::doInitialization(M);
923
924  if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
925    recordAndEmitFilenames(M);
926
927  GlobalsEmitted = false;
928
929  return false; // success
930}
931
932void NVPTXAsmPrinter::emitGlobals(const Module &M) {
933  SmallString<128> Str2;
934  raw_svector_ostream OS2(Str2);
935
936  emitDeclarations(M, OS2);
937
938  // As ptxas does not support forward references of globals, we need to first
939  // sort the list of module-level globals in def-use order. We visit each
940  // global variable in order, and ensure that we emit it *after* its dependent
941  // globals. We use a little extra memory maintaining both a set and a list to
942  // have fast searches while maintaining a strict ordering.
943  SmallVector<const GlobalVariable *, 8> Globals;
944  DenseSet<const GlobalVariable *> GVVisited;
945  DenseSet<const GlobalVariable *> GVVisiting;
946
947  // Visit each global variable, in order
948  for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
949       I != E; ++I)
950    VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting);
951
952  assert(GVVisited.size() == M.getGlobalList().size() &&
953         "Missed a global variable");
954  assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
955
956  // Print out module-level global variables in proper order
957  for (unsigned i = 0, e = Globals.size(); i != e; ++i)
958    printModuleLevelGV(Globals[i], OS2);
959
960  OS2 << '\n';
961
962  OutStreamer.EmitRawText(OS2.str());
963}
964
965void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) {
966  O << "//\n";
967  O << "// Generated by LLVM NVPTX Back-End\n";
968  O << "//\n";
969  O << "\n";
970
971  unsigned PTXVersion = nvptxSubtarget.getPTXVersion();
972  O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n";
973
974  O << ".target ";
975  O << nvptxSubtarget.getTargetName();
976
977  if (nvptxSubtarget.getDrvInterface() == NVPTX::NVCL)
978    O << ", texmode_independent";
979  if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) {
980    if (!nvptxSubtarget.hasDouble())
981      O << ", map_f64_to_f32";
982  }
983
984  if (MAI->doesSupportDebugInformation())
985    O << ", debug";
986
987  O << "\n";
988
989  O << ".address_size ";
990  if (nvptxSubtarget.is64Bit())
991    O << "64";
992  else
993    O << "32";
994  O << "\n";
995
996  O << "\n";
997}
998
999bool NVPTXAsmPrinter::doFinalization(Module &M) {
1000
1001  // If we did not emit any functions, then the global declarations have not
1002  // yet been emitted.
1003  if (!GlobalsEmitted) {
1004    emitGlobals(M);
1005    GlobalsEmitted = true;
1006  }
1007
1008  // XXX Temproarily remove global variables so that doFinalization() will not
1009  // emit them again (global variables are emitted at beginning).
1010
1011  Module::GlobalListType &global_list = M.getGlobalList();
1012  int i, n = global_list.size();
1013  GlobalVariable **gv_array = new GlobalVariable *[n];
1014
1015  // first, back-up GlobalVariable in gv_array
1016  i = 0;
1017  for (Module::global_iterator I = global_list.begin(), E = global_list.end();
1018       I != E; ++I)
1019    gv_array[i++] = &*I;
1020
1021  // second, empty global_list
1022  while (!global_list.empty())
1023    global_list.remove(global_list.begin());
1024
1025  // call doFinalization
1026  bool ret = AsmPrinter::doFinalization(M);
1027
1028  // now we restore global variables
1029  for (i = 0; i < n; i++)
1030    global_list.insert(global_list.end(), gv_array[i]);
1031
1032  delete[] gv_array;
1033  return ret;
1034
1035  //bool Result = AsmPrinter::doFinalization(M);
1036  // Instead of calling the parents doFinalization, we may
1037  // clone parents doFinalization and customize here.
1038  // Currently, we if NVISA out the EmitGlobals() in
1039  // parent's doFinalization, which is too intrusive.
1040  //
1041  // Same for the doInitialization.
1042  //return Result;
1043}
1044
1045// This function emits appropriate linkage directives for
1046// functions and global variables.
1047//
1048// extern function declaration            -> .extern
1049// extern function definition             -> .visible
1050// external global variable with init     -> .visible
1051// external without init                  -> .extern
1052// appending                              -> not allowed, assert.
1053
1054void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
1055                                           raw_ostream &O) {
1056  if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) {
1057    if (V->hasExternalLinkage()) {
1058      if (isa<GlobalVariable>(V)) {
1059        const GlobalVariable *GVar = cast<GlobalVariable>(V);
1060        if (GVar) {
1061          if (GVar->hasInitializer())
1062            O << ".visible ";
1063          else
1064            O << ".extern ";
1065        }
1066      } else if (V->isDeclaration())
1067        O << ".extern ";
1068      else
1069        O << ".visible ";
1070    } else if (V->hasAppendingLinkage()) {
1071      std::string msg;
1072      msg.append("Error: ");
1073      msg.append("Symbol ");
1074      if (V->hasName())
1075        msg.append(V->getName().str());
1076      msg.append("has unsupported appending linkage type");
1077      llvm_unreachable(msg.c_str());
1078    }
1079  }
1080}
1081
1082void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
1083                                         raw_ostream &O,
1084                                         bool processDemoted) {
1085
1086  // Skip meta data
1087  if (GVar->hasSection()) {
1088    if (GVar->getSection() == "llvm.metadata")
1089      return;
1090  }
1091
1092  const DataLayout *TD = TM.getDataLayout();
1093
1094  // GlobalVariables are always constant pointers themselves.
1095  const PointerType *PTy = GVar->getType();
1096  Type *ETy = PTy->getElementType();
1097
1098  if (GVar->hasExternalLinkage()) {
1099    if (GVar->hasInitializer())
1100      O << ".visible ";
1101    else
1102      O << ".extern ";
1103  }
1104
1105  if (llvm::isTexture(*GVar)) {
1106    O << ".global .texref " << llvm::getTextureName(*GVar) << ";\n";
1107    return;
1108  }
1109
1110  if (llvm::isSurface(*GVar)) {
1111    O << ".global .surfref " << llvm::getSurfaceName(*GVar) << ";\n";
1112    return;
1113  }
1114
1115  if (GVar->isDeclaration()) {
1116    // (extern) declarations, no definition or initializer
1117    // Currently the only known declaration is for an automatic __local
1118    // (.shared) promoted to global.
1119    emitPTXGlobalVariable(GVar, O);
1120    O << ";\n";
1121    return;
1122  }
1123
1124  if (llvm::isSampler(*GVar)) {
1125    O << ".global .samplerref " << llvm::getSamplerName(*GVar);
1126
1127    const Constant *Initializer = NULL;
1128    if (GVar->hasInitializer())
1129      Initializer = GVar->getInitializer();
1130    const ConstantInt *CI = NULL;
1131    if (Initializer)
1132      CI = dyn_cast<ConstantInt>(Initializer);
1133    if (CI) {
1134      unsigned sample = CI->getZExtValue();
1135
1136      O << " = { ";
1137
1138      for (int i = 0,
1139               addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
1140           i < 3; i++) {
1141        O << "addr_mode_" << i << " = ";
1142        switch (addr) {
1143        case 0:
1144          O << "wrap";
1145          break;
1146        case 1:
1147          O << "clamp_to_border";
1148          break;
1149        case 2:
1150          O << "clamp_to_edge";
1151          break;
1152        case 3:
1153          O << "wrap";
1154          break;
1155        case 4:
1156          O << "mirror";
1157          break;
1158        }
1159        O << ", ";
1160      }
1161      O << "filter_mode = ";
1162      switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
1163      case 0:
1164        O << "nearest";
1165        break;
1166      case 1:
1167        O << "linear";
1168        break;
1169      case 2:
1170        assert(0 && "Anisotropic filtering is not supported");
1171      default:
1172        O << "nearest";
1173        break;
1174      }
1175      if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
1176        O << ", force_unnormalized_coords = 1";
1177      }
1178      O << " }";
1179    }
1180
1181    O << ";\n";
1182    return;
1183  }
1184
1185  if (GVar->hasPrivateLinkage()) {
1186
1187    if (!strncmp(GVar->getName().data(), "unrollpragma", 12))
1188      return;
1189
1190    // FIXME - need better way (e.g. Metadata) to avoid generating this global
1191    if (!strncmp(GVar->getName().data(), "filename", 8))
1192      return;
1193    if (GVar->use_empty())
1194      return;
1195  }
1196
1197  const Function *demotedFunc = 0;
1198  if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) {
1199    O << "// " << GVar->getName().str() << " has been demoted\n";
1200    if (localDecls.find(demotedFunc) != localDecls.end())
1201      localDecls[demotedFunc].push_back(GVar);
1202    else {
1203      std::vector<const GlobalVariable *> temp;
1204      temp.push_back(GVar);
1205      localDecls[demotedFunc] = temp;
1206    }
1207    return;
1208  }
1209
1210  O << ".";
1211  emitPTXAddressSpace(PTy->getAddressSpace(), O);
1212  if (GVar->getAlignment() == 0)
1213    O << " .align " << (int) TD->getPrefTypeAlignment(ETy);
1214  else
1215    O << " .align " << GVar->getAlignment();
1216
1217  if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) {
1218    O << " .";
1219    // Special case: ABI requires that we use .u8 for predicates
1220    if (ETy->isIntegerTy(1))
1221      O << "u8";
1222    else
1223      O << getPTXFundamentalTypeStr(ETy, false);
1224    O << " ";
1225    O << *Mang->getSymbol(GVar);
1226
1227    // Ptx allows variable initilization only for constant and global state
1228    // spaces.
1229    if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
1230         (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) ||
1231         (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) &&
1232        GVar->hasInitializer()) {
1233      const Constant *Initializer = GVar->getInitializer();
1234      if (!Initializer->isNullValue()) {
1235        O << " = ";
1236        printScalarConstant(Initializer, O);
1237      }
1238    }
1239  } else {
1240    unsigned int ElementSize = 0;
1241
1242    // Although PTX has direct support for struct type and array type and
1243    // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
1244    // targets that support these high level field accesses. Structs, arrays
1245    // and vectors are lowered into arrays of bytes.
1246    switch (ETy->getTypeID()) {
1247    case Type::StructTyID:
1248    case Type::ArrayTyID:
1249    case Type::VectorTyID:
1250      ElementSize = TD->getTypeStoreSize(ETy);
1251      // Ptx allows variable initilization only for constant and
1252      // global state spaces.
1253      if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
1254           (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) ||
1255           (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) &&
1256          GVar->hasInitializer()) {
1257        const Constant *Initializer = GVar->getInitializer();
1258        if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
1259          AggBuffer aggBuffer(ElementSize, O, *this);
1260          bufferAggregateConstant(Initializer, &aggBuffer);
1261          if (aggBuffer.numSymbols) {
1262            if (nvptxSubtarget.is64Bit()) {
1263              O << " .u64 " << *Mang->getSymbol(GVar) << "[";
1264              O << ElementSize / 8;
1265            } else {
1266              O << " .u32 " << *Mang->getSymbol(GVar) << "[";
1267              O << ElementSize / 4;
1268            }
1269            O << "]";
1270          } else {
1271            O << " .b8 " << *Mang->getSymbol(GVar) << "[";
1272            O << ElementSize;
1273            O << "]";
1274          }
1275          O << " = {";
1276          aggBuffer.print();
1277          O << "}";
1278        } else {
1279          O << " .b8 " << *Mang->getSymbol(GVar);
1280          if (ElementSize) {
1281            O << "[";
1282            O << ElementSize;
1283            O << "]";
1284          }
1285        }
1286      } else {
1287        O << " .b8 " << *Mang->getSymbol(GVar);
1288        if (ElementSize) {
1289          O << "[";
1290          O << ElementSize;
1291          O << "]";
1292        }
1293      }
1294      break;
1295    default:
1296      assert(0 && "type not supported yet");
1297    }
1298
1299  }
1300  O << ";\n";
1301}
1302
1303void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
1304  if (localDecls.find(f) == localDecls.end())
1305    return;
1306
1307  std::vector<const GlobalVariable *> &gvars = localDecls[f];
1308
1309  for (unsigned i = 0, e = gvars.size(); i != e; ++i) {
1310    O << "\t// demoted variable\n\t";
1311    printModuleLevelGV(gvars[i], O, true);
1312  }
1313}
1314
1315void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
1316                                          raw_ostream &O) const {
1317  switch (AddressSpace) {
1318  case llvm::ADDRESS_SPACE_LOCAL:
1319    O << "local";
1320    break;
1321  case llvm::ADDRESS_SPACE_GLOBAL:
1322    O << "global";
1323    break;
1324  case llvm::ADDRESS_SPACE_CONST:
1325    // This logic should be consistent with that in
1326    // getCodeAddrSpace() (NVPTXISelDATToDAT.cpp)
1327    if (nvptxSubtarget.hasGenericLdSt())
1328      O << "global";
1329    else
1330      O << "const";
1331    break;
1332  case llvm::ADDRESS_SPACE_CONST_NOT_GEN:
1333    O << "const";
1334    break;
1335  case llvm::ADDRESS_SPACE_SHARED:
1336    O << "shared";
1337    break;
1338  default:
1339    report_fatal_error("Bad address space found while emitting PTX");
1340    break;
1341  }
1342}
1343
1344std::string
1345NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const {
1346  switch (Ty->getTypeID()) {
1347  default:
1348    llvm_unreachable("unexpected type");
1349    break;
1350  case Type::IntegerTyID: {
1351    unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
1352    if (NumBits == 1)
1353      return "pred";
1354    else if (NumBits <= 64) {
1355      std::string name = "u";
1356      return name + utostr(NumBits);
1357    } else {
1358      llvm_unreachable("Integer too large");
1359      break;
1360    }
1361    break;
1362  }
1363  case Type::FloatTyID:
1364    return "f32";
1365  case Type::DoubleTyID:
1366    return "f64";
1367  case Type::PointerTyID:
1368    if (nvptxSubtarget.is64Bit())
1369      if (useB4PTR)
1370        return "b64";
1371      else
1372        return "u64";
1373    else if (useB4PTR)
1374      return "b32";
1375    else
1376      return "u32";
1377  }
1378  llvm_unreachable("unexpected type");
1379  return NULL;
1380}
1381
1382void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
1383                                            raw_ostream &O) {
1384
1385  const DataLayout *TD = TM.getDataLayout();
1386
1387  // GlobalVariables are always constant pointers themselves.
1388  const PointerType *PTy = GVar->getType();
1389  Type *ETy = PTy->getElementType();
1390
1391  O << ".";
1392  emitPTXAddressSpace(PTy->getAddressSpace(), O);
1393  if (GVar->getAlignment() == 0)
1394    O << " .align " << (int) TD->getPrefTypeAlignment(ETy);
1395  else
1396    O << " .align " << GVar->getAlignment();
1397
1398  if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) {
1399    O << " .";
1400    O << getPTXFundamentalTypeStr(ETy);
1401    O << " ";
1402    O << *Mang->getSymbol(GVar);
1403    return;
1404  }
1405
1406  int64_t ElementSize = 0;
1407
1408  // Although PTX has direct support for struct type and array type and LLVM IR
1409  // is very similar to PTX, the LLVM CodeGen does not support for targets that
1410  // support these high level field accesses. Structs and arrays are lowered
1411  // into arrays of bytes.
1412  switch (ETy->getTypeID()) {
1413  case Type::StructTyID:
1414  case Type::ArrayTyID:
1415  case Type::VectorTyID:
1416    ElementSize = TD->getTypeStoreSize(ETy);
1417    O << " .b8 " << *Mang->getSymbol(GVar) << "[";
1418    if (ElementSize) {
1419      O << itostr(ElementSize);
1420    }
1421    O << "]";
1422    break;
1423  default:
1424    assert(0 && "type not supported yet");
1425  }
1426  return;
1427}
1428
1429static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) {
1430  if (Ty->isPrimitiveType() || Ty->isIntegerTy() || isa<PointerType>(Ty))
1431    return TD->getPrefTypeAlignment(Ty);
1432
1433  const ArrayType *ATy = dyn_cast<ArrayType>(Ty);
1434  if (ATy)
1435    return getOpenCLAlignment(TD, ATy->getElementType());
1436
1437  const VectorType *VTy = dyn_cast<VectorType>(Ty);
1438  if (VTy) {
1439    Type *ETy = VTy->getElementType();
1440    unsigned int numE = VTy->getNumElements();
1441    unsigned int alignE = TD->getPrefTypeAlignment(ETy);
1442    if (numE == 3)
1443      return 4 * alignE;
1444    else
1445      return numE * alignE;
1446  }
1447
1448  const StructType *STy = dyn_cast<StructType>(Ty);
1449  if (STy) {
1450    unsigned int alignStruct = 1;
1451    // Go through each element of the struct and find the
1452    // largest alignment.
1453    for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) {
1454      Type *ETy = STy->getElementType(i);
1455      unsigned int align = getOpenCLAlignment(TD, ETy);
1456      if (align > alignStruct)
1457        alignStruct = align;
1458    }
1459    return alignStruct;
1460  }
1461
1462  const FunctionType *FTy = dyn_cast<FunctionType>(Ty);
1463  if (FTy)
1464    return TD->getPointerPrefAlignment();
1465  return TD->getPrefTypeAlignment(Ty);
1466}
1467
1468void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
1469                                     int paramIndex, raw_ostream &O) {
1470  if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) ||
1471      (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA))
1472    O << *Mang->getSymbol(I->getParent()) << "_param_" << paramIndex;
1473  else {
1474    std::string argName = I->getName();
1475    const char *p = argName.c_str();
1476    while (*p) {
1477      if (*p == '.')
1478        O << "_";
1479      else
1480        O << *p;
1481      p++;
1482    }
1483  }
1484}
1485
1486void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) {
1487  Function::const_arg_iterator I, E;
1488  int i = 0;
1489
1490  if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) ||
1491      (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)) {
1492    O << *CurrentFnSym << "_param_" << paramIndex;
1493    return;
1494  }
1495
1496  for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, i++) {
1497    if (i == paramIndex) {
1498      printParamName(I, paramIndex, O);
1499      return;
1500    }
1501  }
1502  llvm_unreachable("paramIndex out of bound");
1503}
1504
1505void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
1506  const DataLayout *TD = TM.getDataLayout();
1507  const AttributeSet &PAL = F->getAttributes();
1508  const TargetLowering *TLI = TM.getTargetLowering();
1509  Function::const_arg_iterator I, E;
1510  unsigned paramIndex = 0;
1511  bool first = true;
1512  bool isKernelFunc = llvm::isKernelFunction(*F);
1513  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
1514  MVT thePointerTy = TLI->getPointerTy();
1515
1516  O << "(\n";
1517
1518  for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) {
1519    Type *Ty = I->getType();
1520
1521    if (!first)
1522      O << ",\n";
1523
1524    first = false;
1525
1526    // Handle image/sampler parameters
1527    if (llvm::isSampler(*I) || llvm::isImage(*I)) {
1528      if (llvm::isImage(*I)) {
1529        std::string sname = I->getName();
1530        if (llvm::isImageWriteOnly(*I))
1531          O << "\t.param .surfref " << *Mang->getSymbol(F) << "_param_"
1532            << paramIndex;
1533        else // Default image is read_only
1534          O << "\t.param .texref " << *Mang->getSymbol(F) << "_param_"
1535            << paramIndex;
1536      } else // Should be llvm::isSampler(*I)
1537        O << "\t.param .samplerref " << *Mang->getSymbol(F) << "_param_"
1538          << paramIndex;
1539      continue;
1540    }
1541
1542    if (PAL.hasAttribute(paramIndex + 1, Attribute::ByVal) == false) {
1543      if (Ty->isVectorTy()) {
1544        // Just print .param .b8 .align <a> .param[size];
1545        // <a> = PAL.getparamalignment
1546        // size = typeallocsize of element type
1547        unsigned align = PAL.getParamAlignment(paramIndex + 1);
1548        if (align == 0)
1549          align = TD->getABITypeAlignment(Ty);
1550
1551        unsigned sz = TD->getTypeAllocSize(Ty);
1552        O << "\t.param .align " << align << " .b8 ";
1553        printParamName(I, paramIndex, O);
1554        O << "[" << sz << "]";
1555
1556        continue;
1557      }
1558      // Just a scalar
1559      const PointerType *PTy = dyn_cast<PointerType>(Ty);
1560      if (isKernelFunc) {
1561        if (PTy) {
1562          // Special handling for pointer arguments to kernel
1563          O << "\t.param .u" << thePointerTy.getSizeInBits() << " ";
1564
1565          if (nvptxSubtarget.getDrvInterface() != NVPTX::CUDA) {
1566            Type *ETy = PTy->getElementType();
1567            int addrSpace = PTy->getAddressSpace();
1568            switch (addrSpace) {
1569            default:
1570              O << ".ptr ";
1571              break;
1572            case llvm::ADDRESS_SPACE_CONST_NOT_GEN:
1573              O << ".ptr .const ";
1574              break;
1575            case llvm::ADDRESS_SPACE_SHARED:
1576              O << ".ptr .shared ";
1577              break;
1578            case llvm::ADDRESS_SPACE_GLOBAL:
1579            case llvm::ADDRESS_SPACE_CONST:
1580              O << ".ptr .global ";
1581              break;
1582            }
1583            O << ".align " << (int) getOpenCLAlignment(TD, ETy) << " ";
1584          }
1585          printParamName(I, paramIndex, O);
1586          continue;
1587        }
1588
1589        // non-pointer scalar to kernel func
1590        O << "\t.param .";
1591        // Special case: predicate operands become .u8 types
1592        if (Ty->isIntegerTy(1))
1593          O << "u8";
1594        else
1595          O << getPTXFundamentalTypeStr(Ty);
1596        O << " ";
1597        printParamName(I, paramIndex, O);
1598        continue;
1599      }
1600      // Non-kernel function, just print .param .b<size> for ABI
1601      // and .reg .b<size> for non ABY
1602      unsigned sz = 0;
1603      if (isa<IntegerType>(Ty)) {
1604        sz = cast<IntegerType>(Ty)->getBitWidth();
1605        if (sz < 32)
1606          sz = 32;
1607      } else if (isa<PointerType>(Ty))
1608        sz = thePointerTy.getSizeInBits();
1609      else
1610        sz = Ty->getPrimitiveSizeInBits();
1611      if (isABI)
1612        O << "\t.param .b" << sz << " ";
1613      else
1614        O << "\t.reg .b" << sz << " ";
1615      printParamName(I, paramIndex, O);
1616      continue;
1617    }
1618
1619    // param has byVal attribute. So should be a pointer
1620    const PointerType *PTy = dyn_cast<PointerType>(Ty);
1621    assert(PTy && "Param with byval attribute should be a pointer type");
1622    Type *ETy = PTy->getElementType();
1623
1624    if (isABI || isKernelFunc) {
1625      // Just print .param .b8 .align <a> .param[size];
1626      // <a> = PAL.getparamalignment
1627      // size = typeallocsize of element type
1628      unsigned align = PAL.getParamAlignment(paramIndex + 1);
1629      if (align == 0)
1630        align = TD->getABITypeAlignment(ETy);
1631
1632      unsigned sz = TD->getTypeAllocSize(ETy);
1633      O << "\t.param .align " << align << " .b8 ";
1634      printParamName(I, paramIndex, O);
1635      O << "[" << sz << "]";
1636      continue;
1637    } else {
1638      // Split the ETy into constituent parts and
1639      // print .param .b<size> <name> for each part.
1640      // Further, if a part is vector, print the above for
1641      // each vector element.
1642      SmallVector<EVT, 16> vtparts;
1643      ComputeValueVTs(*TLI, ETy, vtparts);
1644      for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
1645        unsigned elems = 1;
1646        EVT elemtype = vtparts[i];
1647        if (vtparts[i].isVector()) {
1648          elems = vtparts[i].getVectorNumElements();
1649          elemtype = vtparts[i].getVectorElementType();
1650        }
1651
1652        for (unsigned j = 0, je = elems; j != je; ++j) {
1653          unsigned sz = elemtype.getSizeInBits();
1654          if (elemtype.isInteger() && (sz < 32))
1655            sz = 32;
1656          O << "\t.reg .b" << sz << " ";
1657          printParamName(I, paramIndex, O);
1658          if (j < je - 1)
1659            O << ",\n";
1660          ++paramIndex;
1661        }
1662        if (i < e - 1)
1663          O << ",\n";
1664      }
1665      --paramIndex;
1666      continue;
1667    }
1668  }
1669
1670  O << "\n)\n";
1671}
1672
1673void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF,
1674                                            raw_ostream &O) {
1675  const Function *F = MF.getFunction();
1676  emitFunctionParamList(F, O);
1677}
1678
1679void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
1680    const MachineFunction &MF) {
1681  SmallString<128> Str;
1682  raw_svector_ostream O(Str);
1683
1684  // Map the global virtual register number to a register class specific
1685  // virtual register number starting from 1 with that class.
1686  const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
1687  //unsigned numRegClasses = TRI->getNumRegClasses();
1688
1689  // Emit the Fake Stack Object
1690  const MachineFrameInfo *MFI = MF.getFrameInfo();
1691  int NumBytes = (int) MFI->getStackSize();
1692  if (NumBytes) {
1693    O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t" << DEPOTNAME
1694      << getFunctionNumber() << "[" << NumBytes << "];\n";
1695    if (nvptxSubtarget.is64Bit()) {
1696      O << "\t.reg .b64 \t%SP;\n";
1697      O << "\t.reg .b64 \t%SPL;\n";
1698    } else {
1699      O << "\t.reg .b32 \t%SP;\n";
1700      O << "\t.reg .b32 \t%SPL;\n";
1701    }
1702  }
1703
1704  // Go through all virtual registers to establish the mapping between the
1705  // global virtual
1706  // register number and the per class virtual register number.
1707  // We use the per class virtual register number in the ptx output.
1708  unsigned int numVRs = MRI->getNumVirtRegs();
1709  for (unsigned i = 0; i < numVRs; i++) {
1710    unsigned int vr = TRI->index2VirtReg(i);
1711    const TargetRegisterClass *RC = MRI->getRegClass(vr);
1712    std::map<unsigned, unsigned> &regmap = VRidGlobal2LocalMap[RC->getID()];
1713    int n = regmap.size();
1714    regmap.insert(std::make_pair(vr, n + 1));
1715  }
1716
1717  // Emit register declarations
1718  // @TODO: Extract out the real register usage
1719  O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n";
1720  O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n";
1721  O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n";
1722  O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n";
1723  O << "\t.reg .s64 %rl<" << NVPTXNumRegisters << ">;\n";
1724  O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n";
1725  O << "\t.reg .f64 %fl<" << NVPTXNumRegisters << ">;\n";
1726
1727  // Emit declaration of the virtual registers or 'physical' registers for
1728  // each register class
1729  //for (unsigned i=0; i< numRegClasses; i++) {
1730  //    std::map<unsigned, unsigned> &regmap = VRidGlobal2LocalMap[i];
1731  //    const TargetRegisterClass *RC = TRI->getRegClass(i);
1732  //    std::string rcname = getNVPTXRegClassName(RC);
1733  //    std::string rcStr = getNVPTXRegClassStr(RC);
1734  //    //int n = regmap.size();
1735  //    if (!isNVPTXVectorRegClass(RC)) {
1736  //      O << "\t.reg " << rcname << " \t" << rcStr << "<"
1737  //        << NVPTXNumRegisters << ">;\n";
1738  //    }
1739
1740  // Only declare those registers that may be used. And do not emit vector
1741  // registers as
1742  // they are all elementized to scalar registers.
1743  //if (n && !isNVPTXVectorRegClass(RC)) {
1744  //    if (RegAllocNilUsed) {
1745  //        O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1)
1746  //          << ">;\n";
1747  //    }
1748  //    else {
1749  //        O << "\t.reg " << rcname << " \t" << StrToUpper(rcStr)
1750  //          << "<" << 32 << ">;\n";
1751  //    }
1752  //}
1753  //}
1754
1755  OutStreamer.EmitRawText(O.str());
1756}
1757
1758void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
1759  APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
1760  bool ignored;
1761  unsigned int numHex;
1762  const char *lead;
1763
1764  if (Fp->getType()->getTypeID() == Type::FloatTyID) {
1765    numHex = 8;
1766    lead = "0f";
1767    APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &ignored);
1768  } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
1769    numHex = 16;
1770    lead = "0d";
1771    APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored);
1772  } else
1773    llvm_unreachable("unsupported fp type");
1774
1775  APInt API = APF.bitcastToAPInt();
1776  std::string hexstr(utohexstr(API.getZExtValue()));
1777  O << lead;
1778  if (hexstr.length() < numHex)
1779    O << std::string(numHex - hexstr.length(), '0');
1780  O << utohexstr(API.getZExtValue());
1781}
1782
1783void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
1784  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1785    O << CI->getValue();
1786    return;
1787  }
1788  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
1789    printFPConstant(CFP, O);
1790    return;
1791  }
1792  if (isa<ConstantPointerNull>(CPV)) {
1793    O << "0";
1794    return;
1795  }
1796  if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1797    O << *Mang->getSymbol(GVar);
1798    return;
1799  }
1800  if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1801    const Value *v = Cexpr->stripPointerCasts();
1802    if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
1803      O << *Mang->getSymbol(GVar);
1804      return;
1805    } else {
1806      O << *LowerConstant(CPV, *this);
1807      return;
1808    }
1809  }
1810  llvm_unreachable("Not scalar type found in printScalarConstant()");
1811}
1812
1813void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
1814                                   AggBuffer *aggBuffer) {
1815
1816  const DataLayout *TD = TM.getDataLayout();
1817
1818  if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
1819    int s = TD->getTypeAllocSize(CPV->getType());
1820    if (s < Bytes)
1821      s = Bytes;
1822    aggBuffer->addZeros(s);
1823    return;
1824  }
1825
1826  unsigned char *ptr;
1827  switch (CPV->getType()->getTypeID()) {
1828
1829  case Type::IntegerTyID: {
1830    const Type *ETy = CPV->getType();
1831    if (ETy == Type::getInt8Ty(CPV->getContext())) {
1832      unsigned char c =
1833          (unsigned char)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
1834      ptr = &c;
1835      aggBuffer->addBytes(ptr, 1, Bytes);
1836    } else if (ETy == Type::getInt16Ty(CPV->getContext())) {
1837      short int16 = (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
1838      ptr = (unsigned char *)&int16;
1839      aggBuffer->addBytes(ptr, 2, Bytes);
1840    } else if (ETy == Type::getInt32Ty(CPV->getContext())) {
1841      if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
1842        int int32 = (int)(constInt->getZExtValue());
1843        ptr = (unsigned char *)&int32;
1844        aggBuffer->addBytes(ptr, 4, Bytes);
1845        break;
1846      } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1847        if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
1848                ConstantFoldConstantExpression(Cexpr, TD))) {
1849          int int32 = (int)(constInt->getZExtValue());
1850          ptr = (unsigned char *)&int32;
1851          aggBuffer->addBytes(ptr, 4, Bytes);
1852          break;
1853        }
1854        if (Cexpr->getOpcode() == Instruction::PtrToInt) {
1855          Value *v = Cexpr->getOperand(0)->stripPointerCasts();
1856          aggBuffer->addSymbol(v);
1857          aggBuffer->addZeros(4);
1858          break;
1859        }
1860      }
1861      llvm_unreachable("unsupported integer const type");
1862    } else if (ETy == Type::getInt64Ty(CPV->getContext())) {
1863      if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
1864        long long int64 = (long long)(constInt->getZExtValue());
1865        ptr = (unsigned char *)&int64;
1866        aggBuffer->addBytes(ptr, 8, Bytes);
1867        break;
1868      } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1869        if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
1870                ConstantFoldConstantExpression(Cexpr, TD))) {
1871          long long int64 = (long long)(constInt->getZExtValue());
1872          ptr = (unsigned char *)&int64;
1873          aggBuffer->addBytes(ptr, 8, Bytes);
1874          break;
1875        }
1876        if (Cexpr->getOpcode() == Instruction::PtrToInt) {
1877          Value *v = Cexpr->getOperand(0)->stripPointerCasts();
1878          aggBuffer->addSymbol(v);
1879          aggBuffer->addZeros(8);
1880          break;
1881        }
1882      }
1883      llvm_unreachable("unsupported integer const type");
1884    } else
1885      llvm_unreachable("unsupported integer const type");
1886    break;
1887  }
1888  case Type::FloatTyID:
1889  case Type::DoubleTyID: {
1890    const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV);
1891    const Type *Ty = CFP->getType();
1892    if (Ty == Type::getFloatTy(CPV->getContext())) {
1893      float float32 = (float) CFP->getValueAPF().convertToFloat();
1894      ptr = (unsigned char *)&float32;
1895      aggBuffer->addBytes(ptr, 4, Bytes);
1896    } else if (Ty == Type::getDoubleTy(CPV->getContext())) {
1897      double float64 = CFP->getValueAPF().convertToDouble();
1898      ptr = (unsigned char *)&float64;
1899      aggBuffer->addBytes(ptr, 8, Bytes);
1900    } else {
1901      llvm_unreachable("unsupported fp const type");
1902    }
1903    break;
1904  }
1905  case Type::PointerTyID: {
1906    if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1907      aggBuffer->addSymbol(GVar);
1908    } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1909      const Value *v = Cexpr->stripPointerCasts();
1910      aggBuffer->addSymbol(v);
1911    }
1912    unsigned int s = TD->getTypeAllocSize(CPV->getType());
1913    aggBuffer->addZeros(s);
1914    break;
1915  }
1916
1917  case Type::ArrayTyID:
1918  case Type::VectorTyID:
1919  case Type::StructTyID: {
1920    if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV) ||
1921        isa<ConstantStruct>(CPV)) {
1922      int ElementSize = TD->getTypeAllocSize(CPV->getType());
1923      bufferAggregateConstant(CPV, aggBuffer);
1924      if (Bytes > ElementSize)
1925        aggBuffer->addZeros(Bytes - ElementSize);
1926    } else if (isa<ConstantAggregateZero>(CPV))
1927      aggBuffer->addZeros(Bytes);
1928    else
1929      llvm_unreachable("Unexpected Constant type");
1930    break;
1931  }
1932
1933  default:
1934    llvm_unreachable("unsupported type");
1935  }
1936}
1937
1938void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
1939                                              AggBuffer *aggBuffer) {
1940  const DataLayout *TD = TM.getDataLayout();
1941  int Bytes;
1942
1943  // Old constants
1944  if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) {
1945    if (CPV->getNumOperands())
1946      for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i)
1947        bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer);
1948    return;
1949  }
1950
1951  if (const ConstantDataSequential *CDS =
1952          dyn_cast<ConstantDataSequential>(CPV)) {
1953    if (CDS->getNumElements())
1954      for (unsigned i = 0; i < CDS->getNumElements(); ++i)
1955        bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0,
1956                     aggBuffer);
1957    return;
1958  }
1959
1960  if (isa<ConstantStruct>(CPV)) {
1961    if (CPV->getNumOperands()) {
1962      StructType *ST = cast<StructType>(CPV->getType());
1963      for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) {
1964        if (i == (e - 1))
1965          Bytes = TD->getStructLayout(ST)->getElementOffset(0) +
1966                  TD->getTypeAllocSize(ST) -
1967                  TD->getStructLayout(ST)->getElementOffset(i);
1968        else
1969          Bytes = TD->getStructLayout(ST)->getElementOffset(i + 1) -
1970                  TD->getStructLayout(ST)->getElementOffset(i);
1971        bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer);
1972      }
1973    }
1974    return;
1975  }
1976  llvm_unreachable("unsupported constant type in printAggregateConstant()");
1977}
1978
1979// buildTypeNameMap - Run through symbol table looking for type names.
1980//
1981
1982bool NVPTXAsmPrinter::isImageType(const Type *Ty) {
1983
1984  std::map<const Type *, std::string>::iterator PI = TypeNameMap.find(Ty);
1985
1986  if (PI != TypeNameMap.end() && (!PI->second.compare("struct._image1d_t") ||
1987                                  !PI->second.compare("struct._image2d_t") ||
1988                                  !PI->second.compare("struct._image3d_t")))
1989    return true;
1990
1991  return false;
1992}
1993
1994/// PrintAsmOperand - Print out an operand for an inline asm expression.
1995///
1996bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
1997                                      unsigned AsmVariant,
1998                                      const char *ExtraCode, raw_ostream &O) {
1999  if (ExtraCode && ExtraCode[0]) {
2000    if (ExtraCode[1] != 0)
2001      return true; // Unknown modifier.
2002
2003    switch (ExtraCode[0]) {
2004    default:
2005      // See if this is a generic print operand
2006      return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
2007    case 'r':
2008      break;
2009    }
2010  }
2011
2012  printOperand(MI, OpNo, O);
2013
2014  return false;
2015}
2016
2017bool NVPTXAsmPrinter::PrintAsmMemoryOperand(
2018    const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant,
2019    const char *ExtraCode, raw_ostream &O) {
2020  if (ExtraCode && ExtraCode[0])
2021    return true; // Unknown modifier
2022
2023  O << '[';
2024  printMemOperand(MI, OpNo, O);
2025  O << ']';
2026
2027  return false;
2028}
2029
2030bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) {
2031  switch (MI.getOpcode()) {
2032  default:
2033    return false;
2034  case NVPTX::CallArgBeginInst:
2035  case NVPTX::CallArgEndInst0:
2036  case NVPTX::CallArgEndInst1:
2037  case NVPTX::CallArgF32:
2038  case NVPTX::CallArgF64:
2039  case NVPTX::CallArgI16:
2040  case NVPTX::CallArgI32:
2041  case NVPTX::CallArgI32imm:
2042  case NVPTX::CallArgI64:
2043  case NVPTX::CallArgI8:
2044  case NVPTX::CallArgParam:
2045  case NVPTX::CallVoidInst:
2046  case NVPTX::CallVoidInstReg:
2047  case NVPTX::Callseq_End:
2048  case NVPTX::CallVoidInstReg64:
2049  case NVPTX::DeclareParamInst:
2050  case NVPTX::DeclareRetMemInst:
2051  case NVPTX::DeclareRetRegInst:
2052  case NVPTX::DeclareRetScalarInst:
2053  case NVPTX::DeclareScalarParamInst:
2054  case NVPTX::DeclareScalarRegInst:
2055  case NVPTX::StoreParamF32:
2056  case NVPTX::StoreParamF64:
2057  case NVPTX::StoreParamI16:
2058  case NVPTX::StoreParamI32:
2059  case NVPTX::StoreParamI64:
2060  case NVPTX::StoreParamI8:
2061  case NVPTX::StoreParamS32I8:
2062  case NVPTX::StoreParamU32I8:
2063  case NVPTX::StoreParamS32I16:
2064  case NVPTX::StoreParamU32I16:
2065  case NVPTX::StoreRetvalF32:
2066  case NVPTX::StoreRetvalF64:
2067  case NVPTX::StoreRetvalI16:
2068  case NVPTX::StoreRetvalI32:
2069  case NVPTX::StoreRetvalI64:
2070  case NVPTX::StoreRetvalI8:
2071  case NVPTX::LastCallArgF32:
2072  case NVPTX::LastCallArgF64:
2073  case NVPTX::LastCallArgI16:
2074  case NVPTX::LastCallArgI32:
2075  case NVPTX::LastCallArgI32imm:
2076  case NVPTX::LastCallArgI64:
2077  case NVPTX::LastCallArgI8:
2078  case NVPTX::LastCallArgParam:
2079  case NVPTX::LoadParamMemF32:
2080  case NVPTX::LoadParamMemF64:
2081  case NVPTX::LoadParamMemI16:
2082  case NVPTX::LoadParamMemI32:
2083  case NVPTX::LoadParamMemI64:
2084  case NVPTX::LoadParamMemI8:
2085  case NVPTX::LoadParamRegF32:
2086  case NVPTX::LoadParamRegF64:
2087  case NVPTX::LoadParamRegI16:
2088  case NVPTX::LoadParamRegI32:
2089  case NVPTX::LoadParamRegI64:
2090  case NVPTX::LoadParamRegI8:
2091  case NVPTX::PrototypeInst:
2092  case NVPTX::DBG_VALUE:
2093    return true;
2094  }
2095  return false;
2096}
2097
2098// Force static initialization.
2099extern "C" void LLVMInitializeNVPTXBackendAsmPrinter() {
2100  RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32);
2101  RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64);
2102}
2103
2104void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) {
2105  std::stringstream temp;
2106  LineReader *reader = this->getReader(filename.str());
2107  temp << "\n//";
2108  temp << filename.str();
2109  temp << ":";
2110  temp << line;
2111  temp << " ";
2112  temp << reader->readLine(line);
2113  temp << "\n";
2114  this->OutStreamer.EmitRawText(Twine(temp.str()));
2115}
2116
2117LineReader *NVPTXAsmPrinter::getReader(std::string filename) {
2118  if (reader == NULL) {
2119    reader = new LineReader(filename);
2120  }
2121
2122  if (reader->fileName() != filename) {
2123    delete reader;
2124    reader = new LineReader(filename);
2125  }
2126
2127  return reader;
2128}
2129
2130std::string LineReader::readLine(unsigned lineNum) {
2131  if (lineNum < theCurLine) {
2132    theCurLine = 0;
2133    fstr.seekg(0, std::ios::beg);
2134  }
2135  while (theCurLine < lineNum) {
2136    fstr.getline(buff, 500);
2137    theCurLine++;
2138  }
2139  return buff;
2140}
2141
2142// Force static initialization.
2143extern "C" void LLVMInitializeNVPTXAsmPrinter() {
2144  RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32);
2145  RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64);
2146}
2147