1239310Sdim//===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===// 2239310Sdim// 3239310Sdim// The LLVM Compiler Infrastructure 4239310Sdim// 5239310Sdim// This file is distributed under the University of Illinois Open Source 6239310Sdim// License. See LICENSE.TXT for details. 7239310Sdim// 8239310Sdim//===----------------------------------------------------------------------===// 9239310Sdim// 10239310Sdim// This file contains a printer that converts from our internal representation 11239310Sdim// of machine-dependent LLVM code to NVPTX assembly language. 12239310Sdim// 13239310Sdim//===----------------------------------------------------------------------===// 14239310Sdim 15239310Sdim#include "NVPTXAsmPrinter.h" 16252723Sdim#include "MCTargetDesc/NVPTXMCAsmInfo.h" 17239310Sdim#include "NVPTX.h" 18239310Sdim#include "NVPTXInstrInfo.h" 19263509Sdim#include "NVPTXMCExpr.h" 20252723Sdim#include "NVPTXRegisterInfo.h" 21239310Sdim#include "NVPTXTargetMachine.h" 22239310Sdim#include "NVPTXUtilities.h" 23263509Sdim#include "InstPrinter/NVPTXInstPrinter.h" 24252723Sdim#include "cl_common_defines.h" 25239310Sdim#include "llvm/ADT/StringExtras.h" 26252723Sdim#include "llvm/Analysis/ConstantFolding.h" 27252723Sdim#include "llvm/Assembly/Writer.h" 28239310Sdim#include "llvm/CodeGen/Analysis.h" 29239310Sdim#include "llvm/CodeGen/MachineFrameInfo.h" 30239310Sdim#include "llvm/CodeGen/MachineModuleInfo.h" 31252723Sdim#include "llvm/CodeGen/MachineRegisterInfo.h" 32252723Sdim#include "llvm/DebugInfo.h" 33252723Sdim#include "llvm/IR/DerivedTypes.h" 34252723Sdim#include "llvm/IR/Function.h" 35252723Sdim#include "llvm/IR/GlobalVariable.h" 36252723Sdim#include "llvm/IR/Module.h" 37252723Sdim#include "llvm/IR/Operator.h" 38239310Sdim#include "llvm/MC/MCStreamer.h" 39239310Sdim#include "llvm/MC/MCSymbol.h" 40252723Sdim#include "llvm/Support/CommandLine.h" 41239310Sdim#include "llvm/Support/ErrorHandling.h" 42239310Sdim#include "llvm/Support/FormattedStream.h" 43252723Sdim#include "llvm/Support/Path.h" 44252723Sdim#include "llvm/Support/TargetRegistry.h" 45239310Sdim#include "llvm/Support/TimeValue.h" 46252723Sdim#include "llvm/Target/Mangler.h" 47252723Sdim#include "llvm/Target/TargetLoweringObjectFile.h" 48239310Sdim#include <sstream> 49239310Sdimusing namespace llvm; 50239310Sdim 51239310Sdim#define DEPOTNAME "__local_depot" 52239310Sdim 53239310Sdimstatic cl::opt<bool> 54263509SdimEmitLineNumbers("nvptx-emit-line-numbers", cl::Hidden, 55239310Sdim cl::desc("NVPTX Specific: Emit Line numbers even without -G"), 56239310Sdim cl::init(true)); 57239310Sdim 58263509Sdimstatic cl::opt<bool> 59263509SdimInterleaveSrc("nvptx-emit-src", cl::ZeroOrMore, cl::Hidden, 60252723Sdim cl::desc("NVPTX Specific: Emit source line in ptx file"), 61263509Sdim cl::init(false)); 62239310Sdim 63245431Sdimnamespace { 64245431Sdim/// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V 65245431Sdim/// depends. 66252723Sdimvoid DiscoverDependentGlobals(const Value *V, 67252723Sdim DenseSet<const GlobalVariable *> &Globals) { 68252723Sdim if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) 69245431Sdim Globals.insert(GV); 70245431Sdim else { 71252723Sdim if (const User *U = dyn_cast<User>(V)) { 72245431Sdim for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) { 73245431Sdim DiscoverDependentGlobals(U->getOperand(i), Globals); 74245431Sdim } 75245431Sdim } 76245431Sdim } 77245431Sdim} 78239310Sdim 79245431Sdim/// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable 80245431Sdim/// instances to be emitted, but only after any dependents have been added 81245431Sdim/// first. 82252723Sdimvoid VisitGlobalVariableForEmission( 83252723Sdim const GlobalVariable *GV, SmallVectorImpl<const GlobalVariable *> &Order, 84252723Sdim DenseSet<const GlobalVariable *> &Visited, 85252723Sdim DenseSet<const GlobalVariable *> &Visiting) { 86245431Sdim // Have we already visited this one? 87252723Sdim if (Visited.count(GV)) 88252723Sdim return; 89239310Sdim 90245431Sdim // Do we have a circular dependency? 91245431Sdim if (Visiting.count(GV)) 92245431Sdim report_fatal_error("Circular dependency found in global variable set"); 93245431Sdim 94245431Sdim // Start visiting this global 95245431Sdim Visiting.insert(GV); 96245431Sdim 97245431Sdim // Make sure we visit all dependents first 98252723Sdim DenseSet<const GlobalVariable *> Others; 99245431Sdim for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i) 100245431Sdim DiscoverDependentGlobals(GV->getOperand(i), Others); 101252723Sdim 102252723Sdim for (DenseSet<const GlobalVariable *>::iterator I = Others.begin(), 103252723Sdim E = Others.end(); 104252723Sdim I != E; ++I) 105245431Sdim VisitGlobalVariableForEmission(*I, Order, Visited, Visiting); 106245431Sdim 107245431Sdim // Now we can visit ourself 108245431Sdim Order.push_back(GV); 109245431Sdim Visited.insert(GV); 110245431Sdim Visiting.erase(GV); 111245431Sdim} 112245431Sdim} 113245431Sdim 114239310Sdim// @TODO: This is a copy from AsmPrinter.cpp. The function is static, so we 115239310Sdim// cannot just link to the existing version. 116239310Sdim/// LowerConstant - Lower the specified LLVM Constant to an MCExpr. 117239310Sdim/// 118239310Sdimusing namespace nvptx; 119239310Sdimconst MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { 120239310Sdim MCContext &Ctx = AP.OutContext; 121239310Sdim 122239310Sdim if (CV->isNullValue() || isa<UndefValue>(CV)) 123239310Sdim return MCConstantExpr::Create(0, Ctx); 124239310Sdim 125239310Sdim if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) 126239310Sdim return MCConstantExpr::Create(CI->getZExtValue(), Ctx); 127239310Sdim 128239310Sdim if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) 129263509Sdim return MCSymbolRefExpr::Create(AP.getSymbol(GV), Ctx); 130239310Sdim 131239310Sdim if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) 132239310Sdim return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx); 133239310Sdim 134239310Sdim const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV); 135239310Sdim if (CE == 0) 136239310Sdim llvm_unreachable("Unknown constant value to lower!"); 137239310Sdim 138239310Sdim switch (CE->getOpcode()) { 139239310Sdim default: 140239310Sdim // If the code isn't optimized, there may be outstanding folding 141245431Sdim // opportunities. Attempt to fold the expression using DataLayout as a 142239310Sdim // last resort before giving up. 143252723Sdim if (Constant *C = ConstantFoldConstantExpression(CE, AP.TM.getDataLayout())) 144239310Sdim if (C != CE) 145239310Sdim return LowerConstant(C, AP); 146239310Sdim 147239310Sdim // Otherwise report the problem to the user. 148239310Sdim { 149252723Sdim std::string S; 150252723Sdim raw_string_ostream OS(S); 151252723Sdim OS << "Unsupported expression in static initializer: "; 152252723Sdim WriteAsOperand(OS, CE, /*PrintType=*/ false, 153252723Sdim !AP.MF ? 0 : AP.MF->getFunction()->getParent()); 154252723Sdim report_fatal_error(OS.str()); 155239310Sdim } 156239310Sdim case Instruction::GetElementPtr: { 157245431Sdim const DataLayout &TD = *AP.TM.getDataLayout(); 158239310Sdim // Generate a symbolic expression for the byte address 159252723Sdim APInt OffsetAI(TD.getPointerSizeInBits(), 0); 160252723Sdim cast<GEPOperator>(CE)->accumulateConstantOffset(TD, OffsetAI); 161239310Sdim 162239310Sdim const MCExpr *Base = LowerConstant(CE->getOperand(0), AP); 163252723Sdim if (!OffsetAI) 164239310Sdim return Base; 165239310Sdim 166252723Sdim int64_t Offset = OffsetAI.getSExtValue(); 167239310Sdim return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx), 168239310Sdim Ctx); 169239310Sdim } 170239310Sdim 171239310Sdim case Instruction::Trunc: 172239310Sdim // We emit the value and depend on the assembler to truncate the generated 173239310Sdim // expression properly. This is important for differences between 174239310Sdim // blockaddress labels. Since the two labels are in the same function, it 175239310Sdim // is reasonable to treat their delta as a 32-bit value. 176252723Sdim // FALL THROUGH. 177239310Sdim case Instruction::BitCast: 178239310Sdim return LowerConstant(CE->getOperand(0), AP); 179239310Sdim 180239310Sdim case Instruction::IntToPtr: { 181245431Sdim const DataLayout &TD = *AP.TM.getDataLayout(); 182239310Sdim // Handle casts to pointers by changing them into casts to the appropriate 183239310Sdim // integer type. This promotes constant folding and simplifies this code. 184239310Sdim Constant *Op = CE->getOperand(0); 185239310Sdim Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()), 186252723Sdim false /*ZExt*/); 187239310Sdim return LowerConstant(Op, AP); 188239310Sdim } 189239310Sdim 190239310Sdim case Instruction::PtrToInt: { 191245431Sdim const DataLayout &TD = *AP.TM.getDataLayout(); 192239310Sdim // Support only foldable casts to/from pointers that can be eliminated by 193239310Sdim // changing the pointer to the appropriately sized integer type. 194239310Sdim Constant *Op = CE->getOperand(0); 195239310Sdim Type *Ty = CE->getType(); 196239310Sdim 197239310Sdim const MCExpr *OpExpr = LowerConstant(Op, AP); 198239310Sdim 199239310Sdim // We can emit the pointer value into this slot if the slot is an 200239310Sdim // integer slot equal to the size of the pointer. 201239310Sdim if (TD.getTypeAllocSize(Ty) == TD.getTypeAllocSize(Op->getType())) 202239310Sdim return OpExpr; 203239310Sdim 204239310Sdim // Otherwise the pointer is smaller than the resultant integer, mask off 205239310Sdim // the high bits so we are sure to get a proper truncation if the input is 206239310Sdim // a constant expr. 207239310Sdim unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType()); 208252723Sdim const MCExpr *MaskExpr = 209252723Sdim MCConstantExpr::Create(~0ULL >> (64 - InBits), Ctx); 210239310Sdim return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx); 211239310Sdim } 212239310Sdim 213252723Sdim // The MC library also has a right-shift operator, but it isn't consistently 214239310Sdim // signed or unsigned between different targets. 215239310Sdim case Instruction::Add: 216239310Sdim case Instruction::Sub: 217239310Sdim case Instruction::Mul: 218239310Sdim case Instruction::SDiv: 219239310Sdim case Instruction::SRem: 220239310Sdim case Instruction::Shl: 221239310Sdim case Instruction::And: 222239310Sdim case Instruction::Or: 223239310Sdim case Instruction::Xor: { 224239310Sdim const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP); 225239310Sdim const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP); 226239310Sdim switch (CE->getOpcode()) { 227252723Sdim default: 228252723Sdim llvm_unreachable("Unknown binary operator constant cast expr"); 229252723Sdim case Instruction::Add: 230252723Sdim return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx); 231252723Sdim case Instruction::Sub: 232252723Sdim return MCBinaryExpr::CreateSub(LHS, RHS, Ctx); 233252723Sdim case Instruction::Mul: 234252723Sdim return MCBinaryExpr::CreateMul(LHS, RHS, Ctx); 235252723Sdim case Instruction::SDiv: 236252723Sdim return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx); 237252723Sdim case Instruction::SRem: 238252723Sdim return MCBinaryExpr::CreateMod(LHS, RHS, Ctx); 239252723Sdim case Instruction::Shl: 240252723Sdim return MCBinaryExpr::CreateShl(LHS, RHS, Ctx); 241252723Sdim case Instruction::And: 242252723Sdim return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx); 243252723Sdim case Instruction::Or: 244252723Sdim return MCBinaryExpr::CreateOr(LHS, RHS, Ctx); 245252723Sdim case Instruction::Xor: 246252723Sdim return MCBinaryExpr::CreateXor(LHS, RHS, Ctx); 247239310Sdim } 248239310Sdim } 249239310Sdim } 250239310Sdim} 251239310Sdim 252252723Sdimvoid NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) { 253239310Sdim if (!EmitLineNumbers) 254239310Sdim return; 255239310Sdim if (ignoreLoc(MI)) 256239310Sdim return; 257239310Sdim 258239310Sdim DebugLoc curLoc = MI.getDebugLoc(); 259239310Sdim 260239310Sdim if (prevDebugLoc.isUnknown() && curLoc.isUnknown()) 261239310Sdim return; 262239310Sdim 263239310Sdim if (prevDebugLoc == curLoc) 264239310Sdim return; 265239310Sdim 266239310Sdim prevDebugLoc = curLoc; 267239310Sdim 268239310Sdim if (curLoc.isUnknown()) 269239310Sdim return; 270239310Sdim 271239310Sdim const MachineFunction *MF = MI.getParent()->getParent(); 272239310Sdim //const TargetMachine &TM = MF->getTarget(); 273239310Sdim 274239310Sdim const LLVMContext &ctx = MF->getFunction()->getContext(); 275239310Sdim DIScope Scope(curLoc.getScope(ctx)); 276239310Sdim 277263509Sdim assert((!Scope || Scope.isScope()) && 278263509Sdim "Scope of a DebugLoc should be null or a DIScope."); 279263509Sdim if (!Scope) 280263509Sdim return; 281239310Sdim 282239310Sdim StringRef fileName(Scope.getFilename()); 283239310Sdim StringRef dirName(Scope.getDirectory()); 284239310Sdim SmallString<128> FullPathName = dirName; 285239310Sdim if (!dirName.empty() && !sys::path::is_absolute(fileName)) { 286239310Sdim sys::path::append(FullPathName, fileName); 287239310Sdim fileName = FullPathName.str(); 288239310Sdim } 289239310Sdim 290239310Sdim if (filenameMap.find(fileName.str()) == filenameMap.end()) 291239310Sdim return; 292239310Sdim 293239310Sdim // Emit the line from the source file. 294263509Sdim if (InterleaveSrc) 295239310Sdim this->emitSrcInText(fileName.str(), curLoc.getLine()); 296239310Sdim 297239310Sdim std::stringstream temp; 298252723Sdim temp << "\t.loc " << filenameMap[fileName.str()] << " " << curLoc.getLine() 299252723Sdim << " " << curLoc.getCol(); 300239310Sdim OutStreamer.EmitRawText(Twine(temp.str().c_str())); 301239310Sdim} 302239310Sdim 303239310Sdimvoid NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) { 304239310Sdim SmallString<128> Str; 305239310Sdim raw_svector_ostream OS(Str); 306239310Sdim if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) 307239310Sdim emitLineNumberAsDotLoc(*MI); 308263509Sdim 309263509Sdim MCInst Inst; 310263509Sdim lowerToMCInst(MI, Inst); 311263509Sdim OutStreamer.EmitInstruction(Inst); 312239310Sdim} 313239310Sdim 314263509Sdimvoid NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) { 315263509Sdim OutMI.setOpcode(MI->getOpcode()); 316263509Sdim 317263509Sdim // Special: Do not mangle symbol operand of CALL_PROTOTYPE 318263509Sdim if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) { 319263509Sdim const MachineOperand &MO = MI->getOperand(0); 320263509Sdim OutMI.addOperand(GetSymbolRef(MO, 321263509Sdim OutContext.GetOrCreateSymbol(Twine(MO.getSymbolName())))); 322263509Sdim return; 323263509Sdim } 324263509Sdim 325263509Sdim for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 326263509Sdim const MachineOperand &MO = MI->getOperand(i); 327263509Sdim 328263509Sdim MCOperand MCOp; 329263509Sdim if (lowerOperand(MO, MCOp)) 330263509Sdim OutMI.addOperand(MCOp); 331263509Sdim } 332263509Sdim} 333263509Sdim 334263509Sdimbool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO, 335263509Sdim MCOperand &MCOp) { 336263509Sdim switch (MO.getType()) { 337263509Sdim default: llvm_unreachable("unknown operand type"); 338263509Sdim case MachineOperand::MO_Register: 339263509Sdim MCOp = MCOperand::CreateReg(encodeVirtualRegister(MO.getReg())); 340263509Sdim break; 341263509Sdim case MachineOperand::MO_Immediate: 342263509Sdim MCOp = MCOperand::CreateImm(MO.getImm()); 343263509Sdim break; 344263509Sdim case MachineOperand::MO_MachineBasicBlock: 345263509Sdim MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create( 346263509Sdim MO.getMBB()->getSymbol(), OutContext)); 347263509Sdim break; 348263509Sdim case MachineOperand::MO_ExternalSymbol: 349263509Sdim MCOp = GetSymbolRef(MO, GetExternalSymbolSymbol(MO.getSymbolName())); 350263509Sdim break; 351263509Sdim case MachineOperand::MO_GlobalAddress: 352263509Sdim MCOp = GetSymbolRef(MO, getSymbol(MO.getGlobal())); 353263509Sdim break; 354263509Sdim case MachineOperand::MO_FPImmediate: { 355263509Sdim const ConstantFP *Cnt = MO.getFPImm(); 356263509Sdim APFloat Val = Cnt->getValueAPF(); 357263509Sdim 358263509Sdim switch (Cnt->getType()->getTypeID()) { 359263509Sdim default: report_fatal_error("Unsupported FP type"); break; 360263509Sdim case Type::FloatTyID: 361263509Sdim MCOp = MCOperand::CreateExpr( 362263509Sdim NVPTXFloatMCExpr::CreateConstantFPSingle(Val, OutContext)); 363263509Sdim break; 364263509Sdim case Type::DoubleTyID: 365263509Sdim MCOp = MCOperand::CreateExpr( 366263509Sdim NVPTXFloatMCExpr::CreateConstantFPDouble(Val, OutContext)); 367263509Sdim break; 368263509Sdim } 369263509Sdim break; 370263509Sdim } 371263509Sdim } 372263509Sdim return true; 373263509Sdim} 374263509Sdim 375263509Sdimunsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) { 376263509Sdim if (TargetRegisterInfo::isVirtualRegister(Reg)) { 377263509Sdim const TargetRegisterClass *RC = MRI->getRegClass(Reg); 378263509Sdim 379263509Sdim DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC]; 380263509Sdim unsigned RegNum = RegMap[Reg]; 381263509Sdim 382263509Sdim // Encode the register class in the upper 4 bits 383263509Sdim // Must be kept in sync with NVPTXInstPrinter::printRegName 384263509Sdim unsigned Ret = 0; 385263509Sdim if (RC == &NVPTX::Int1RegsRegClass) { 386263509Sdim Ret = (1 << 28); 387263509Sdim } else if (RC == &NVPTX::Int16RegsRegClass) { 388263509Sdim Ret = (2 << 28); 389263509Sdim } else if (RC == &NVPTX::Int32RegsRegClass) { 390263509Sdim Ret = (3 << 28); 391263509Sdim } else if (RC == &NVPTX::Int64RegsRegClass) { 392263509Sdim Ret = (4 << 28); 393263509Sdim } else if (RC == &NVPTX::Float32RegsRegClass) { 394263509Sdim Ret = (5 << 28); 395263509Sdim } else if (RC == &NVPTX::Float64RegsRegClass) { 396263509Sdim Ret = (6 << 28); 397263509Sdim } else { 398263509Sdim report_fatal_error("Bad register class"); 399263509Sdim } 400263509Sdim 401263509Sdim // Insert the vreg number 402263509Sdim Ret |= (RegNum & 0x0FFFFFFF); 403263509Sdim return Ret; 404263509Sdim } else { 405263509Sdim // Some special-use registers are actually physical registers. 406263509Sdim // Encode this as the register class ID of 0 and the real register ID. 407263509Sdim return Reg & 0x0FFFFFFF; 408263509Sdim } 409263509Sdim} 410263509Sdim 411263509SdimMCOperand NVPTXAsmPrinter::GetSymbolRef(const MachineOperand &MO, 412263509Sdim const MCSymbol *Symbol) { 413263509Sdim const MCExpr *Expr; 414263509Sdim Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, 415263509Sdim OutContext); 416263509Sdim return MCOperand::CreateExpr(Expr); 417263509Sdim} 418263509Sdim 419252723Sdimvoid NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) { 420245431Sdim const DataLayout *TD = TM.getDataLayout(); 421239310Sdim const TargetLowering *TLI = TM.getTargetLowering(); 422239310Sdim 423239310Sdim Type *Ty = F->getReturnType(); 424239310Sdim 425239310Sdim bool isABI = (nvptxSubtarget.getSmVersion() >= 20); 426239310Sdim 427239310Sdim if (Ty->getTypeID() == Type::VoidTyID) 428239310Sdim return; 429239310Sdim 430239310Sdim O << " ("; 431239310Sdim 432239310Sdim if (isABI) { 433239310Sdim if (Ty->isPrimitiveType() || Ty->isIntegerTy()) { 434239310Sdim unsigned size = 0; 435239310Sdim if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) { 436239310Sdim size = ITy->getBitWidth(); 437252723Sdim if (size < 32) 438252723Sdim size = 32; 439239310Sdim } else { 440252723Sdim assert(Ty->isFloatingPointTy() && "Floating point type expected here"); 441239310Sdim size = Ty->getPrimitiveSizeInBits(); 442239310Sdim } 443239310Sdim 444239310Sdim O << ".param .b" << size << " func_retval0"; 445252723Sdim } else if (isa<PointerType>(Ty)) { 446239310Sdim O << ".param .b" << TLI->getPointerTy().getSizeInBits() 447252723Sdim << " func_retval0"; 448239310Sdim } else { 449252723Sdim if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) { 450239310Sdim SmallVector<EVT, 16> vtparts; 451239310Sdim ComputeValueVTs(*TLI, Ty, vtparts); 452239310Sdim unsigned totalsz = 0; 453252723Sdim for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { 454239310Sdim unsigned elems = 1; 455239310Sdim EVT elemtype = vtparts[i]; 456239310Sdim if (vtparts[i].isVector()) { 457239310Sdim elems = vtparts[i].getVectorNumElements(); 458239310Sdim elemtype = vtparts[i].getVectorElementType(); 459239310Sdim } 460252723Sdim for (unsigned j = 0, je = elems; j != je; ++j) { 461239310Sdim unsigned sz = elemtype.getSizeInBits(); 462252723Sdim if (elemtype.isInteger() && (sz < 8)) 463252723Sdim sz = 8; 464252723Sdim totalsz += sz / 8; 465239310Sdim } 466239310Sdim } 467239310Sdim unsigned retAlignment = 0; 468239310Sdim if (!llvm::getAlign(*F, 0, retAlignment)) 469239310Sdim retAlignment = TD->getABITypeAlignment(Ty); 470252723Sdim O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz 471252723Sdim << "]"; 472239310Sdim } else 473252723Sdim assert(false && "Unknown return type"); 474239310Sdim } 475239310Sdim } else { 476239310Sdim SmallVector<EVT, 16> vtparts; 477239310Sdim ComputeValueVTs(*TLI, Ty, vtparts); 478239310Sdim unsigned idx = 0; 479252723Sdim for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { 480239310Sdim unsigned elems = 1; 481239310Sdim EVT elemtype = vtparts[i]; 482239310Sdim if (vtparts[i].isVector()) { 483239310Sdim elems = vtparts[i].getVectorNumElements(); 484239310Sdim elemtype = vtparts[i].getVectorElementType(); 485239310Sdim } 486239310Sdim 487252723Sdim for (unsigned j = 0, je = elems; j != je; ++j) { 488239310Sdim unsigned sz = elemtype.getSizeInBits(); 489252723Sdim if (elemtype.isInteger() && (sz < 32)) 490252723Sdim sz = 32; 491239310Sdim O << ".reg .b" << sz << " func_retval" << idx; 492252723Sdim if (j < je - 1) 493252723Sdim O << ", "; 494239310Sdim ++idx; 495239310Sdim } 496252723Sdim if (i < e - 1) 497239310Sdim O << ", "; 498239310Sdim } 499239310Sdim } 500239310Sdim O << ") "; 501239310Sdim return; 502239310Sdim} 503239310Sdim 504239310Sdimvoid NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF, 505239310Sdim raw_ostream &O) { 506239310Sdim const Function *F = MF.getFunction(); 507239310Sdim printReturnValStr(F, O); 508239310Sdim} 509239310Sdim 510239310Sdimvoid NVPTXAsmPrinter::EmitFunctionEntryLabel() { 511239310Sdim SmallString<128> Str; 512239310Sdim raw_svector_ostream O(Str); 513239310Sdim 514252723Sdim if (!GlobalsEmitted) { 515252723Sdim emitGlobals(*MF->getFunction()->getParent()); 516252723Sdim GlobalsEmitted = true; 517252723Sdim } 518252723Sdim 519239310Sdim // Set up 520239310Sdim MRI = &MF->getRegInfo(); 521239310Sdim F = MF->getFunction(); 522252723Sdim emitLinkageDirective(F, O); 523239310Sdim if (llvm::isKernelFunction(*F)) 524239310Sdim O << ".entry "; 525239310Sdim else { 526239310Sdim O << ".func "; 527239310Sdim printReturnValStr(*MF, O); 528239310Sdim } 529239310Sdim 530239310Sdim O << *CurrentFnSym; 531239310Sdim 532239310Sdim emitFunctionParamList(*MF, O); 533239310Sdim 534239310Sdim if (llvm::isKernelFunction(*F)) 535239310Sdim emitKernelFunctionDirectives(*F, O); 536239310Sdim 537239310Sdim OutStreamer.EmitRawText(O.str()); 538239310Sdim 539239310Sdim prevDebugLoc = DebugLoc(); 540239310Sdim} 541239310Sdim 542239310Sdimvoid NVPTXAsmPrinter::EmitFunctionBodyStart() { 543263509Sdim VRegMapping.clear(); 544239310Sdim OutStreamer.EmitRawText(StringRef("{\n")); 545239310Sdim setAndEmitFunctionVirtualRegisters(*MF); 546239310Sdim 547239310Sdim SmallString<128> Str; 548239310Sdim raw_svector_ostream O(Str); 549239310Sdim emitDemotedVars(MF->getFunction(), O); 550239310Sdim OutStreamer.EmitRawText(O.str()); 551239310Sdim} 552239310Sdim 553239310Sdimvoid NVPTXAsmPrinter::EmitFunctionBodyEnd() { 554239310Sdim OutStreamer.EmitRawText(StringRef("}\n")); 555263509Sdim VRegMapping.clear(); 556239310Sdim} 557239310Sdim 558263509Sdimvoid NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const { 559263509Sdim unsigned RegNo = MI->getOperand(0).getReg(); 560263509Sdim const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 561263509Sdim if (TRI->isVirtualRegister(RegNo)) { 562263509Sdim OutStreamer.AddComment(Twine("implicit-def: ") + 563263509Sdim getVirtualRegisterName(RegNo)); 564263509Sdim } else { 565263509Sdim OutStreamer.AddComment(Twine("implicit-def: ") + 566263509Sdim TM.getRegisterInfo()->getName(RegNo)); 567263509Sdim } 568263509Sdim OutStreamer.AddBlankLine(); 569263509Sdim} 570263509Sdim 571252723Sdimvoid NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F, 572252723Sdim raw_ostream &O) const { 573239310Sdim // If the NVVM IR has some of reqntid* specified, then output 574239310Sdim // the reqntid directive, and set the unspecified ones to 1. 575239310Sdim // If none of reqntid* is specified, don't output reqntid directive. 576239310Sdim unsigned reqntidx, reqntidy, reqntidz; 577239310Sdim bool specified = false; 578252723Sdim if (llvm::getReqNTIDx(F, reqntidx) == false) 579252723Sdim reqntidx = 1; 580252723Sdim else 581252723Sdim specified = true; 582252723Sdim if (llvm::getReqNTIDy(F, reqntidy) == false) 583252723Sdim reqntidy = 1; 584252723Sdim else 585252723Sdim specified = true; 586252723Sdim if (llvm::getReqNTIDz(F, reqntidz) == false) 587252723Sdim reqntidz = 1; 588252723Sdim else 589252723Sdim specified = true; 590239310Sdim 591239310Sdim if (specified) 592252723Sdim O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz 593252723Sdim << "\n"; 594239310Sdim 595239310Sdim // If the NVVM IR has some of maxntid* specified, then output 596239310Sdim // the maxntid directive, and set the unspecified ones to 1. 597239310Sdim // If none of maxntid* is specified, don't output maxntid directive. 598239310Sdim unsigned maxntidx, maxntidy, maxntidz; 599239310Sdim specified = false; 600252723Sdim if (llvm::getMaxNTIDx(F, maxntidx) == false) 601252723Sdim maxntidx = 1; 602252723Sdim else 603252723Sdim specified = true; 604252723Sdim if (llvm::getMaxNTIDy(F, maxntidy) == false) 605252723Sdim maxntidy = 1; 606252723Sdim else 607252723Sdim specified = true; 608252723Sdim if (llvm::getMaxNTIDz(F, maxntidz) == false) 609252723Sdim maxntidz = 1; 610252723Sdim else 611252723Sdim specified = true; 612239310Sdim 613239310Sdim if (specified) 614252723Sdim O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz 615252723Sdim << "\n"; 616239310Sdim 617239310Sdim unsigned mincta; 618239310Sdim if (llvm::getMinCTASm(F, mincta)) 619239310Sdim O << ".minnctapersm " << mincta << "\n"; 620239310Sdim} 621239310Sdim 622263509Sdimstd::string 623263509SdimNVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const { 624263509Sdim const TargetRegisterClass *RC = MRI->getRegClass(Reg); 625239310Sdim 626263509Sdim std::string Name; 627263509Sdim raw_string_ostream NameStr(Name); 628239310Sdim 629263509Sdim VRegRCMap::const_iterator I = VRegMapping.find(RC); 630263509Sdim assert(I != VRegMapping.end() && "Bad register class"); 631263509Sdim const DenseMap<unsigned, unsigned> &RegMap = I->second; 632263509Sdim 633263509Sdim VRegMap::const_iterator VI = RegMap.find(Reg); 634263509Sdim assert(VI != RegMap.end() && "Bad virtual register"); 635263509Sdim unsigned MappedVR = VI->second; 636263509Sdim 637263509Sdim NameStr << getNVPTXRegClassStr(RC) << MappedVR; 638263509Sdim 639263509Sdim NameStr.flush(); 640263509Sdim return Name; 641239310Sdim} 642239310Sdim 643263509Sdimvoid NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr, 644252723Sdim raw_ostream &O) { 645263509Sdim O << getVirtualRegisterName(vr); 646239310Sdim} 647239310Sdim 648252723Sdimvoid NVPTXAsmPrinter::printVecModifiedImmediate( 649252723Sdim const MachineOperand &MO, const char *Modifier, raw_ostream &O) { 650252723Sdim static const char vecelem[] = { '0', '1', '2', '3', '0', '1', '2', '3' }; 651252723Sdim int Imm = (int) MO.getImm(); 652252723Sdim if (0 == strcmp(Modifier, "vecelem")) 653239310Sdim O << "_" << vecelem[Imm]; 654252723Sdim else if (0 == strcmp(Modifier, "vecv4comm1")) { 655252723Sdim if ((Imm < 0) || (Imm > 3)) 656239310Sdim O << "//"; 657252723Sdim } else if (0 == strcmp(Modifier, "vecv4comm2")) { 658252723Sdim if ((Imm < 4) || (Imm > 7)) 659239310Sdim O << "//"; 660252723Sdim } else if (0 == strcmp(Modifier, "vecv4pos")) { 661252723Sdim if (Imm < 0) 662252723Sdim Imm = 0; 663252723Sdim O << "_" << vecelem[Imm % 4]; 664252723Sdim } else if (0 == strcmp(Modifier, "vecv2comm1")) { 665252723Sdim if ((Imm < 0) || (Imm > 1)) 666239310Sdim O << "//"; 667252723Sdim } else if (0 == strcmp(Modifier, "vecv2comm2")) { 668252723Sdim if ((Imm < 2) || (Imm > 3)) 669239310Sdim O << "//"; 670252723Sdim } else if (0 == strcmp(Modifier, "vecv2pos")) { 671252723Sdim if (Imm < 0) 672252723Sdim Imm = 0; 673252723Sdim O << "_" << vecelem[Imm % 2]; 674252723Sdim } else 675239310Sdim llvm_unreachable("Unknown Modifier on immediate operand"); 676239310Sdim} 677239310Sdim 678239310Sdim 679239310Sdim 680252723Sdimvoid NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) { 681239310Sdim 682252723Sdim emitLinkageDirective(F, O); 683239310Sdim if (llvm::isKernelFunction(*F)) 684239310Sdim O << ".entry "; 685239310Sdim else 686239310Sdim O << ".func "; 687239310Sdim printReturnValStr(F, O); 688263509Sdim O << *getSymbol(F) << "\n"; 689239310Sdim emitFunctionParamList(F, O); 690239310Sdim O << ";\n"; 691239310Sdim} 692239310Sdim 693252723Sdimstatic bool usedInGlobalVarDef(const Constant *C) { 694239310Sdim if (!C) 695239310Sdim return false; 696239310Sdim 697239310Sdim if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) { 698239310Sdim if (GV->getName().str() == "llvm.used") 699239310Sdim return false; 700239310Sdim return true; 701239310Sdim } 702239310Sdim 703252723Sdim for (Value::const_use_iterator ui = C->use_begin(), ue = C->use_end(); 704252723Sdim ui != ue; ++ui) { 705239310Sdim const Constant *C = dyn_cast<Constant>(*ui); 706239310Sdim if (usedInGlobalVarDef(C)) 707239310Sdim return true; 708239310Sdim } 709239310Sdim return false; 710239310Sdim} 711239310Sdim 712252723Sdimstatic bool usedInOneFunc(const User *U, Function const *&oneFunc) { 713239310Sdim if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) { 714239310Sdim if (othergv->getName().str() == "llvm.used") 715239310Sdim return true; 716239310Sdim } 717239310Sdim 718239310Sdim if (const Instruction *instr = dyn_cast<Instruction>(U)) { 719239310Sdim if (instr->getParent() && instr->getParent()->getParent()) { 720239310Sdim const Function *curFunc = instr->getParent()->getParent(); 721239310Sdim if (oneFunc && (curFunc != oneFunc)) 722239310Sdim return false; 723239310Sdim oneFunc = curFunc; 724239310Sdim return true; 725252723Sdim } else 726239310Sdim return false; 727239310Sdim } 728239310Sdim 729239310Sdim if (const MDNode *md = dyn_cast<MDNode>(U)) 730239310Sdim if (md->hasName() && ((md->getName().str() == "llvm.dbg.gv") || 731252723Sdim (md->getName().str() == "llvm.dbg.sp"))) 732239310Sdim return true; 733239310Sdim 734252723Sdim for (User::const_use_iterator ui = U->use_begin(), ue = U->use_end(); 735252723Sdim ui != ue; ++ui) { 736239310Sdim if (usedInOneFunc(*ui, oneFunc) == false) 737239310Sdim return false; 738239310Sdim } 739239310Sdim return true; 740239310Sdim} 741239310Sdim 742239310Sdim/* Find out if a global variable can be demoted to local scope. 743239310Sdim * Currently, this is valid for CUDA shared variables, which have local 744239310Sdim * scope and global lifetime. So the conditions to check are : 745239310Sdim * 1. Is the global variable in shared address space? 746239310Sdim * 2. Does it have internal linkage? 747239310Sdim * 3. Is the global variable referenced only in one function? 748239310Sdim */ 749239310Sdimstatic bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) { 750239310Sdim if (gv->hasInternalLinkage() == false) 751239310Sdim return false; 752239310Sdim const PointerType *Pty = gv->getType(); 753239310Sdim if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED) 754239310Sdim return false; 755239310Sdim 756239310Sdim const Function *oneFunc = 0; 757239310Sdim 758239310Sdim bool flag = usedInOneFunc(gv, oneFunc); 759239310Sdim if (flag == false) 760239310Sdim return false; 761239310Sdim if (!oneFunc) 762239310Sdim return false; 763239310Sdim f = oneFunc; 764239310Sdim return true; 765239310Sdim} 766239310Sdim 767239310Sdimstatic bool useFuncSeen(const Constant *C, 768239310Sdim llvm::DenseMap<const Function *, bool> &seenMap) { 769252723Sdim for (Value::const_use_iterator ui = C->use_begin(), ue = C->use_end(); 770252723Sdim ui != ue; ++ui) { 771239310Sdim if (const Constant *cu = dyn_cast<Constant>(*ui)) { 772239310Sdim if (useFuncSeen(cu, seenMap)) 773239310Sdim return true; 774239310Sdim } else if (const Instruction *I = dyn_cast<Instruction>(*ui)) { 775239310Sdim const BasicBlock *bb = I->getParent(); 776252723Sdim if (!bb) 777252723Sdim continue; 778239310Sdim const Function *caller = bb->getParent(); 779252723Sdim if (!caller) 780252723Sdim continue; 781239310Sdim if (seenMap.find(caller) != seenMap.end()) 782239310Sdim return true; 783239310Sdim } 784239310Sdim } 785239310Sdim return false; 786239310Sdim} 787239310Sdim 788252723Sdimvoid NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) { 789239310Sdim llvm::DenseMap<const Function *, bool> seenMap; 790252723Sdim for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) { 791239310Sdim const Function *F = FI; 792239310Sdim 793239310Sdim if (F->isDeclaration()) { 794239310Sdim if (F->use_empty()) 795239310Sdim continue; 796239310Sdim if (F->getIntrinsicID()) 797239310Sdim continue; 798239310Sdim emitDeclaration(F, O); 799239310Sdim continue; 800239310Sdim } 801252723Sdim for (Value::const_use_iterator iter = F->use_begin(), 802252723Sdim iterEnd = F->use_end(); 803252723Sdim iter != iterEnd; ++iter) { 804239310Sdim if (const Constant *C = dyn_cast<Constant>(*iter)) { 805239310Sdim if (usedInGlobalVarDef(C)) { 806239310Sdim // The use is in the initialization of a global variable 807239310Sdim // that is a function pointer, so print a declaration 808239310Sdim // for the original function 809239310Sdim emitDeclaration(F, O); 810239310Sdim break; 811239310Sdim } 812239310Sdim // Emit a declaration of this function if the function that 813239310Sdim // uses this constant expr has already been seen. 814239310Sdim if (useFuncSeen(C, seenMap)) { 815239310Sdim emitDeclaration(F, O); 816239310Sdim break; 817239310Sdim } 818239310Sdim } 819239310Sdim 820252723Sdim if (!isa<Instruction>(*iter)) 821252723Sdim continue; 822239310Sdim const Instruction *instr = cast<Instruction>(*iter); 823239310Sdim const BasicBlock *bb = instr->getParent(); 824252723Sdim if (!bb) 825252723Sdim continue; 826239310Sdim const Function *caller = bb->getParent(); 827252723Sdim if (!caller) 828252723Sdim continue; 829239310Sdim 830239310Sdim // If a caller has already been seen, then the caller is 831239310Sdim // appearing in the module before the callee. so print out 832239310Sdim // a declaration for the callee. 833239310Sdim if (seenMap.find(caller) != seenMap.end()) { 834239310Sdim emitDeclaration(F, O); 835239310Sdim break; 836239310Sdim } 837239310Sdim } 838239310Sdim seenMap[F] = true; 839239310Sdim } 840239310Sdim} 841239310Sdim 842239310Sdimvoid NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) { 843239310Sdim DebugInfoFinder DbgFinder; 844239310Sdim DbgFinder.processModule(M); 845239310Sdim 846252723Sdim unsigned i = 1; 847239310Sdim for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(), 848252723Sdim E = DbgFinder.compile_unit_end(); 849252723Sdim I != E; ++I) { 850239310Sdim DICompileUnit DIUnit(*I); 851239310Sdim StringRef Filename(DIUnit.getFilename()); 852239310Sdim StringRef Dirname(DIUnit.getDirectory()); 853239310Sdim SmallString<128> FullPathName = Dirname; 854239310Sdim if (!Dirname.empty() && !sys::path::is_absolute(Filename)) { 855239310Sdim sys::path::append(FullPathName, Filename); 856239310Sdim Filename = FullPathName.str(); 857239310Sdim } 858239310Sdim if (filenameMap.find(Filename.str()) != filenameMap.end()) 859239310Sdim continue; 860239310Sdim filenameMap[Filename.str()] = i; 861239310Sdim OutStreamer.EmitDwarfFileDirective(i, "", Filename.str()); 862239310Sdim ++i; 863239310Sdim } 864239310Sdim 865239310Sdim for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(), 866252723Sdim E = DbgFinder.subprogram_end(); 867252723Sdim I != E; ++I) { 868239310Sdim DISubprogram SP(*I); 869239310Sdim StringRef Filename(SP.getFilename()); 870239310Sdim StringRef Dirname(SP.getDirectory()); 871239310Sdim SmallString<128> FullPathName = Dirname; 872239310Sdim if (!Dirname.empty() && !sys::path::is_absolute(Filename)) { 873239310Sdim sys::path::append(FullPathName, Filename); 874239310Sdim Filename = FullPathName.str(); 875239310Sdim } 876239310Sdim if (filenameMap.find(Filename.str()) != filenameMap.end()) 877239310Sdim continue; 878239310Sdim filenameMap[Filename.str()] = i; 879239310Sdim ++i; 880239310Sdim } 881239310Sdim} 882239310Sdim 883252723Sdimbool NVPTXAsmPrinter::doInitialization(Module &M) { 884239310Sdim 885239310Sdim SmallString<128> Str1; 886239310Sdim raw_svector_ostream OS1(Str1); 887239310Sdim 888239310Sdim MMI = getAnalysisIfAvailable<MachineModuleInfo>(); 889239310Sdim MMI->AnalyzeModule(M); 890239310Sdim 891239310Sdim // We need to call the parent's one explicitly. 892239310Sdim //bool Result = AsmPrinter::doInitialization(M); 893239310Sdim 894239310Sdim // Initialize TargetLoweringObjectFile. 895252723Sdim const_cast<TargetLoweringObjectFile &>(getObjFileLowering()) 896252723Sdim .Initialize(OutContext, TM); 897239310Sdim 898263509Sdim Mang = new Mangler(&TM); 899239310Sdim 900239310Sdim // Emit header before any dwarf directives are emitted below. 901239310Sdim emitHeader(M, OS1); 902239310Sdim OutStreamer.EmitRawText(OS1.str()); 903239310Sdim 904239310Sdim // Already commented out 905239310Sdim //bool Result = AsmPrinter::doInitialization(M); 906239310Sdim 907263509Sdim // Emit module-level inline asm if it exists. 908263509Sdim if (!M.getModuleInlineAsm().empty()) { 909263509Sdim OutStreamer.AddComment("Start of file scope inline assembly"); 910263509Sdim OutStreamer.AddBlankLine(); 911263509Sdim OutStreamer.EmitRawText(StringRef(M.getModuleInlineAsm())); 912263509Sdim OutStreamer.AddBlankLine(); 913263509Sdim OutStreamer.AddComment("End of file scope inline assembly"); 914263509Sdim OutStreamer.AddBlankLine(); 915263509Sdim } 916263509Sdim 917239310Sdim if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) 918239310Sdim recordAndEmitFilenames(M); 919239310Sdim 920252723Sdim GlobalsEmitted = false; 921252723Sdim 922252723Sdim return false; // success 923252723Sdim} 924252723Sdim 925252723Sdimvoid NVPTXAsmPrinter::emitGlobals(const Module &M) { 926239310Sdim SmallString<128> Str2; 927239310Sdim raw_svector_ostream OS2(Str2); 928239310Sdim 929239310Sdim emitDeclarations(M, OS2); 930239310Sdim 931245431Sdim // As ptxas does not support forward references of globals, we need to first 932245431Sdim // sort the list of module-level globals in def-use order. We visit each 933245431Sdim // global variable in order, and ensure that we emit it *after* its dependent 934245431Sdim // globals. We use a little extra memory maintaining both a set and a list to 935245431Sdim // have fast searches while maintaining a strict ordering. 936252723Sdim SmallVector<const GlobalVariable *, 8> Globals; 937252723Sdim DenseSet<const GlobalVariable *> GVVisited; 938252723Sdim DenseSet<const GlobalVariable *> GVVisiting; 939245431Sdim 940245431Sdim // Visit each global variable, in order 941252723Sdim for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); 942245431Sdim I != E; ++I) 943245431Sdim VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting); 944239310Sdim 945252723Sdim assert(GVVisited.size() == M.getGlobalList().size() && 946245431Sdim "Missed a global variable"); 947245431Sdim assert(GVVisiting.size() == 0 && "Did not fully process a global variable"); 948245431Sdim 949245431Sdim // Print out module-level global variables in proper order 950245431Sdim for (unsigned i = 0, e = Globals.size(); i != e; ++i) 951245431Sdim printModuleLevelGV(Globals[i], OS2); 952245431Sdim 953239310Sdim OS2 << '\n'; 954239310Sdim 955239310Sdim OutStreamer.EmitRawText(OS2.str()); 956239310Sdim} 957239310Sdim 958252723Sdimvoid NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) { 959239310Sdim O << "//\n"; 960239310Sdim O << "// Generated by LLVM NVPTX Back-End\n"; 961239310Sdim O << "//\n"; 962239310Sdim O << "\n"; 963239310Sdim 964245431Sdim unsigned PTXVersion = nvptxSubtarget.getPTXVersion(); 965245431Sdim O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n"; 966239310Sdim 967239310Sdim O << ".target "; 968239310Sdim O << nvptxSubtarget.getTargetName(); 969239310Sdim 970239310Sdim if (nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) 971239310Sdim O << ", texmode_independent"; 972239310Sdim if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) { 973239310Sdim if (!nvptxSubtarget.hasDouble()) 974239310Sdim O << ", map_f64_to_f32"; 975239310Sdim } 976239310Sdim 977239310Sdim if (MAI->doesSupportDebugInformation()) 978239310Sdim O << ", debug"; 979239310Sdim 980239310Sdim O << "\n"; 981239310Sdim 982239310Sdim O << ".address_size "; 983239310Sdim if (nvptxSubtarget.is64Bit()) 984239310Sdim O << "64"; 985239310Sdim else 986239310Sdim O << "32"; 987239310Sdim O << "\n"; 988239310Sdim 989239310Sdim O << "\n"; 990239310Sdim} 991239310Sdim 992239310Sdimbool NVPTXAsmPrinter::doFinalization(Module &M) { 993252723Sdim 994252723Sdim // If we did not emit any functions, then the global declarations have not 995252723Sdim // yet been emitted. 996252723Sdim if (!GlobalsEmitted) { 997252723Sdim emitGlobals(M); 998252723Sdim GlobalsEmitted = true; 999252723Sdim } 1000252723Sdim 1001239310Sdim // XXX Temproarily remove global variables so that doFinalization() will not 1002239310Sdim // emit them again (global variables are emitted at beginning). 1003239310Sdim 1004239310Sdim Module::GlobalListType &global_list = M.getGlobalList(); 1005239310Sdim int i, n = global_list.size(); 1006252723Sdim GlobalVariable **gv_array = new GlobalVariable *[n]; 1007239310Sdim 1008239310Sdim // first, back-up GlobalVariable in gv_array 1009239310Sdim i = 0; 1010239310Sdim for (Module::global_iterator I = global_list.begin(), E = global_list.end(); 1011252723Sdim I != E; ++I) 1012239310Sdim gv_array[i++] = &*I; 1013239310Sdim 1014239310Sdim // second, empty global_list 1015239310Sdim while (!global_list.empty()) 1016239310Sdim global_list.remove(global_list.begin()); 1017239310Sdim 1018239310Sdim // call doFinalization 1019239310Sdim bool ret = AsmPrinter::doFinalization(M); 1020239310Sdim 1021239310Sdim // now we restore global variables 1022252723Sdim for (i = 0; i < n; i++) 1023239310Sdim global_list.insert(global_list.end(), gv_array[i]); 1024239310Sdim 1025239310Sdim delete[] gv_array; 1026239310Sdim return ret; 1027239310Sdim 1028239310Sdim //bool Result = AsmPrinter::doFinalization(M); 1029239310Sdim // Instead of calling the parents doFinalization, we may 1030239310Sdim // clone parents doFinalization and customize here. 1031239310Sdim // Currently, we if NVISA out the EmitGlobals() in 1032239310Sdim // parent's doFinalization, which is too intrusive. 1033239310Sdim // 1034239310Sdim // Same for the doInitialization. 1035239310Sdim //return Result; 1036239310Sdim} 1037239310Sdim 1038239310Sdim// This function emits appropriate linkage directives for 1039239310Sdim// functions and global variables. 1040239310Sdim// 1041239310Sdim// extern function declaration -> .extern 1042239310Sdim// extern function definition -> .visible 1043239310Sdim// external global variable with init -> .visible 1044239310Sdim// external without init -> .extern 1045239310Sdim// appending -> not allowed, assert. 1046239310Sdim 1047252723Sdimvoid NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V, 1048252723Sdim raw_ostream &O) { 1049239310Sdim if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) { 1050239310Sdim if (V->hasExternalLinkage()) { 1051239310Sdim if (isa<GlobalVariable>(V)) { 1052239310Sdim const GlobalVariable *GVar = cast<GlobalVariable>(V); 1053239310Sdim if (GVar) { 1054239310Sdim if (GVar->hasInitializer()) 1055239310Sdim O << ".visible "; 1056239310Sdim else 1057239310Sdim O << ".extern "; 1058239310Sdim } 1059239310Sdim } else if (V->isDeclaration()) 1060239310Sdim O << ".extern "; 1061239310Sdim else 1062239310Sdim O << ".visible "; 1063239310Sdim } else if (V->hasAppendingLinkage()) { 1064239310Sdim std::string msg; 1065239310Sdim msg.append("Error: "); 1066239310Sdim msg.append("Symbol "); 1067239310Sdim if (V->hasName()) 1068239310Sdim msg.append(V->getName().str()); 1069239310Sdim msg.append("has unsupported appending linkage type"); 1070239310Sdim llvm_unreachable(msg.c_str()); 1071239310Sdim } 1072239310Sdim } 1073239310Sdim} 1074239310Sdim 1075252723Sdimvoid NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, 1076252723Sdim raw_ostream &O, 1077239310Sdim bool processDemoted) { 1078239310Sdim 1079239310Sdim // Skip meta data 1080239310Sdim if (GVar->hasSection()) { 1081239310Sdim if (GVar->getSection() == "llvm.metadata") 1082239310Sdim return; 1083239310Sdim } 1084239310Sdim 1085245431Sdim const DataLayout *TD = TM.getDataLayout(); 1086239310Sdim 1087239310Sdim // GlobalVariables are always constant pointers themselves. 1088239310Sdim const PointerType *PTy = GVar->getType(); 1089239310Sdim Type *ETy = PTy->getElementType(); 1090239310Sdim 1091239310Sdim if (GVar->hasExternalLinkage()) { 1092239310Sdim if (GVar->hasInitializer()) 1093239310Sdim O << ".visible "; 1094239310Sdim else 1095239310Sdim O << ".extern "; 1096239310Sdim } 1097239310Sdim 1098239310Sdim if (llvm::isTexture(*GVar)) { 1099239310Sdim O << ".global .texref " << llvm::getTextureName(*GVar) << ";\n"; 1100239310Sdim return; 1101239310Sdim } 1102239310Sdim 1103239310Sdim if (llvm::isSurface(*GVar)) { 1104239310Sdim O << ".global .surfref " << llvm::getSurfaceName(*GVar) << ";\n"; 1105239310Sdim return; 1106239310Sdim } 1107239310Sdim 1108239310Sdim if (GVar->isDeclaration()) { 1109239310Sdim // (extern) declarations, no definition or initializer 1110239310Sdim // Currently the only known declaration is for an automatic __local 1111239310Sdim // (.shared) promoted to global. 1112239310Sdim emitPTXGlobalVariable(GVar, O); 1113239310Sdim O << ";\n"; 1114239310Sdim return; 1115239310Sdim } 1116239310Sdim 1117239310Sdim if (llvm::isSampler(*GVar)) { 1118239310Sdim O << ".global .samplerref " << llvm::getSamplerName(*GVar); 1119239310Sdim 1120252723Sdim const Constant *Initializer = NULL; 1121239310Sdim if (GVar->hasInitializer()) 1122239310Sdim Initializer = GVar->getInitializer(); 1123252723Sdim const ConstantInt *CI = NULL; 1124239310Sdim if (Initializer) 1125239310Sdim CI = dyn_cast<ConstantInt>(Initializer); 1126239310Sdim if (CI) { 1127252723Sdim unsigned sample = CI->getZExtValue(); 1128239310Sdim 1129239310Sdim O << " = { "; 1130239310Sdim 1131252723Sdim for (int i = 0, 1132252723Sdim addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE); 1133252723Sdim i < 3; i++) { 1134239310Sdim O << "addr_mode_" << i << " = "; 1135239310Sdim switch (addr) { 1136252723Sdim case 0: 1137252723Sdim O << "wrap"; 1138252723Sdim break; 1139252723Sdim case 1: 1140252723Sdim O << "clamp_to_border"; 1141252723Sdim break; 1142252723Sdim case 2: 1143252723Sdim O << "clamp_to_edge"; 1144252723Sdim break; 1145252723Sdim case 3: 1146252723Sdim O << "wrap"; 1147252723Sdim break; 1148252723Sdim case 4: 1149252723Sdim O << "mirror"; 1150252723Sdim break; 1151239310Sdim } 1152252723Sdim O << ", "; 1153239310Sdim } 1154239310Sdim O << "filter_mode = "; 1155252723Sdim switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) { 1156252723Sdim case 0: 1157252723Sdim O << "nearest"; 1158252723Sdim break; 1159252723Sdim case 1: 1160252723Sdim O << "linear"; 1161252723Sdim break; 1162252723Sdim case 2: 1163252723Sdim assert(0 && "Anisotropic filtering is not supported"); 1164252723Sdim default: 1165252723Sdim O << "nearest"; 1166252723Sdim break; 1167239310Sdim } 1168252723Sdim if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) { 1169239310Sdim O << ", force_unnormalized_coords = 1"; 1170239310Sdim } 1171239310Sdim O << " }"; 1172239310Sdim } 1173239310Sdim 1174239310Sdim O << ";\n"; 1175239310Sdim return; 1176239310Sdim } 1177239310Sdim 1178239310Sdim if (GVar->hasPrivateLinkage()) { 1179239310Sdim 1180239310Sdim if (!strncmp(GVar->getName().data(), "unrollpragma", 12)) 1181239310Sdim return; 1182239310Sdim 1183239310Sdim // FIXME - need better way (e.g. Metadata) to avoid generating this global 1184239310Sdim if (!strncmp(GVar->getName().data(), "filename", 8)) 1185239310Sdim return; 1186239310Sdim if (GVar->use_empty()) 1187239310Sdim return; 1188239310Sdim } 1189239310Sdim 1190239310Sdim const Function *demotedFunc = 0; 1191239310Sdim if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) { 1192239310Sdim O << "// " << GVar->getName().str() << " has been demoted\n"; 1193239310Sdim if (localDecls.find(demotedFunc) != localDecls.end()) 1194239310Sdim localDecls[demotedFunc].push_back(GVar); 1195239310Sdim else { 1196252723Sdim std::vector<const GlobalVariable *> temp; 1197239310Sdim temp.push_back(GVar); 1198239310Sdim localDecls[demotedFunc] = temp; 1199239310Sdim } 1200239310Sdim return; 1201239310Sdim } 1202239310Sdim 1203239310Sdim O << "."; 1204239310Sdim emitPTXAddressSpace(PTy->getAddressSpace(), O); 1205239310Sdim if (GVar->getAlignment() == 0) 1206239310Sdim O << " .align " << (int) TD->getPrefTypeAlignment(ETy); 1207239310Sdim else 1208239310Sdim O << " .align " << GVar->getAlignment(); 1209239310Sdim 1210239310Sdim if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) { 1211239310Sdim O << " ."; 1212252723Sdim // Special case: ABI requires that we use .u8 for predicates 1213252723Sdim if (ETy->isIntegerTy(1)) 1214252723Sdim O << "u8"; 1215252723Sdim else 1216252723Sdim O << getPTXFundamentalTypeStr(ETy, false); 1217239310Sdim O << " "; 1218263509Sdim O << *getSymbol(GVar); 1219239310Sdim 1220239310Sdim // Ptx allows variable initilization only for constant and global state 1221239310Sdim // spaces. 1222239310Sdim if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) || 1223252723Sdim (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) && 1224252723Sdim GVar->hasInitializer()) { 1225252723Sdim const Constant *Initializer = GVar->getInitializer(); 1226239310Sdim if (!Initializer->isNullValue()) { 1227252723Sdim O << " = "; 1228239310Sdim printScalarConstant(Initializer, O); 1229239310Sdim } 1230239310Sdim } 1231239310Sdim } else { 1232252723Sdim unsigned int ElementSize = 0; 1233239310Sdim 1234239310Sdim // Although PTX has direct support for struct type and array type and 1235239310Sdim // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for 1236239310Sdim // targets that support these high level field accesses. Structs, arrays 1237239310Sdim // and vectors are lowered into arrays of bytes. 1238239310Sdim switch (ETy->getTypeID()) { 1239239310Sdim case Type::StructTyID: 1240239310Sdim case Type::ArrayTyID: 1241239310Sdim case Type::VectorTyID: 1242239310Sdim ElementSize = TD->getTypeStoreSize(ETy); 1243239310Sdim // Ptx allows variable initilization only for constant and 1244239310Sdim // global state spaces. 1245239310Sdim if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) || 1246252723Sdim (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) && 1247252723Sdim GVar->hasInitializer()) { 1248252723Sdim const Constant *Initializer = GVar->getInitializer(); 1249252723Sdim if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) { 1250239310Sdim AggBuffer aggBuffer(ElementSize, O, *this); 1251239310Sdim bufferAggregateConstant(Initializer, &aggBuffer); 1252239310Sdim if (aggBuffer.numSymbols) { 1253239310Sdim if (nvptxSubtarget.is64Bit()) { 1254263509Sdim O << " .u64 " << *getSymbol(GVar) << "["; 1255252723Sdim O << ElementSize / 8; 1256252723Sdim } else { 1257263509Sdim O << " .u32 " << *getSymbol(GVar) << "["; 1258252723Sdim O << ElementSize / 4; 1259239310Sdim } 1260239310Sdim O << "]"; 1261252723Sdim } else { 1262263509Sdim O << " .b8 " << *getSymbol(GVar) << "["; 1263239310Sdim O << ElementSize; 1264239310Sdim O << "]"; 1265239310Sdim } 1266252723Sdim O << " = {"; 1267239310Sdim aggBuffer.print(); 1268239310Sdim O << "}"; 1269252723Sdim } else { 1270263509Sdim O << " .b8 " << *getSymbol(GVar); 1271239310Sdim if (ElementSize) { 1272252723Sdim O << "["; 1273239310Sdim O << ElementSize; 1274239310Sdim O << "]"; 1275239310Sdim } 1276239310Sdim } 1277252723Sdim } else { 1278263509Sdim O << " .b8 " << *getSymbol(GVar); 1279239310Sdim if (ElementSize) { 1280252723Sdim O << "["; 1281239310Sdim O << ElementSize; 1282239310Sdim O << "]"; 1283239310Sdim } 1284239310Sdim } 1285239310Sdim break; 1286239310Sdim default: 1287252723Sdim assert(0 && "type not supported yet"); 1288239310Sdim } 1289239310Sdim 1290239310Sdim } 1291239310Sdim O << ";\n"; 1292239310Sdim} 1293239310Sdim 1294239310Sdimvoid NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) { 1295239310Sdim if (localDecls.find(f) == localDecls.end()) 1296239310Sdim return; 1297239310Sdim 1298252723Sdim std::vector<const GlobalVariable *> &gvars = localDecls[f]; 1299239310Sdim 1300252723Sdim for (unsigned i = 0, e = gvars.size(); i != e; ++i) { 1301239310Sdim O << "\t// demoted variable\n\t"; 1302239310Sdim printModuleLevelGV(gvars[i], O, true); 1303239310Sdim } 1304239310Sdim} 1305239310Sdim 1306239310Sdimvoid NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace, 1307239310Sdim raw_ostream &O) const { 1308239310Sdim switch (AddressSpace) { 1309239310Sdim case llvm::ADDRESS_SPACE_LOCAL: 1310252723Sdim O << "local"; 1311239310Sdim break; 1312239310Sdim case llvm::ADDRESS_SPACE_GLOBAL: 1313252723Sdim O << "global"; 1314239310Sdim break; 1315239310Sdim case llvm::ADDRESS_SPACE_CONST: 1316252723Sdim O << "const"; 1317239310Sdim break; 1318239310Sdim case llvm::ADDRESS_SPACE_SHARED: 1319252723Sdim O << "shared"; 1320239310Sdim break; 1321239310Sdim default: 1322252723Sdim report_fatal_error("Bad address space found while emitting PTX"); 1323252723Sdim break; 1324239310Sdim } 1325239310Sdim} 1326239310Sdim 1327252723Sdimstd::string 1328252723SdimNVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const { 1329239310Sdim switch (Ty->getTypeID()) { 1330239310Sdim default: 1331239310Sdim llvm_unreachable("unexpected type"); 1332239310Sdim break; 1333239310Sdim case Type::IntegerTyID: { 1334239310Sdim unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); 1335239310Sdim if (NumBits == 1) 1336239310Sdim return "pred"; 1337239310Sdim else if (NumBits <= 64) { 1338239310Sdim std::string name = "u"; 1339239310Sdim return name + utostr(NumBits); 1340239310Sdim } else { 1341239310Sdim llvm_unreachable("Integer too large"); 1342239310Sdim break; 1343239310Sdim } 1344239310Sdim break; 1345239310Sdim } 1346239310Sdim case Type::FloatTyID: 1347239310Sdim return "f32"; 1348239310Sdim case Type::DoubleTyID: 1349239310Sdim return "f64"; 1350239310Sdim case Type::PointerTyID: 1351239310Sdim if (nvptxSubtarget.is64Bit()) 1352252723Sdim if (useB4PTR) 1353252723Sdim return "b64"; 1354252723Sdim else 1355252723Sdim return "u64"; 1356252723Sdim else if (useB4PTR) 1357252723Sdim return "b32"; 1358239310Sdim else 1359252723Sdim return "u32"; 1360239310Sdim } 1361239310Sdim llvm_unreachable("unexpected type"); 1362239310Sdim return NULL; 1363239310Sdim} 1364239310Sdim 1365252723Sdimvoid NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, 1366239310Sdim raw_ostream &O) { 1367239310Sdim 1368245431Sdim const DataLayout *TD = TM.getDataLayout(); 1369239310Sdim 1370239310Sdim // GlobalVariables are always constant pointers themselves. 1371239310Sdim const PointerType *PTy = GVar->getType(); 1372239310Sdim Type *ETy = PTy->getElementType(); 1373239310Sdim 1374239310Sdim O << "."; 1375239310Sdim emitPTXAddressSpace(PTy->getAddressSpace(), O); 1376239310Sdim if (GVar->getAlignment() == 0) 1377239310Sdim O << " .align " << (int) TD->getPrefTypeAlignment(ETy); 1378239310Sdim else 1379239310Sdim O << " .align " << GVar->getAlignment(); 1380239310Sdim 1381239310Sdim if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) { 1382239310Sdim O << " ."; 1383239310Sdim O << getPTXFundamentalTypeStr(ETy); 1384239310Sdim O << " "; 1385263509Sdim O << *getSymbol(GVar); 1386239310Sdim return; 1387239310Sdim } 1388239310Sdim 1389252723Sdim int64_t ElementSize = 0; 1390239310Sdim 1391239310Sdim // Although PTX has direct support for struct type and array type and LLVM IR 1392239310Sdim // is very similar to PTX, the LLVM CodeGen does not support for targets that 1393239310Sdim // support these high level field accesses. Structs and arrays are lowered 1394239310Sdim // into arrays of bytes. 1395239310Sdim switch (ETy->getTypeID()) { 1396239310Sdim case Type::StructTyID: 1397239310Sdim case Type::ArrayTyID: 1398239310Sdim case Type::VectorTyID: 1399239310Sdim ElementSize = TD->getTypeStoreSize(ETy); 1400263509Sdim O << " .b8 " << *getSymbol(GVar) << "["; 1401239310Sdim if (ElementSize) { 1402252723Sdim O << itostr(ElementSize); 1403239310Sdim } 1404239310Sdim O << "]"; 1405239310Sdim break; 1406239310Sdim default: 1407252723Sdim assert(0 && "type not supported yet"); 1408239310Sdim } 1409252723Sdim return; 1410239310Sdim} 1411239310Sdim 1412252723Sdimstatic unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) { 1413239310Sdim if (Ty->isPrimitiveType() || Ty->isIntegerTy() || isa<PointerType>(Ty)) 1414239310Sdim return TD->getPrefTypeAlignment(Ty); 1415239310Sdim 1416239310Sdim const ArrayType *ATy = dyn_cast<ArrayType>(Ty); 1417239310Sdim if (ATy) 1418239310Sdim return getOpenCLAlignment(TD, ATy->getElementType()); 1419239310Sdim 1420239310Sdim const VectorType *VTy = dyn_cast<VectorType>(Ty); 1421239310Sdim if (VTy) { 1422239310Sdim Type *ETy = VTy->getElementType(); 1423239310Sdim unsigned int numE = VTy->getNumElements(); 1424239310Sdim unsigned int alignE = TD->getPrefTypeAlignment(ETy); 1425239310Sdim if (numE == 3) 1426252723Sdim return 4 * alignE; 1427239310Sdim else 1428252723Sdim return numE * alignE; 1429239310Sdim } 1430239310Sdim 1431239310Sdim const StructType *STy = dyn_cast<StructType>(Ty); 1432239310Sdim if (STy) { 1433239310Sdim unsigned int alignStruct = 1; 1434239310Sdim // Go through each element of the struct and find the 1435239310Sdim // largest alignment. 1436252723Sdim for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) { 1437239310Sdim Type *ETy = STy->getElementType(i); 1438239310Sdim unsigned int align = getOpenCLAlignment(TD, ETy); 1439239310Sdim if (align > alignStruct) 1440239310Sdim alignStruct = align; 1441239310Sdim } 1442239310Sdim return alignStruct; 1443239310Sdim } 1444239310Sdim 1445239310Sdim const FunctionType *FTy = dyn_cast<FunctionType>(Ty); 1446239310Sdim if (FTy) 1447239310Sdim return TD->getPointerPrefAlignment(); 1448239310Sdim return TD->getPrefTypeAlignment(Ty); 1449239310Sdim} 1450239310Sdim 1451239310Sdimvoid NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I, 1452239310Sdim int paramIndex, raw_ostream &O) { 1453239310Sdim if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) || 1454239310Sdim (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)) 1455263509Sdim O << *getSymbol(I->getParent()) << "_param_" << paramIndex; 1456239310Sdim else { 1457239310Sdim std::string argName = I->getName(); 1458239310Sdim const char *p = argName.c_str(); 1459239310Sdim while (*p) { 1460239310Sdim if (*p == '.') 1461239310Sdim O << "_"; 1462239310Sdim else 1463239310Sdim O << *p; 1464239310Sdim p++; 1465239310Sdim } 1466239310Sdim } 1467239310Sdim} 1468239310Sdim 1469239310Sdimvoid NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) { 1470239310Sdim Function::const_arg_iterator I, E; 1471239310Sdim int i = 0; 1472239310Sdim 1473239310Sdim if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) || 1474239310Sdim (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)) { 1475239310Sdim O << *CurrentFnSym << "_param_" << paramIndex; 1476239310Sdim return; 1477239310Sdim } 1478239310Sdim 1479239310Sdim for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, i++) { 1480252723Sdim if (i == paramIndex) { 1481239310Sdim printParamName(I, paramIndex, O); 1482239310Sdim return; 1483239310Sdim } 1484239310Sdim } 1485239310Sdim llvm_unreachable("paramIndex out of bound"); 1486239310Sdim} 1487239310Sdim 1488252723Sdimvoid NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { 1489245431Sdim const DataLayout *TD = TM.getDataLayout(); 1490252723Sdim const AttributeSet &PAL = F->getAttributes(); 1491239310Sdim const TargetLowering *TLI = TM.getTargetLowering(); 1492239310Sdim Function::const_arg_iterator I, E; 1493239310Sdim unsigned paramIndex = 0; 1494239310Sdim bool first = true; 1495239310Sdim bool isKernelFunc = llvm::isKernelFunction(*F); 1496239310Sdim bool isABI = (nvptxSubtarget.getSmVersion() >= 20); 1497239310Sdim MVT thePointerTy = TLI->getPointerTy(); 1498239310Sdim 1499239310Sdim O << "(\n"; 1500239310Sdim 1501239310Sdim for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) { 1502252723Sdim Type *Ty = I->getType(); 1503239310Sdim 1504239310Sdim if (!first) 1505239310Sdim O << ",\n"; 1506239310Sdim 1507239310Sdim first = false; 1508239310Sdim 1509239310Sdim // Handle image/sampler parameters 1510239310Sdim if (llvm::isSampler(*I) || llvm::isImage(*I)) { 1511239310Sdim if (llvm::isImage(*I)) { 1512239310Sdim std::string sname = I->getName(); 1513239310Sdim if (llvm::isImageWriteOnly(*I)) 1514263509Sdim O << "\t.param .surfref " << *getSymbol(F) << "_param_" 1515252723Sdim << paramIndex; 1516239310Sdim else // Default image is read_only 1517263509Sdim O << "\t.param .texref " << *getSymbol(F) << "_param_" 1518252723Sdim << paramIndex; 1519252723Sdim } else // Should be llvm::isSampler(*I) 1520263509Sdim O << "\t.param .samplerref " << *getSymbol(F) << "_param_" 1521252723Sdim << paramIndex; 1522239310Sdim continue; 1523239310Sdim } 1524239310Sdim 1525252723Sdim if (PAL.hasAttribute(paramIndex + 1, Attribute::ByVal) == false) { 1526252723Sdim if (Ty->isVectorTy()) { 1527252723Sdim // Just print .param .b8 .align <a> .param[size]; 1528252723Sdim // <a> = PAL.getparamalignment 1529252723Sdim // size = typeallocsize of element type 1530252723Sdim unsigned align = PAL.getParamAlignment(paramIndex + 1); 1531252723Sdim if (align == 0) 1532252723Sdim align = TD->getABITypeAlignment(Ty); 1533252723Sdim 1534252723Sdim unsigned sz = TD->getTypeAllocSize(Ty); 1535252723Sdim O << "\t.param .align " << align << " .b8 "; 1536252723Sdim printParamName(I, paramIndex, O); 1537252723Sdim O << "[" << sz << "]"; 1538252723Sdim 1539252723Sdim continue; 1540252723Sdim } 1541239310Sdim // Just a scalar 1542239310Sdim const PointerType *PTy = dyn_cast<PointerType>(Ty); 1543239310Sdim if (isKernelFunc) { 1544239310Sdim if (PTy) { 1545239310Sdim // Special handling for pointer arguments to kernel 1546239310Sdim O << "\t.param .u" << thePointerTy.getSizeInBits() << " "; 1547239310Sdim 1548239310Sdim if (nvptxSubtarget.getDrvInterface() != NVPTX::CUDA) { 1549239310Sdim Type *ETy = PTy->getElementType(); 1550239310Sdim int addrSpace = PTy->getAddressSpace(); 1551252723Sdim switch (addrSpace) { 1552239310Sdim default: 1553239310Sdim O << ".ptr "; 1554239310Sdim break; 1555263509Sdim case llvm::ADDRESS_SPACE_CONST: 1556239310Sdim O << ".ptr .const "; 1557239310Sdim break; 1558239310Sdim case llvm::ADDRESS_SPACE_SHARED: 1559239310Sdim O << ".ptr .shared "; 1560239310Sdim break; 1561239310Sdim case llvm::ADDRESS_SPACE_GLOBAL: 1562239310Sdim O << ".ptr .global "; 1563239310Sdim break; 1564239310Sdim } 1565252723Sdim O << ".align " << (int) getOpenCLAlignment(TD, ETy) << " "; 1566239310Sdim } 1567239310Sdim printParamName(I, paramIndex, O); 1568239310Sdim continue; 1569239310Sdim } 1570239310Sdim 1571239310Sdim // non-pointer scalar to kernel func 1572252723Sdim O << "\t.param ."; 1573252723Sdim // Special case: predicate operands become .u8 types 1574252723Sdim if (Ty->isIntegerTy(1)) 1575252723Sdim O << "u8"; 1576252723Sdim else 1577252723Sdim O << getPTXFundamentalTypeStr(Ty); 1578252723Sdim O << " "; 1579239310Sdim printParamName(I, paramIndex, O); 1580239310Sdim continue; 1581239310Sdim } 1582239310Sdim // Non-kernel function, just print .param .b<size> for ABI 1583239310Sdim // and .reg .b<size> for non ABY 1584239310Sdim unsigned sz = 0; 1585239310Sdim if (isa<IntegerType>(Ty)) { 1586239310Sdim sz = cast<IntegerType>(Ty)->getBitWidth(); 1587252723Sdim if (sz < 32) 1588252723Sdim sz = 32; 1589252723Sdim } else if (isa<PointerType>(Ty)) 1590239310Sdim sz = thePointerTy.getSizeInBits(); 1591239310Sdim else 1592239310Sdim sz = Ty->getPrimitiveSizeInBits(); 1593239310Sdim if (isABI) 1594239310Sdim O << "\t.param .b" << sz << " "; 1595239310Sdim else 1596239310Sdim O << "\t.reg .b" << sz << " "; 1597239310Sdim printParamName(I, paramIndex, O); 1598239310Sdim continue; 1599239310Sdim } 1600239310Sdim 1601239310Sdim // param has byVal attribute. So should be a pointer 1602239310Sdim const PointerType *PTy = dyn_cast<PointerType>(Ty); 1603252723Sdim assert(PTy && "Param with byval attribute should be a pointer type"); 1604239310Sdim Type *ETy = PTy->getElementType(); 1605239310Sdim 1606239310Sdim if (isABI || isKernelFunc) { 1607239310Sdim // Just print .param .b8 .align <a> .param[size]; 1608239310Sdim // <a> = PAL.getparamalignment 1609239310Sdim // size = typeallocsize of element type 1610252723Sdim unsigned align = PAL.getParamAlignment(paramIndex + 1); 1611245431Sdim if (align == 0) 1612245431Sdim align = TD->getABITypeAlignment(ETy); 1613245431Sdim 1614239310Sdim unsigned sz = TD->getTypeAllocSize(ETy); 1615252723Sdim O << "\t.param .align " << align << " .b8 "; 1616239310Sdim printParamName(I, paramIndex, O); 1617239310Sdim O << "[" << sz << "]"; 1618239310Sdim continue; 1619239310Sdim } else { 1620239310Sdim // Split the ETy into constituent parts and 1621239310Sdim // print .param .b<size> <name> for each part. 1622239310Sdim // Further, if a part is vector, print the above for 1623239310Sdim // each vector element. 1624239310Sdim SmallVector<EVT, 16> vtparts; 1625239310Sdim ComputeValueVTs(*TLI, ETy, vtparts); 1626252723Sdim for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { 1627239310Sdim unsigned elems = 1; 1628239310Sdim EVT elemtype = vtparts[i]; 1629239310Sdim if (vtparts[i].isVector()) { 1630239310Sdim elems = vtparts[i].getVectorNumElements(); 1631239310Sdim elemtype = vtparts[i].getVectorElementType(); 1632239310Sdim } 1633239310Sdim 1634252723Sdim for (unsigned j = 0, je = elems; j != je; ++j) { 1635239310Sdim unsigned sz = elemtype.getSizeInBits(); 1636252723Sdim if (elemtype.isInteger() && (sz < 32)) 1637252723Sdim sz = 32; 1638239310Sdim O << "\t.reg .b" << sz << " "; 1639239310Sdim printParamName(I, paramIndex, O); 1640252723Sdim if (j < je - 1) 1641252723Sdim O << ",\n"; 1642239310Sdim ++paramIndex; 1643239310Sdim } 1644252723Sdim if (i < e - 1) 1645239310Sdim O << ",\n"; 1646239310Sdim } 1647239310Sdim --paramIndex; 1648239310Sdim continue; 1649239310Sdim } 1650239310Sdim } 1651239310Sdim 1652239310Sdim O << "\n)\n"; 1653239310Sdim} 1654239310Sdim 1655239310Sdimvoid NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF, 1656239310Sdim raw_ostream &O) { 1657239310Sdim const Function *F = MF.getFunction(); 1658239310Sdim emitFunctionParamList(F, O); 1659239310Sdim} 1660239310Sdim 1661252723Sdimvoid NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters( 1662252723Sdim const MachineFunction &MF) { 1663239310Sdim SmallString<128> Str; 1664239310Sdim raw_svector_ostream O(Str); 1665239310Sdim 1666239310Sdim // Map the global virtual register number to a register class specific 1667239310Sdim // virtual register number starting from 1 with that class. 1668239310Sdim const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); 1669239310Sdim //unsigned numRegClasses = TRI->getNumRegClasses(); 1670239310Sdim 1671239310Sdim // Emit the Fake Stack Object 1672239310Sdim const MachineFrameInfo *MFI = MF.getFrameInfo(); 1673239310Sdim int NumBytes = (int) MFI->getStackSize(); 1674239310Sdim if (NumBytes) { 1675252723Sdim O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t" << DEPOTNAME 1676252723Sdim << getFunctionNumber() << "[" << NumBytes << "];\n"; 1677239310Sdim if (nvptxSubtarget.is64Bit()) { 1678239310Sdim O << "\t.reg .b64 \t%SP;\n"; 1679239310Sdim O << "\t.reg .b64 \t%SPL;\n"; 1680252723Sdim } else { 1681239310Sdim O << "\t.reg .b32 \t%SP;\n"; 1682239310Sdim O << "\t.reg .b32 \t%SPL;\n"; 1683239310Sdim } 1684239310Sdim } 1685239310Sdim 1686239310Sdim // Go through all virtual registers to establish the mapping between the 1687239310Sdim // global virtual 1688239310Sdim // register number and the per class virtual register number. 1689239310Sdim // We use the per class virtual register number in the ptx output. 1690239310Sdim unsigned int numVRs = MRI->getNumVirtRegs(); 1691252723Sdim for (unsigned i = 0; i < numVRs; i++) { 1692239310Sdim unsigned int vr = TRI->index2VirtReg(i); 1693239310Sdim const TargetRegisterClass *RC = MRI->getRegClass(vr); 1694263509Sdim DenseMap<unsigned, unsigned> ®map = VRegMapping[RC]; 1695239310Sdim int n = regmap.size(); 1696252723Sdim regmap.insert(std::make_pair(vr, n + 1)); 1697239310Sdim } 1698239310Sdim 1699239310Sdim // Emit register declarations 1700239310Sdim // @TODO: Extract out the real register usage 1701263509Sdim // O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n"; 1702263509Sdim // O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n"; 1703263509Sdim // O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n"; 1704263509Sdim // O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n"; 1705263509Sdim // O << "\t.reg .s64 %rl<" << NVPTXNumRegisters << ">;\n"; 1706263509Sdim // O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n"; 1707263509Sdim // O << "\t.reg .f64 %fl<" << NVPTXNumRegisters << ">;\n"; 1708239310Sdim 1709239310Sdim // Emit declaration of the virtual registers or 'physical' registers for 1710239310Sdim // each register class 1711263509Sdim for (unsigned i=0; i< TRI->getNumRegClasses(); i++) { 1712263509Sdim const TargetRegisterClass *RC = TRI->getRegClass(i); 1713263509Sdim DenseMap<unsigned, unsigned> ®map = VRegMapping[RC]; 1714263509Sdim std::string rcname = getNVPTXRegClassName(RC); 1715263509Sdim std::string rcStr = getNVPTXRegClassStr(RC); 1716263509Sdim int n = regmap.size(); 1717239310Sdim 1718263509Sdim // Only declare those registers that may be used. 1719263509Sdim if (n) { 1720263509Sdim O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1) 1721263509Sdim << ">;\n"; 1722263509Sdim } 1723263509Sdim } 1724239310Sdim 1725239310Sdim OutStreamer.EmitRawText(O.str()); 1726239310Sdim} 1727239310Sdim 1728239310Sdimvoid NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) { 1729252723Sdim APFloat APF = APFloat(Fp->getValueAPF()); // make a copy 1730239310Sdim bool ignored; 1731239310Sdim unsigned int numHex; 1732239310Sdim const char *lead; 1733239310Sdim 1734252723Sdim if (Fp->getType()->getTypeID() == Type::FloatTyID) { 1735239310Sdim numHex = 8; 1736239310Sdim lead = "0f"; 1737252723Sdim APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &ignored); 1738239310Sdim } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) { 1739239310Sdim numHex = 16; 1740239310Sdim lead = "0d"; 1741252723Sdim APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored); 1742239310Sdim } else 1743239310Sdim llvm_unreachable("unsupported fp type"); 1744239310Sdim 1745239310Sdim APInt API = APF.bitcastToAPInt(); 1746239310Sdim std::string hexstr(utohexstr(API.getZExtValue())); 1747239310Sdim O << lead; 1748239310Sdim if (hexstr.length() < numHex) 1749239310Sdim O << std::string(numHex - hexstr.length(), '0'); 1750239310Sdim O << utohexstr(API.getZExtValue()); 1751239310Sdim} 1752239310Sdim 1753252723Sdimvoid NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) { 1754252723Sdim if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) { 1755239310Sdim O << CI->getValue(); 1756239310Sdim return; 1757239310Sdim } 1758252723Sdim if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) { 1759239310Sdim printFPConstant(CFP, O); 1760239310Sdim return; 1761239310Sdim } 1762239310Sdim if (isa<ConstantPointerNull>(CPV)) { 1763239310Sdim O << "0"; 1764239310Sdim return; 1765239310Sdim } 1766252723Sdim if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) { 1767263509Sdim O << *getSymbol(GVar); 1768239310Sdim return; 1769239310Sdim } 1770252723Sdim if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1771252723Sdim const Value *v = Cexpr->stripPointerCasts(); 1772252723Sdim if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) { 1773263509Sdim O << *getSymbol(GVar); 1774239310Sdim return; 1775239310Sdim } else { 1776239310Sdim O << *LowerConstant(CPV, *this); 1777239310Sdim return; 1778239310Sdim } 1779239310Sdim } 1780239310Sdim llvm_unreachable("Not scalar type found in printScalarConstant()"); 1781239310Sdim} 1782239310Sdim 1783252723Sdimvoid NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, 1784239310Sdim AggBuffer *aggBuffer) { 1785239310Sdim 1786245431Sdim const DataLayout *TD = TM.getDataLayout(); 1787239310Sdim 1788239310Sdim if (isa<UndefValue>(CPV) || CPV->isNullValue()) { 1789239310Sdim int s = TD->getTypeAllocSize(CPV->getType()); 1790252723Sdim if (s < Bytes) 1791239310Sdim s = Bytes; 1792239310Sdim aggBuffer->addZeros(s); 1793239310Sdim return; 1794239310Sdim } 1795239310Sdim 1796239310Sdim unsigned char *ptr; 1797239310Sdim switch (CPV->getType()->getTypeID()) { 1798239310Sdim 1799239310Sdim case Type::IntegerTyID: { 1800239310Sdim const Type *ETy = CPV->getType(); 1801252723Sdim if (ETy == Type::getInt8Ty(CPV->getContext())) { 1802239310Sdim unsigned char c = 1803239310Sdim (unsigned char)(dyn_cast<ConstantInt>(CPV))->getZExtValue(); 1804239310Sdim ptr = &c; 1805239310Sdim aggBuffer->addBytes(ptr, 1, Bytes); 1806252723Sdim } else if (ETy == Type::getInt16Ty(CPV->getContext())) { 1807252723Sdim short int16 = (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue(); 1808252723Sdim ptr = (unsigned char *)&int16; 1809239310Sdim aggBuffer->addBytes(ptr, 2, Bytes); 1810252723Sdim } else if (ETy == Type::getInt32Ty(CPV->getContext())) { 1811252723Sdim if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { 1812252723Sdim int int32 = (int)(constInt->getZExtValue()); 1813252723Sdim ptr = (unsigned char *)&int32; 1814239310Sdim aggBuffer->addBytes(ptr, 4, Bytes); 1815239310Sdim break; 1816252723Sdim } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1817252723Sdim if (const ConstantInt *constInt = dyn_cast<ConstantInt>( 1818252723Sdim ConstantFoldConstantExpression(Cexpr, TD))) { 1819252723Sdim int int32 = (int)(constInt->getZExtValue()); 1820252723Sdim ptr = (unsigned char *)&int32; 1821239310Sdim aggBuffer->addBytes(ptr, 4, Bytes); 1822239310Sdim break; 1823239310Sdim } 1824239310Sdim if (Cexpr->getOpcode() == Instruction::PtrToInt) { 1825239310Sdim Value *v = Cexpr->getOperand(0)->stripPointerCasts(); 1826239310Sdim aggBuffer->addSymbol(v); 1827239310Sdim aggBuffer->addZeros(4); 1828239310Sdim break; 1829239310Sdim } 1830239310Sdim } 1831239310Sdim llvm_unreachable("unsupported integer const type"); 1832252723Sdim } else if (ETy == Type::getInt64Ty(CPV->getContext())) { 1833252723Sdim if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { 1834252723Sdim long long int64 = (long long)(constInt->getZExtValue()); 1835252723Sdim ptr = (unsigned char *)&int64; 1836239310Sdim aggBuffer->addBytes(ptr, 8, Bytes); 1837239310Sdim break; 1838252723Sdim } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1839252723Sdim if (const ConstantInt *constInt = dyn_cast<ConstantInt>( 1840252723Sdim ConstantFoldConstantExpression(Cexpr, TD))) { 1841252723Sdim long long int64 = (long long)(constInt->getZExtValue()); 1842252723Sdim ptr = (unsigned char *)&int64; 1843239310Sdim aggBuffer->addBytes(ptr, 8, Bytes); 1844239310Sdim break; 1845239310Sdim } 1846239310Sdim if (Cexpr->getOpcode() == Instruction::PtrToInt) { 1847239310Sdim Value *v = Cexpr->getOperand(0)->stripPointerCasts(); 1848239310Sdim aggBuffer->addSymbol(v); 1849239310Sdim aggBuffer->addZeros(8); 1850239310Sdim break; 1851239310Sdim } 1852239310Sdim } 1853239310Sdim llvm_unreachable("unsupported integer const type"); 1854239310Sdim } else 1855239310Sdim llvm_unreachable("unsupported integer const type"); 1856239310Sdim break; 1857239310Sdim } 1858239310Sdim case Type::FloatTyID: 1859239310Sdim case Type::DoubleTyID: { 1860252723Sdim const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV); 1861252723Sdim const Type *Ty = CFP->getType(); 1862239310Sdim if (Ty == Type::getFloatTy(CPV->getContext())) { 1863252723Sdim float float32 = (float) CFP->getValueAPF().convertToFloat(); 1864252723Sdim ptr = (unsigned char *)&float32; 1865239310Sdim aggBuffer->addBytes(ptr, 4, Bytes); 1866239310Sdim } else if (Ty == Type::getDoubleTy(CPV->getContext())) { 1867239310Sdim double float64 = CFP->getValueAPF().convertToDouble(); 1868252723Sdim ptr = (unsigned char *)&float64; 1869239310Sdim aggBuffer->addBytes(ptr, 8, Bytes); 1870252723Sdim } else { 1871239310Sdim llvm_unreachable("unsupported fp const type"); 1872239310Sdim } 1873239310Sdim break; 1874239310Sdim } 1875239310Sdim case Type::PointerTyID: { 1876252723Sdim if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) { 1877239310Sdim aggBuffer->addSymbol(GVar); 1878252723Sdim } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1879252723Sdim const Value *v = Cexpr->stripPointerCasts(); 1880239310Sdim aggBuffer->addSymbol(v); 1881239310Sdim } 1882239310Sdim unsigned int s = TD->getTypeAllocSize(CPV->getType()); 1883239310Sdim aggBuffer->addZeros(s); 1884239310Sdim break; 1885239310Sdim } 1886239310Sdim 1887239310Sdim case Type::ArrayTyID: 1888239310Sdim case Type::VectorTyID: 1889239310Sdim case Type::StructTyID: { 1890239310Sdim if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV) || 1891263509Sdim isa<ConstantStruct>(CPV) || isa<ConstantDataSequential>(CPV)) { 1892239310Sdim int ElementSize = TD->getTypeAllocSize(CPV->getType()); 1893239310Sdim bufferAggregateConstant(CPV, aggBuffer); 1894252723Sdim if (Bytes > ElementSize) 1895252723Sdim aggBuffer->addZeros(Bytes - ElementSize); 1896252723Sdim } else if (isa<ConstantAggregateZero>(CPV)) 1897239310Sdim aggBuffer->addZeros(Bytes); 1898239310Sdim else 1899239310Sdim llvm_unreachable("Unexpected Constant type"); 1900239310Sdim break; 1901239310Sdim } 1902239310Sdim 1903239310Sdim default: 1904239310Sdim llvm_unreachable("unsupported type"); 1905239310Sdim } 1906239310Sdim} 1907239310Sdim 1908252723Sdimvoid NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV, 1909239310Sdim AggBuffer *aggBuffer) { 1910245431Sdim const DataLayout *TD = TM.getDataLayout(); 1911239310Sdim int Bytes; 1912239310Sdim 1913239310Sdim // Old constants 1914239310Sdim if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) { 1915239310Sdim if (CPV->getNumOperands()) 1916239310Sdim for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) 1917239310Sdim bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer); 1918239310Sdim return; 1919239310Sdim } 1920239310Sdim 1921239310Sdim if (const ConstantDataSequential *CDS = 1922252723Sdim dyn_cast<ConstantDataSequential>(CPV)) { 1923239310Sdim if (CDS->getNumElements()) 1924239310Sdim for (unsigned i = 0; i < CDS->getNumElements(); ++i) 1925239310Sdim bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0, 1926239310Sdim aggBuffer); 1927239310Sdim return; 1928239310Sdim } 1929239310Sdim 1930239310Sdim if (isa<ConstantStruct>(CPV)) { 1931239310Sdim if (CPV->getNumOperands()) { 1932239310Sdim StructType *ST = cast<StructType>(CPV->getType()); 1933239310Sdim for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) { 1934252723Sdim if (i == (e - 1)) 1935239310Sdim Bytes = TD->getStructLayout(ST)->getElementOffset(0) + 1936252723Sdim TD->getTypeAllocSize(ST) - 1937252723Sdim TD->getStructLayout(ST)->getElementOffset(i); 1938239310Sdim else 1939252723Sdim Bytes = TD->getStructLayout(ST)->getElementOffset(i + 1) - 1940252723Sdim TD->getStructLayout(ST)->getElementOffset(i); 1941252723Sdim bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer); 1942239310Sdim } 1943239310Sdim } 1944239310Sdim return; 1945239310Sdim } 1946239310Sdim llvm_unreachable("unsupported constant type in printAggregateConstant()"); 1947239310Sdim} 1948239310Sdim 1949239310Sdim// buildTypeNameMap - Run through symbol table looking for type names. 1950239310Sdim// 1951239310Sdim 1952239310Sdimbool NVPTXAsmPrinter::isImageType(const Type *Ty) { 1953239310Sdim 1954239310Sdim std::map<const Type *, std::string>::iterator PI = TypeNameMap.find(Ty); 1955239310Sdim 1956252723Sdim if (PI != TypeNameMap.end() && (!PI->second.compare("struct._image1d_t") || 1957252723Sdim !PI->second.compare("struct._image2d_t") || 1958252723Sdim !PI->second.compare("struct._image3d_t"))) 1959239310Sdim return true; 1960239310Sdim 1961239310Sdim return false; 1962239310Sdim} 1963239310Sdim 1964239310Sdim 1965252723Sdimbool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) { 1966252723Sdim switch (MI.getOpcode()) { 1967239310Sdim default: 1968239310Sdim return false; 1969252723Sdim case NVPTX::CallArgBeginInst: 1970252723Sdim case NVPTX::CallArgEndInst0: 1971252723Sdim case NVPTX::CallArgEndInst1: 1972252723Sdim case NVPTX::CallArgF32: 1973252723Sdim case NVPTX::CallArgF64: 1974252723Sdim case NVPTX::CallArgI16: 1975252723Sdim case NVPTX::CallArgI32: 1976252723Sdim case NVPTX::CallArgI32imm: 1977252723Sdim case NVPTX::CallArgI64: 1978252723Sdim case NVPTX::CallArgParam: 1979252723Sdim case NVPTX::CallVoidInst: 1980252723Sdim case NVPTX::CallVoidInstReg: 1981252723Sdim case NVPTX::Callseq_End: 1982239310Sdim case NVPTX::CallVoidInstReg64: 1983252723Sdim case NVPTX::DeclareParamInst: 1984252723Sdim case NVPTX::DeclareRetMemInst: 1985252723Sdim case NVPTX::DeclareRetRegInst: 1986252723Sdim case NVPTX::DeclareRetScalarInst: 1987252723Sdim case NVPTX::DeclareScalarParamInst: 1988252723Sdim case NVPTX::DeclareScalarRegInst: 1989252723Sdim case NVPTX::StoreParamF32: 1990252723Sdim case NVPTX::StoreParamF64: 1991252723Sdim case NVPTX::StoreParamI16: 1992252723Sdim case NVPTX::StoreParamI32: 1993252723Sdim case NVPTX::StoreParamI64: 1994252723Sdim case NVPTX::StoreParamI8: 1995252723Sdim case NVPTX::StoreRetvalF32: 1996252723Sdim case NVPTX::StoreRetvalF64: 1997252723Sdim case NVPTX::StoreRetvalI16: 1998252723Sdim case NVPTX::StoreRetvalI32: 1999252723Sdim case NVPTX::StoreRetvalI64: 2000252723Sdim case NVPTX::StoreRetvalI8: 2001252723Sdim case NVPTX::LastCallArgF32: 2002252723Sdim case NVPTX::LastCallArgF64: 2003252723Sdim case NVPTX::LastCallArgI16: 2004252723Sdim case NVPTX::LastCallArgI32: 2005252723Sdim case NVPTX::LastCallArgI32imm: 2006252723Sdim case NVPTX::LastCallArgI64: 2007252723Sdim case NVPTX::LastCallArgParam: 2008252723Sdim case NVPTX::LoadParamMemF32: 2009252723Sdim case NVPTX::LoadParamMemF64: 2010252723Sdim case NVPTX::LoadParamMemI16: 2011252723Sdim case NVPTX::LoadParamMemI32: 2012252723Sdim case NVPTX::LoadParamMemI64: 2013252723Sdim case NVPTX::LoadParamMemI8: 2014252723Sdim case NVPTX::PrototypeInst: 2015252723Sdim case NVPTX::DBG_VALUE: 2016239310Sdim return true; 2017239310Sdim } 2018239310Sdim return false; 2019239310Sdim} 2020239310Sdim 2021263509Sdim/// PrintAsmOperand - Print out an operand for an inline asm expression. 2022263509Sdim/// 2023263509Sdimbool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, 2024263509Sdim unsigned AsmVariant, 2025263509Sdim const char *ExtraCode, raw_ostream &O) { 2026263509Sdim if (ExtraCode && ExtraCode[0]) { 2027263509Sdim if (ExtraCode[1] != 0) 2028263509Sdim return true; // Unknown modifier. 2029263509Sdim 2030263509Sdim switch (ExtraCode[0]) { 2031263509Sdim default: 2032263509Sdim // See if this is a generic print operand 2033263509Sdim return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O); 2034263509Sdim case 'r': 2035263509Sdim break; 2036263509Sdim } 2037263509Sdim } 2038263509Sdim 2039263509Sdim printOperand(MI, OpNo, O); 2040263509Sdim 2041263509Sdim return false; 2042263509Sdim} 2043263509Sdim 2044263509Sdimbool NVPTXAsmPrinter::PrintAsmMemoryOperand( 2045263509Sdim const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, 2046263509Sdim const char *ExtraCode, raw_ostream &O) { 2047263509Sdim if (ExtraCode && ExtraCode[0]) 2048263509Sdim return true; // Unknown modifier 2049263509Sdim 2050263509Sdim O << '['; 2051263509Sdim printMemOperand(MI, OpNo, O); 2052263509Sdim O << ']'; 2053263509Sdim 2054263509Sdim return false; 2055263509Sdim} 2056263509Sdim 2057263509Sdimvoid NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum, 2058263509Sdim raw_ostream &O, const char *Modifier) { 2059263509Sdim const MachineOperand &MO = MI->getOperand(opNum); 2060263509Sdim switch (MO.getType()) { 2061263509Sdim case MachineOperand::MO_Register: 2062263509Sdim if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { 2063263509Sdim if (MO.getReg() == NVPTX::VRDepot) 2064263509Sdim O << DEPOTNAME << getFunctionNumber(); 2065263509Sdim else 2066263509Sdim O << NVPTXInstPrinter::getRegisterName(MO.getReg()); 2067263509Sdim } else { 2068263509Sdim emitVirtualRegister(MO.getReg(), O); 2069263509Sdim } 2070263509Sdim return; 2071263509Sdim 2072263509Sdim case MachineOperand::MO_Immediate: 2073263509Sdim if (!Modifier) 2074263509Sdim O << MO.getImm(); 2075263509Sdim else if (strstr(Modifier, "vec") == Modifier) 2076263509Sdim printVecModifiedImmediate(MO, Modifier, O); 2077263509Sdim else 2078263509Sdim llvm_unreachable( 2079263509Sdim "Don't know how to handle modifier on immediate operand"); 2080263509Sdim return; 2081263509Sdim 2082263509Sdim case MachineOperand::MO_FPImmediate: 2083263509Sdim printFPConstant(MO.getFPImm(), O); 2084263509Sdim break; 2085263509Sdim 2086263509Sdim case MachineOperand::MO_GlobalAddress: 2087263509Sdim O << *getSymbol(MO.getGlobal()); 2088263509Sdim break; 2089263509Sdim 2090263509Sdim case MachineOperand::MO_ExternalSymbol: { 2091263509Sdim const char *symbname = MO.getSymbolName(); 2092263509Sdim if (strstr(symbname, ".PARAM") == symbname) { 2093263509Sdim unsigned index; 2094263509Sdim sscanf(symbname + 6, "%u[];", &index); 2095263509Sdim printParamName(index, O); 2096263509Sdim } else if (strstr(symbname, ".HLPPARAM") == symbname) { 2097263509Sdim unsigned index; 2098263509Sdim sscanf(symbname + 9, "%u[];", &index); 2099263509Sdim O << *CurrentFnSym << "_param_" << index << "_offset"; 2100263509Sdim } else 2101263509Sdim O << symbname; 2102263509Sdim break; 2103263509Sdim } 2104263509Sdim 2105263509Sdim case MachineOperand::MO_MachineBasicBlock: 2106263509Sdim O << *MO.getMBB()->getSymbol(); 2107263509Sdim return; 2108263509Sdim 2109263509Sdim default: 2110263509Sdim llvm_unreachable("Operand type not supported."); 2111263509Sdim } 2112263509Sdim} 2113263509Sdim 2114263509Sdimvoid NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum, 2115263509Sdim raw_ostream &O, const char *Modifier) { 2116263509Sdim printOperand(MI, opNum, O); 2117263509Sdim 2118263509Sdim if (Modifier && !strcmp(Modifier, "add")) { 2119263509Sdim O << ", "; 2120263509Sdim printOperand(MI, opNum + 1, O); 2121263509Sdim } else { 2122263509Sdim if (MI->getOperand(opNum + 1).isImm() && 2123263509Sdim MI->getOperand(opNum + 1).getImm() == 0) 2124263509Sdim return; // don't print ',0' or '+0' 2125263509Sdim O << "+"; 2126263509Sdim printOperand(MI, opNum + 1, O); 2127263509Sdim } 2128263509Sdim} 2129263509Sdim 2130263509Sdim 2131239310Sdim// Force static initialization. 2132239310Sdimextern "C" void LLVMInitializeNVPTXBackendAsmPrinter() { 2133239310Sdim RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32); 2134239310Sdim RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64); 2135239310Sdim} 2136239310Sdim 2137239310Sdimvoid NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) { 2138239310Sdim std::stringstream temp; 2139252723Sdim LineReader *reader = this->getReader(filename.str()); 2140239310Sdim temp << "\n//"; 2141239310Sdim temp << filename.str(); 2142239310Sdim temp << ":"; 2143239310Sdim temp << line; 2144239310Sdim temp << " "; 2145239310Sdim temp << reader->readLine(line); 2146239310Sdim temp << "\n"; 2147239310Sdim this->OutStreamer.EmitRawText(Twine(temp.str())); 2148239310Sdim} 2149239310Sdim 2150239310SdimLineReader *NVPTXAsmPrinter::getReader(std::string filename) { 2151252723Sdim if (reader == NULL) { 2152252723Sdim reader = new LineReader(filename); 2153239310Sdim } 2154239310Sdim 2155239310Sdim if (reader->fileName() != filename) { 2156239310Sdim delete reader; 2157252723Sdim reader = new LineReader(filename); 2158239310Sdim } 2159239310Sdim 2160239310Sdim return reader; 2161239310Sdim} 2162239310Sdim 2163252723Sdimstd::string LineReader::readLine(unsigned lineNum) { 2164239310Sdim if (lineNum < theCurLine) { 2165239310Sdim theCurLine = 0; 2166252723Sdim fstr.seekg(0, std::ios::beg); 2167239310Sdim } 2168239310Sdim while (theCurLine < lineNum) { 2169252723Sdim fstr.getline(buff, 500); 2170239310Sdim theCurLine++; 2171239310Sdim } 2172239310Sdim return buff; 2173239310Sdim} 2174239310Sdim 2175239310Sdim// Force static initialization. 2176239310Sdimextern "C" void LLVMInitializeNVPTXAsmPrinter() { 2177239310Sdim RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32); 2178239310Sdim RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64); 2179239310Sdim} 2180