NVPTXAsmPrinter.cpp revision 252723
1//===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file contains a printer that converts from our internal representation 11// of machine-dependent LLVM code to NVPTX assembly language. 12// 13//===----------------------------------------------------------------------===// 14 15#include "NVPTXAsmPrinter.h" 16#include "MCTargetDesc/NVPTXMCAsmInfo.h" 17#include "NVPTX.h" 18#include "NVPTXInstrInfo.h" 19#include "NVPTXNumRegisters.h" 20#include "NVPTXRegisterInfo.h" 21#include "NVPTXTargetMachine.h" 22#include "NVPTXUtilities.h" 23#include "cl_common_defines.h" 24#include "llvm/ADT/StringExtras.h" 25#include "llvm/Analysis/ConstantFolding.h" 26#include "llvm/Assembly/Writer.h" 27#include "llvm/CodeGen/Analysis.h" 28#include "llvm/CodeGen/MachineFrameInfo.h" 29#include "llvm/CodeGen/MachineModuleInfo.h" 30#include "llvm/CodeGen/MachineRegisterInfo.h" 31#include "llvm/DebugInfo.h" 32#include "llvm/IR/DerivedTypes.h" 33#include "llvm/IR/Function.h" 34#include "llvm/IR/GlobalVariable.h" 35#include "llvm/IR/Module.h" 36#include "llvm/IR/Operator.h" 37#include "llvm/MC/MCStreamer.h" 38#include "llvm/MC/MCSymbol.h" 39#include "llvm/Support/CommandLine.h" 40#include "llvm/Support/ErrorHandling.h" 41#include "llvm/Support/FormattedStream.h" 42#include "llvm/Support/Path.h" 43#include "llvm/Support/TargetRegistry.h" 44#include "llvm/Support/TimeValue.h" 45#include "llvm/Target/Mangler.h" 46#include "llvm/Target/TargetLoweringObjectFile.h" 47#include <sstream> 48using namespace llvm; 49 50#include "NVPTXGenAsmWriter.inc" 51 52bool RegAllocNilUsed = true; 53 54#define DEPOTNAME "__local_depot" 55 56static cl::opt<bool> 57EmitLineNumbers("nvptx-emit-line-numbers", 58 cl::desc("NVPTX Specific: 
Emit Line numbers even without -G"),
                cl::init(true));

// Storage for the "nvptx-emit-src" flag; the option declared below writes
// into this variable through cl::location.
namespace llvm { bool InterleaveSrcInPtx = false; }

// Command-line switch "nvptx-emit-src". It is declared with external storage
// (cl::opt<bool, true>), so its value lives in llvm::InterleaveSrcInPtx.
static cl::opt<bool, true>
InterleaveSrc("nvptx-emit-src", cl::ZeroOrMore,
              cl::desc("NVPTX Specific: Emit source line in ptx file"),
              cl::location(llvm::InterleaveSrcInPtx));

namespace {
/// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
/// depends.
///
/// If \p V is itself a GlobalVariable it is inserted into \p Globals and the
/// walk stops there (its own operands are not followed). Otherwise, if \p V
/// is a User, each of its operands is examined recursively. Any other kind of
/// value contributes nothing.
///
/// \param V        Value whose operand tree is searched.
/// \param Globals  Output set; entries are only ever added, never removed.
void DiscoverDependentGlobals(const Value *V,
                              DenseSet<const GlobalVariable *> &Globals) {
  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
    Globals.insert(GV);
  else {
    if (const User *U = dyn_cast<User>(V)) {
      for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) {
        DiscoverDependentGlobals(U->getOperand(i), Globals);
      }
    }
  }
}

/// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
/// instances to be emitted, but only after any dependents have been added
/// first.
void VisitGlobalVariableForEmission(
    const GlobalVariable *GV, SmallVectorImpl<const GlobalVariable *> &Order,
    DenseSet<const GlobalVariable *> &Visited,
    DenseSet<const GlobalVariable *> &Visiting) {
  // Have we already visited this one?
  if (Visited.count(GV))
    return;

  // Do we have a circular dependency?
if (Visiting.count(GV))
    report_fatal_error("Circular dependency found in global variable set");

  // Start visiting this global
  Visiting.insert(GV);

  // Make sure we visit all dependents first
  DenseSet<const GlobalVariable *> Others;
  for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
    DiscoverDependentGlobals(GV->getOperand(i), Others);

  for (DenseSet<const GlobalVariable *>::iterator I = Others.begin(),
                                                  E = Others.end();
       I != E; ++I)
    VisitGlobalVariableForEmission(*I, Order, Visited, Visiting);

  // Now we can visit ourself
  Order.push_back(GV);
  Visited.insert(GV);
  Visiting.erase(GV);
}
}

// @TODO: This is a copy from AsmPrinter.cpp. The function is static, so we
// cannot just link to the existing version.
/// LowerConstant - Lower the specified LLVM Constant to an MCExpr.
///
/// Handled directly: null/undef (lowered to the constant 0), ConstantInt
/// (zero-extended value), GlobalValue and BlockAddress (symbol references).
/// Everything else must be a ConstantExpr; the supported opcodes are the
/// cases of the switch below. Any other opcode is first run through
/// ConstantFoldConstantExpression and, if that yields a different constant,
/// lowered recursively; otherwise a fatal error is reported.
///
/// \param CV  Constant to lower.
/// \param AP  Printer supplying the MCContext, Mangler and TargetMachine.
/// \returns   The MCExpr created in AP.OutContext.
using namespace nvptx;
const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
  MCContext &Ctx = AP.OutContext;

  if (CV->isNullValue() || isa<UndefValue>(CV))
    return MCConstantExpr::Create(0, Ctx);

  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
    return MCConstantExpr::Create(CI->getZExtValue(), Ctx);

  if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
    return MCSymbolRefExpr::Create(AP.Mang->getSymbol(GV), Ctx);

  if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV))
    return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx);

  const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
  if (CE == 0)
    llvm_unreachable("Unknown constant value to lower!");

  switch (CE->getOpcode()) {
  default:
    // If the code isn't optimized, there may be outstanding folding
    // opportunities. Attempt to fold the expression using DataLayout as a
    // last resort before giving up.
    if (Constant *C = ConstantFoldConstantExpression(CE, AP.TM.getDataLayout()))
      if (C != CE)
        return LowerConstant(C, AP);

    // Otherwise report the problem to the user.
    // NOTE(review): there is no break after this scope; it relies on
    // report_fatal_error not returning so that control never reaches the
    // GetElementPtr case below.
    {
      std::string S;
      raw_string_ostream OS(S);
      OS << "Unsupported expression in static initializer: ";
      WriteAsOperand(OS, CE, /*PrintType=*/ false,
                     !AP.MF ? 0 : AP.MF->getFunction()->getParent());
      report_fatal_error(OS.str());
    }
  case Instruction::GetElementPtr: {
    const DataLayout &TD = *AP.TM.getDataLayout();
    // Generate a symbolic expression for the byte address
    APInt OffsetAI(TD.getPointerSizeInBits(), 0);
    cast<GEPOperator>(CE)->accumulateConstantOffset(TD, OffsetAI);

    const MCExpr *Base = LowerConstant(CE->getOperand(0), AP);
    // A zero offset needs no addition; return the base expression as is.
    if (!OffsetAI)
      return Base;

    int64_t Offset = OffsetAI.getSExtValue();
    return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx),
                                   Ctx);
  }

  case Instruction::Trunc:
  // We emit the value and depend on the assembler to truncate the generated
  // expression properly. This is important for differences between
  // blockaddress labels. Since the two labels are in the same function, it
  // is reasonable to treat their delta as a 32-bit value.
  // FALL THROUGH.
  case Instruction::BitCast:
    return LowerConstant(CE->getOperand(0), AP);

  case Instruction::IntToPtr: {
    const DataLayout &TD = *AP.TM.getDataLayout();
    // Handle casts to pointers by changing them into casts to the appropriate
    // integer type. This promotes constant folding and simplifies this code.
    Constant *Op = CE->getOperand(0);
    Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()),
                                      false /*ZExt*/);
    return LowerConstant(Op, AP);
  }

  case Instruction::PtrToInt: {
    const DataLayout &TD = *AP.TM.getDataLayout();
    // Support only foldable casts to/from pointers that can be eliminated by
    // changing the pointer to the appropriately sized integer type.
    Constant *Op = CE->getOperand(0);
    Type *Ty = CE->getType();

    const MCExpr *OpExpr = LowerConstant(Op, AP);

    // We can emit the pointer value into this slot if the slot is an
    // integer slot equal to the size of the pointer.
    if (TD.getTypeAllocSize(Ty) == TD.getTypeAllocSize(Op->getType()))
      return OpExpr;

    // Otherwise the pointer is smaller than the resultant integer, mask off
    // the high bits so we are sure to get a proper truncation if the input is
    // a constant expr.
    unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType());
    // The mask keeps the low InBits bits. The shift amount is 64 - InBits,
    // so this assumes 0 < InBits <= 64 -- TODO confirm for all targets.
    const MCExpr *MaskExpr =
        MCConstantExpr::Create(~0ULL >> (64 - InBits), Ctx);
    return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx);
  }

  // The MC library also has a right-shift operator, but it isn't consistently
  // signed or unsigned between different targets.
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::SDiv:
  case Instruction::SRem:
  case Instruction::Shl:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    // Lower both operands, then map the IR opcode onto the matching
    // MCBinaryExpr factory.
    const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP);
    const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP);
    switch (CE->getOpcode()) {
    default:
      llvm_unreachable("Unknown binary operator constant cast expr");
    case Instruction::Add:
      return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx);
    case Instruction::Sub:
      return MCBinaryExpr::CreateSub(LHS, RHS, Ctx);
    case Instruction::Mul:
      return MCBinaryExpr::CreateMul(LHS, RHS, Ctx);
    case Instruction::SDiv:
      return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx);
    case Instruction::SRem:
      return MCBinaryExpr::CreateMod(LHS, RHS, Ctx);
    case Instruction::Shl:
      return MCBinaryExpr::CreateShl(LHS, RHS, Ctx);
    case Instruction::And:
      return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx);
    case Instruction::Or:
      return MCBinaryExpr::CreateOr(LHS, RHS, Ctx);
    case Instruction::Xor:
      return MCBinaryExpr::CreateXor(LHS, RHS, Ctx);
    }
  }
  }
}

/// Emit a ".loc" directive for \p MI when its debug location differs from
/// the previously seen one and its source file is present in filenameMap.
/// Does nothing when the EmitLineNumbers option is off or ignoreLoc(MI) is
/// true. prevDebugLoc is updated before the remaining checks, so an unknown
/// location is still remembered as the previous one.
void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) {
  if (!EmitLineNumbers)
    return;
  if (ignoreLoc(MI))
    return;

  DebugLoc curLoc = MI.getDebugLoc();

  if (prevDebugLoc.isUnknown() && curLoc.isUnknown())
    return;

  if (prevDebugLoc == curLoc)
    return;

  prevDebugLoc = curLoc;

  if (curLoc.isUnknown())
    return;

  const MachineFunction *MF = MI.getParent()->getParent();
  //const TargetMachine &TM = MF->getTarget();

  const LLVMContext &ctx = MF->getFunction()->getContext();
  DIScope Scope(curLoc.getScope(ctx));

  if (!Scope.Verify())
    return;

  StringRef fileName(Scope.getFilename());
  StringRef dirName(Scope.getDirectory());
  SmallString<128> FullPathName = dirName;
if (!dirName.empty() && !sys::path::is_absolute(fileName)) {
    sys::path::append(FullPathName, fileName);
    fileName = FullPathName.str();
  }

  // Only files that were registered in filenameMap can be referenced.
  if (filenameMap.find(fileName.str()) == filenameMap.end())
    return;

  // Emit the line from the source file.
  if (llvm::InterleaveSrcInPtx)
    this->emitSrcInText(fileName.str(), curLoc.getLine());

  // Directive layout: "\t.loc <file index> <line> <column>".
  std::stringstream temp;
  temp << "\t.loc " << filenameMap[fileName.str()] << " " << curLoc.getLine()
       << " " << curLoc.getCol();
  OutStreamer.EmitRawText(Twine(temp.str().c_str()));
}

/// Print one machine instruction as raw text. When the driver interface is
/// CUDA, emitLineNumberAsDotLoc is called for the instruction first.
void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
  SmallString<128> Str;
  raw_svector_ostream OS(Str);
  if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
    emitLineNumberAsDotLoc(*MI);
  printInstruction(MI, OS);
  OutStreamer.EmitRawText(OS.str());
}

/// Write the return-value declaration of \p F to \p O, wrapped as " (...) ".
/// Nothing is written for a void return type. With SM version >= 20 the
/// value is declared as a single ".param" named func_retval0; otherwise one
/// ".reg" per scalar part (func_retval0, func_retval1, ...) is declared.
void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
  const DataLayout *TD = TM.getDataLayout();
  const TargetLowering *TLI = TM.getTargetLowering();

  Type *Ty = F->getReturnType();

  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);

  if (Ty->getTypeID() == Type::VoidTyID)
    return;

  O << " (";

  if (isABI) {
    if (Ty->isPrimitiveType() || Ty->isIntegerTy()) {
      unsigned size = 0;
      if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) {
        // Integer returns narrower than 32 bits are declared as 32 bits.
        size = ITy->getBitWidth();
        if (size < 32)
          size = 32;
      } else {
        assert(Ty->isFloatingPointTy() && "Floating point type expected here");
        size = Ty->getPrimitiveSizeInBits();
      }

      O << ".param .b" << size << " func_retval0";
    } else if (isa<PointerType>(Ty)) {
      O << ".param .b" << TLI->getPointerTy().getSizeInBits()
        << " func_retval0";
    } else {
      if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) {
        SmallVector<EVT, 16> vtparts;
        ComputeValueVTs(*TLI, Ty, vtparts);
        unsigned totalsz = 0;
        for (unsigned i = 0, e =
vtparts.size(); i != e; ++i) {
          unsigned elems = 1;
          EVT elemtype = vtparts[i];
          if (vtparts[i].isVector()) {
            elems = vtparts[i].getVectorNumElements();
            elemtype = vtparts[i].getVectorElementType();
          }
          // Accumulate the size in bytes; integer elements narrower than
          // 8 bits are counted as one byte.
          for (unsigned j = 0, je = elems; j != je; ++j) {
            unsigned sz = elemtype.getSizeInBits();
            if (elemtype.isInteger() && (sz < 8))
              sz = 8;
            totalsz += sz / 8;
          }
        }
        // Use the alignment returned by llvm::getAlign when it reports one,
        // otherwise fall back to the ABI alignment of the type.
        unsigned retAlignment = 0;
        if (!llvm::getAlign(*F, 0, retAlignment))
          retAlignment = TD->getABITypeAlignment(Ty);
        O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
          << "]";
      } else
        assert(false && "Unknown return type");
    }
  } else {
    // One ".reg" declaration per scalar element, numbered consecutively.
    SmallVector<EVT, 16> vtparts;
    ComputeValueVTs(*TLI, Ty, vtparts);
    unsigned idx = 0;
    for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
      unsigned elems = 1;
      EVT elemtype = vtparts[i];
      if (vtparts[i].isVector()) {
        elems = vtparts[i].getVectorNumElements();
        elemtype = vtparts[i].getVectorElementType();
      }

      for (unsigned j = 0, je = elems; j != je; ++j) {
        unsigned sz = elemtype.getSizeInBits();
        // Integer elements narrower than 32 bits are declared as 32 bits.
        if (elemtype.isInteger() && (sz < 32))
          sz = 32;
        O << ".reg .b" << sz << " func_retval" << idx;
        if (j < je - 1)
          O << ", ";
        ++idx;
      }
      if (i < e - 1)
        O << ", ";
    }
  }
  O << ") ";
  return;
}

/// Convenience overload: forwards to the Function-based version using the
/// IR function behind \p MF.
void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
                                        raw_ostream &O) {
  const Function *F = MF.getFunction();
  printReturnValStr(F, O);
}

/// Emit the function header: linkage directive, ".entry" or ".func" with the
/// return-value declaration, the symbol name, the parameter list and, for
/// kernels, the kernel directives. Module-level globals are emitted first if
/// that has not happened yet.
void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
  SmallString<128> Str;
  raw_svector_ostream O(Str);

  if (!GlobalsEmitted) {
    emitGlobals(*MF->getFunction()->getParent());
    GlobalsEmitted = true;
  }

  // Set up
  MRI = &MF->getRegInfo();
  F = MF->getFunction();
  emitLinkageDirective(F, O);
  if (llvm::isKernelFunction(*F))
    O << ".entry ";
  else {
    O << ".func ";
    printReturnValStr(*MF,
O);
  }

  O << *CurrentFnSym;

  emitFunctionParamList(*MF, O);

  if (llvm::isKernelFunction(*F))
    emitKernelFunctionDirectives(*F, O);

  OutStreamer.EmitRawText(O.str());

  // Reset the remembered debug location at the start of each function.
  prevDebugLoc = DebugLoc();
}

/// Open the function body: allocate the per-register-class virtual register
/// maps, emit "{", then the virtual register declarations and the demoted
/// variables of the current function.
void NVPTXAsmPrinter::EmitFunctionBodyStart() {
  const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
  unsigned numRegClasses = TRI.getNumRegClasses();
  // Array of numRegClasses + 1 maps; it is released with delete[] in
  // EmitFunctionBodyEnd().
  VRidGlobal2LocalMap = new std::map<unsigned, unsigned>[numRegClasses + 1];
  OutStreamer.EmitRawText(StringRef("{\n"));
  setAndEmitFunctionVirtualRegisters(*MF);

  SmallString<128> Str;
  raw_svector_ostream O(Str);
  emitDemotedVars(MF->getFunction(), O);
  OutStreamer.EmitRawText(O.str());
}

/// Close the function body with "}" and free the register maps allocated in
/// EmitFunctionBodyStart().
void NVPTXAsmPrinter::EmitFunctionBodyEnd() {
  OutStreamer.EmitRawText(StringRef("}\n"));
  // NOTE(review): the pointer is not reset after the delete[], so it dangles
  // until the next EmitFunctionBodyStart() call.
  delete[] VRidGlobal2LocalMap;
}

/// Write the .reqntid, .maxntid and .minnctapersm directives for kernel
/// \p F to \p O, each only when the corresponding value is specified.
void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
                                                   raw_ostream &O) const {
  // If the NVVM IR has some of reqntid* specified, then output
  // the reqntid directive, and set the unspecified ones to 1.
  // If none of reqntid* is specified, don't output reqntid directive.
  unsigned reqntidx, reqntidy, reqntidz;
  bool specified = false;
  if (llvm::getReqNTIDx(F, reqntidx) == false)
    reqntidx = 1;
  else
    specified = true;
  if (llvm::getReqNTIDy(F, reqntidy) == false)
    reqntidy = 1;
  else
    specified = true;
  if (llvm::getReqNTIDz(F, reqntidz) == false)
    reqntidz = 1;
  else
    specified = true;

  if (specified)
    O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz
      << "\n";

  // If the NVVM IR has some of maxntid* specified, then output
  // the maxntid directive, and set the unspecified ones to 1.
  // If none of maxntid* is specified, don't output maxntid directive.
483 unsigned maxntidx, maxntidy, maxntidz; 484 specified = false; 485 if (llvm::getMaxNTIDx(F, maxntidx) == false) 486 maxntidx = 1; 487 else 488 specified = true; 489 if (llvm::getMaxNTIDy(F, maxntidy) == false) 490 maxntidy = 1; 491 else 492 specified = true; 493 if (llvm::getMaxNTIDz(F, maxntidz) == false) 494 maxntidz = 1; 495 else 496 specified = true; 497 498 if (specified) 499 O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz 500 << "\n"; 501 502 unsigned mincta; 503 if (llvm::getMinCTASm(F, mincta)) 504 O << ".minnctapersm " << mincta << "\n"; 505} 506 507void NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec, 508 raw_ostream &O) { 509 const TargetRegisterClass *RC = MRI->getRegClass(vr); 510 unsigned id = RC->getID(); 511 512 std::map<unsigned, unsigned> ®map = VRidGlobal2LocalMap[id]; 513 unsigned mapped_vr = regmap[vr]; 514 515 if (!isVec) { 516 O << getNVPTXRegClassStr(RC) << mapped_vr; 517 return; 518 } 519 report_fatal_error("Bad register!"); 520} 521 522void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr, bool isVec, 523 raw_ostream &O) { 524 getVirtualRegisterName(vr, isVec, O); 525} 526 527void NVPTXAsmPrinter::printVecModifiedImmediate( 528 const MachineOperand &MO, const char *Modifier, raw_ostream &O) { 529 static const char vecelem[] = { '0', '1', '2', '3', '0', '1', '2', '3' }; 530 int Imm = (int) MO.getImm(); 531 if (0 == strcmp(Modifier, "vecelem")) 532 O << "_" << vecelem[Imm]; 533 else if (0 == strcmp(Modifier, "vecv4comm1")) { 534 if ((Imm < 0) || (Imm > 3)) 535 O << "//"; 536 } else if (0 == strcmp(Modifier, "vecv4comm2")) { 537 if ((Imm < 4) || (Imm > 7)) 538 O << "//"; 539 } else if (0 == strcmp(Modifier, "vecv4pos")) { 540 if (Imm < 0) 541 Imm = 0; 542 O << "_" << vecelem[Imm % 4]; 543 } else if (0 == strcmp(Modifier, "vecv2comm1")) { 544 if ((Imm < 0) || (Imm > 1)) 545 O << "//"; 546 } else if (0 == strcmp(Modifier, "vecv2comm2")) { 547 if ((Imm < 2) || (Imm > 3)) 548 O << "//"; 549 } else if (0 
== strcmp(Modifier, "vecv2pos")) { 550 if (Imm < 0) 551 Imm = 0; 552 O << "_" << vecelem[Imm % 2]; 553 } else 554 llvm_unreachable("Unknown Modifier on immediate operand"); 555} 556 557void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum, 558 raw_ostream &O, const char *Modifier) { 559 const MachineOperand &MO = MI->getOperand(opNum); 560 switch (MO.getType()) { 561 case MachineOperand::MO_Register: 562 if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { 563 if (MO.getReg() == NVPTX::VRDepot) 564 O << DEPOTNAME << getFunctionNumber(); 565 else 566 O << getRegisterName(MO.getReg()); 567 } else { 568 if (!Modifier) 569 emitVirtualRegister(MO.getReg(), false, O); 570 else { 571 if (strcmp(Modifier, "vecfull") == 0) 572 emitVirtualRegister(MO.getReg(), true, O); 573 else 574 llvm_unreachable( 575 "Don't know how to handle the modifier on virtual register."); 576 } 577 } 578 return; 579 580 case MachineOperand::MO_Immediate: 581 if (!Modifier) 582 O << MO.getImm(); 583 else if (strstr(Modifier, "vec") == Modifier) 584 printVecModifiedImmediate(MO, Modifier, O); 585 else 586 llvm_unreachable( 587 "Don't know how to handle modifier on immediate operand"); 588 return; 589 590 case MachineOperand::MO_FPImmediate: 591 printFPConstant(MO.getFPImm(), O); 592 break; 593 594 case MachineOperand::MO_GlobalAddress: 595 O << *Mang->getSymbol(MO.getGlobal()); 596 break; 597 598 case MachineOperand::MO_ExternalSymbol: { 599 const char *symbname = MO.getSymbolName(); 600 if (strstr(symbname, ".PARAM") == symbname) { 601 unsigned index; 602 sscanf(symbname + 6, "%u[];", &index); 603 printParamName(index, O); 604 } else if (strstr(symbname, ".HLPPARAM") == symbname) { 605 unsigned index; 606 sscanf(symbname + 9, "%u[];", &index); 607 O << *CurrentFnSym << "_param_" << index << "_offset"; 608 } else 609 O << symbname; 610 break; 611 } 612 613 case MachineOperand::MO_MachineBasicBlock: 614 O << *MO.getMBB()->getSymbol(); 615 return; 616 617 default: 618 
llvm_unreachable("Operand type not supported."); 619 } 620} 621 622void NVPTXAsmPrinter::printImplicitDef(const MachineInstr *MI, 623 raw_ostream &O) const { 624#ifndef __OPTIMIZE__ 625 O << "\t// Implicit def :"; 626 //printOperand(MI, 0); 627 O << "\n"; 628#endif 629} 630 631void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum, 632 raw_ostream &O, const char *Modifier) { 633 printOperand(MI, opNum, O); 634 635 if (Modifier && !strcmp(Modifier, "add")) { 636 O << ", "; 637 printOperand(MI, opNum + 1, O); 638 } else { 639 if (MI->getOperand(opNum + 1).isImm() && 640 MI->getOperand(opNum + 1).getImm() == 0) 641 return; // don't print ',0' or '+0' 642 O << "+"; 643 printOperand(MI, opNum + 1, O); 644 } 645} 646 647void NVPTXAsmPrinter::printLdStCode(const MachineInstr *MI, int opNum, 648 raw_ostream &O, const char *Modifier) { 649 if (Modifier) { 650 const MachineOperand &MO = MI->getOperand(opNum); 651 int Imm = (int) MO.getImm(); 652 if (!strcmp(Modifier, "volatile")) { 653 if (Imm) 654 O << ".volatile"; 655 } else if (!strcmp(Modifier, "addsp")) { 656 switch (Imm) { 657 case NVPTX::PTXLdStInstCode::GLOBAL: 658 O << ".global"; 659 break; 660 case NVPTX::PTXLdStInstCode::SHARED: 661 O << ".shared"; 662 break; 663 case NVPTX::PTXLdStInstCode::LOCAL: 664 O << ".local"; 665 break; 666 case NVPTX::PTXLdStInstCode::PARAM: 667 O << ".param"; 668 break; 669 case NVPTX::PTXLdStInstCode::CONSTANT: 670 O << ".const"; 671 break; 672 case NVPTX::PTXLdStInstCode::GENERIC: 673 if (!nvptxSubtarget.hasGenericLdSt()) 674 O << ".global"; 675 break; 676 default: 677 llvm_unreachable("Wrong Address Space"); 678 } 679 } else if (!strcmp(Modifier, "sign")) { 680 if (Imm == NVPTX::PTXLdStInstCode::Signed) 681 O << "s"; 682 else if (Imm == NVPTX::PTXLdStInstCode::Unsigned) 683 O << "u"; 684 else 685 O << "f"; 686 } else if (!strcmp(Modifier, "vec")) { 687 if (Imm == NVPTX::PTXLdStInstCode::V2) 688 O << ".v2"; 689 else if (Imm == NVPTX::PTXLdStInstCode::V4) 690 O << 
".v4"; 691 } else 692 llvm_unreachable("Unknown Modifier"); 693 } else 694 llvm_unreachable("Empty Modifier"); 695} 696 697void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) { 698 699 emitLinkageDirective(F, O); 700 if (llvm::isKernelFunction(*F)) 701 O << ".entry "; 702 else 703 O << ".func "; 704 printReturnValStr(F, O); 705 O << *Mang->getSymbol(F) << "\n"; 706 emitFunctionParamList(F, O); 707 O << ";\n"; 708} 709 710static bool usedInGlobalVarDef(const Constant *C) { 711 if (!C) 712 return false; 713 714 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) { 715 if (GV->getName().str() == "llvm.used") 716 return false; 717 return true; 718 } 719 720 for (Value::const_use_iterator ui = C->use_begin(), ue = C->use_end(); 721 ui != ue; ++ui) { 722 const Constant *C = dyn_cast<Constant>(*ui); 723 if (usedInGlobalVarDef(C)) 724 return true; 725 } 726 return false; 727} 728 729static bool usedInOneFunc(const User *U, Function const *&oneFunc) { 730 if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) { 731 if (othergv->getName().str() == "llvm.used") 732 return true; 733 } 734 735 if (const Instruction *instr = dyn_cast<Instruction>(U)) { 736 if (instr->getParent() && instr->getParent()->getParent()) { 737 const Function *curFunc = instr->getParent()->getParent(); 738 if (oneFunc && (curFunc != oneFunc)) 739 return false; 740 oneFunc = curFunc; 741 return true; 742 } else 743 return false; 744 } 745 746 if (const MDNode *md = dyn_cast<MDNode>(U)) 747 if (md->hasName() && ((md->getName().str() == "llvm.dbg.gv") || 748 (md->getName().str() == "llvm.dbg.sp"))) 749 return true; 750 751 for (User::const_use_iterator ui = U->use_begin(), ue = U->use_end(); 752 ui != ue; ++ui) { 753 if (usedInOneFunc(*ui, oneFunc) == false) 754 return false; 755 } 756 return true; 757} 758 759/* Find out if a global variable can be demoted to local scope. 
* Currently, this is valid for CUDA shared variables, which have local
 * scope and global lifetime. So the conditions to check are :
 * 1. Is the global variable in shared address space?
 * 2. Does it have internal linkage?
 * 3. Is the global variable referenced only in one function?
 *
 * On success, f is set to that single function; it is left untouched when
 * the function returns false.
 */
static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
  if (gv->hasInternalLinkage() == false)
    return false;
  const PointerType *Pty = gv->getType();
  if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED)
    return false;

  const Function *oneFunc = 0;

  bool flag = usedInOneFunc(gv, oneFunc);
  if (flag == false)
    return false;
  // A variable with no instruction user at all is not demoted either.
  if (!oneFunc)
    return false;
  f = oneFunc;
  return true;
}

/// Return true if \p C is used, directly or through other constants, by an
/// instruction whose containing function is a key of \p seenMap.
/// Instructions that are not inserted in a basic block or function are
/// skipped.
static bool useFuncSeen(const Constant *C,
                        llvm::DenseMap<const Function *, bool> &seenMap) {
  for (Value::const_use_iterator ui = C->use_begin(), ue = C->use_end();
       ui != ue; ++ui) {
    if (const Constant *cu = dyn_cast<Constant>(*ui)) {
      if (useFuncSeen(cu, seenMap))
        return true;
    } else if (const Instruction *I = dyn_cast<Instruction>(*ui)) {
      const BasicBlock *bb = I->getParent();
      if (!bb)
        continue;
      const Function *caller = bb->getParent();
      if (!caller)
        continue;
      if (seenMap.find(caller) != seenMap.end())
        return true;
    }
  }
  return false;
}

/// Write forward declarations for the functions of \p M that need one:
/// used, non-intrinsic declarations, and definitions that are referenced
/// from a global variable initializer or from a function that appears
/// earlier in the module.
void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
  // Functions whose definition has already been passed in module order.
  llvm::DenseMap<const Function *, bool> seenMap;
  for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
    const Function *F = FI;

    if (F->isDeclaration()) {
      if (F->use_empty())
        continue;
      if (F->getIntrinsicID())
        continue;
      emitDeclaration(F, O);
      continue;
    }
    for (Value::const_use_iterator iter = F->use_begin(),
                                   iterEnd = F->use_end();
         iter != iterEnd; ++iter) {
      if (const Constant *C = dyn_cast<Constant>(*iter)) {
        if
(usedInGlobalVarDef(C)) {
          // The use is in the initialization of a global variable
          // that is a function pointer, so print a declaration
          // for the original function
          emitDeclaration(F, O);
          break;
        }
        // Emit a declaration of this function if the function that
        // uses this constant expr has already been seen.
        if (useFuncSeen(C, seenMap)) {
          emitDeclaration(F, O);
          break;
        }
      }

      if (!isa<Instruction>(*iter))
        continue;
      const Instruction *instr = cast<Instruction>(*iter);
      const BasicBlock *bb = instr->getParent();
      if (!bb)
        continue;
      const Function *caller = bb->getParent();
      if (!caller)
        continue;

      // If a caller has already been seen, then the caller is
      // appearing in the module before the callee. so print out
      // a declaration for the callee.
      if (seenMap.find(caller) != seenMap.end()) {
        emitDeclaration(F, O);
        break;
      }
    }
    seenMap[F] = true;
  }
}

/// Assign a 1-based index to every distinct source file named by the
/// compile units and subprograms found in \p M and store it in filenameMap.
/// A DWARF file directive is emitted only for files first seen through a
/// compile unit. Relative file names are joined with their directory when
/// the directory is non-empty.
void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) {
  DebugInfoFinder DbgFinder;
  DbgFinder.processModule(M);

  unsigned i = 1;
  for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
                                 E = DbgFinder.compile_unit_end();
       I != E; ++I) {
    DICompileUnit DIUnit(*I);
    StringRef Filename(DIUnit.getFilename());
    StringRef Dirname(DIUnit.getDirectory());
    SmallString<128> FullPathName = Dirname;
    if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
      sys::path::append(FullPathName, Filename);
      Filename = FullPathName.str();
    }
    // Skip files that already have an index.
    if (filenameMap.find(Filename.str()) != filenameMap.end())
      continue;
    filenameMap[Filename.str()] = i;
    OutStreamer.EmitDwarfFileDirective(i, "", Filename.str());
    ++i;
  }

  for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(),
                                 E = DbgFinder.subprogram_end();
       I != E; ++I) {
    DISubprogram SP(*I);
    StringRef Filename(SP.getFilename());
    StringRef
Dirname(SP.getDirectory());
    SmallString<128> FullPathName = Dirname;
    if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
      sys::path::append(FullPathName, Filename);
      Filename = FullPathName.str();
    }
    if (filenameMap.find(Filename.str()) != filenameMap.end())
      continue;
    // Files first seen here get an index but no file directive.
    filenameMap[Filename.str()] = i;
    ++i;
  }
}

/// Module-level set-up: analyse the module, initialise the object-file
/// lowering and the Mangler, emit the PTX header and, for the CUDA driver
/// interface, record and emit the source file names. The base class
/// AsmPrinter::doInitialization is deliberately not called.
///
/// \returns false, which signals success.
bool NVPTXAsmPrinter::doInitialization(Module &M) {

  SmallString<128> Str1;
  raw_svector_ostream OS1(Str1);

  // NOTE(review): the result of getAnalysisIfAvailable is dereferenced
  // without a null check -- confirm MachineModuleInfo is always scheduled
  // before this pass.
  MMI = getAnalysisIfAvailable<MachineModuleInfo>();
  MMI->AnalyzeModule(M);

  // We need to call the parent's one explicitly.
  //bool Result = AsmPrinter::doInitialization(M);

  // Initialize TargetLoweringObjectFile.
  const_cast<TargetLoweringObjectFile &>(getObjFileLowering())
      .Initialize(OutContext, TM);

  Mang = new Mangler(OutContext, *TM.getDataLayout());

  // Emit header before any dwarf directives are emitted below.
  emitHeader(M, OS1);
  OutStreamer.EmitRawText(OS1.str());

  // Already commented out
  //bool Result = AsmPrinter::doInitialization(M);

  if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
    recordAndEmitFilenames(M);

  // Globals are emitted lazily, either with the first function or in
  // doFinalization.
  GlobalsEmitted = false;

  return false; // success
}

/// Emit the function declarations and all module-level global variables of
/// \p M as one chunk of raw text.
void NVPTXAsmPrinter::emitGlobals(const Module &M) {
  SmallString<128> Str2;
  raw_svector_ostream OS2(Str2);

  emitDeclarations(M, OS2);

  // As ptxas does not support forward references of globals, we need to first
  // sort the list of module-level globals in def-use order. We visit each
  // global variable in order, and ensure that we emit it *after* its dependent
  // globals. We use a little extra memory maintaining both a set and a list to
  // have fast searches while maintaining a strict ordering.
943 SmallVector<const GlobalVariable *, 8> Globals; 944 DenseSet<const GlobalVariable *> GVVisited; 945 DenseSet<const GlobalVariable *> GVVisiting; 946 947 // Visit each global variable, in order 948 for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); 949 I != E; ++I) 950 VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting); 951 952 assert(GVVisited.size() == M.getGlobalList().size() && 953 "Missed a global variable"); 954 assert(GVVisiting.size() == 0 && "Did not fully process a global variable"); 955 956 // Print out module-level global variables in proper order 957 for (unsigned i = 0, e = Globals.size(); i != e; ++i) 958 printModuleLevelGV(Globals[i], OS2); 959 960 OS2 << '\n'; 961 962 OutStreamer.EmitRawText(OS2.str()); 963} 964 965void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) { 966 O << "//\n"; 967 O << "// Generated by LLVM NVPTX Back-End\n"; 968 O << "//\n"; 969 O << "\n"; 970 971 unsigned PTXVersion = nvptxSubtarget.getPTXVersion(); 972 O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n"; 973 974 O << ".target "; 975 O << nvptxSubtarget.getTargetName(); 976 977 if (nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) 978 O << ", texmode_independent"; 979 if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) { 980 if (!nvptxSubtarget.hasDouble()) 981 O << ", map_f64_to_f32"; 982 } 983 984 if (MAI->doesSupportDebugInformation()) 985 O << ", debug"; 986 987 O << "\n"; 988 989 O << ".address_size "; 990 if (nvptxSubtarget.is64Bit()) 991 O << "64"; 992 else 993 O << "32"; 994 O << "\n"; 995 996 O << "\n"; 997} 998 999bool NVPTXAsmPrinter::doFinalization(Module &M) { 1000 1001 // If we did not emit any functions, then the global declarations have not 1002 // yet been emitted. 
1003 if (!GlobalsEmitted) { 1004 emitGlobals(M); 1005 GlobalsEmitted = true; 1006 } 1007 1008 // XXX Temproarily remove global variables so that doFinalization() will not 1009 // emit them again (global variables are emitted at beginning). 1010 1011 Module::GlobalListType &global_list = M.getGlobalList(); 1012 int i, n = global_list.size(); 1013 GlobalVariable **gv_array = new GlobalVariable *[n]; 1014 1015 // first, back-up GlobalVariable in gv_array 1016 i = 0; 1017 for (Module::global_iterator I = global_list.begin(), E = global_list.end(); 1018 I != E; ++I) 1019 gv_array[i++] = &*I; 1020 1021 // second, empty global_list 1022 while (!global_list.empty()) 1023 global_list.remove(global_list.begin()); 1024 1025 // call doFinalization 1026 bool ret = AsmPrinter::doFinalization(M); 1027 1028 // now we restore global variables 1029 for (i = 0; i < n; i++) 1030 global_list.insert(global_list.end(), gv_array[i]); 1031 1032 delete[] gv_array; 1033 return ret; 1034 1035 //bool Result = AsmPrinter::doFinalization(M); 1036 // Instead of calling the parents doFinalization, we may 1037 // clone parents doFinalization and customize here. 1038 // Currently, we if NVISA out the EmitGlobals() in 1039 // parent's doFinalization, which is too intrusive. 1040 // 1041 // Same for the doInitialization. 1042 //return Result; 1043} 1044 1045// This function emits appropriate linkage directives for 1046// functions and global variables. 1047// 1048// extern function declaration -> .extern 1049// extern function definition -> .visible 1050// external global variable with init -> .visible 1051// external without init -> .extern 1052// appending -> not allowed, assert. 
1053 1054void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V, 1055 raw_ostream &O) { 1056 if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) { 1057 if (V->hasExternalLinkage()) { 1058 if (isa<GlobalVariable>(V)) { 1059 const GlobalVariable *GVar = cast<GlobalVariable>(V); 1060 if (GVar) { 1061 if (GVar->hasInitializer()) 1062 O << ".visible "; 1063 else 1064 O << ".extern "; 1065 } 1066 } else if (V->isDeclaration()) 1067 O << ".extern "; 1068 else 1069 O << ".visible "; 1070 } else if (V->hasAppendingLinkage()) { 1071 std::string msg; 1072 msg.append("Error: "); 1073 msg.append("Symbol "); 1074 if (V->hasName()) 1075 msg.append(V->getName().str()); 1076 msg.append("has unsupported appending linkage type"); 1077 llvm_unreachable(msg.c_str()); 1078 } 1079 } 1080} 1081 1082void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, 1083 raw_ostream &O, 1084 bool processDemoted) { 1085 1086 // Skip meta data 1087 if (GVar->hasSection()) { 1088 if (GVar->getSection() == "llvm.metadata") 1089 return; 1090 } 1091 1092 const DataLayout *TD = TM.getDataLayout(); 1093 1094 // GlobalVariables are always constant pointers themselves. 1095 const PointerType *PTy = GVar->getType(); 1096 Type *ETy = PTy->getElementType(); 1097 1098 if (GVar->hasExternalLinkage()) { 1099 if (GVar->hasInitializer()) 1100 O << ".visible "; 1101 else 1102 O << ".extern "; 1103 } 1104 1105 if (llvm::isTexture(*GVar)) { 1106 O << ".global .texref " << llvm::getTextureName(*GVar) << ";\n"; 1107 return; 1108 } 1109 1110 if (llvm::isSurface(*GVar)) { 1111 O << ".global .surfref " << llvm::getSurfaceName(*GVar) << ";\n"; 1112 return; 1113 } 1114 1115 if (GVar->isDeclaration()) { 1116 // (extern) declarations, no definition or initializer 1117 // Currently the only known declaration is for an automatic __local 1118 // (.shared) promoted to global. 
1119 emitPTXGlobalVariable(GVar, O); 1120 O << ";\n"; 1121 return; 1122 } 1123 1124 if (llvm::isSampler(*GVar)) { 1125 O << ".global .samplerref " << llvm::getSamplerName(*GVar); 1126 1127 const Constant *Initializer = NULL; 1128 if (GVar->hasInitializer()) 1129 Initializer = GVar->getInitializer(); 1130 const ConstantInt *CI = NULL; 1131 if (Initializer) 1132 CI = dyn_cast<ConstantInt>(Initializer); 1133 if (CI) { 1134 unsigned sample = CI->getZExtValue(); 1135 1136 O << " = { "; 1137 1138 for (int i = 0, 1139 addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE); 1140 i < 3; i++) { 1141 O << "addr_mode_" << i << " = "; 1142 switch (addr) { 1143 case 0: 1144 O << "wrap"; 1145 break; 1146 case 1: 1147 O << "clamp_to_border"; 1148 break; 1149 case 2: 1150 O << "clamp_to_edge"; 1151 break; 1152 case 3: 1153 O << "wrap"; 1154 break; 1155 case 4: 1156 O << "mirror"; 1157 break; 1158 } 1159 O << ", "; 1160 } 1161 O << "filter_mode = "; 1162 switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) { 1163 case 0: 1164 O << "nearest"; 1165 break; 1166 case 1: 1167 O << "linear"; 1168 break; 1169 case 2: 1170 assert(0 && "Anisotropic filtering is not supported"); 1171 default: 1172 O << "nearest"; 1173 break; 1174 } 1175 if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) { 1176 O << ", force_unnormalized_coords = 1"; 1177 } 1178 O << " }"; 1179 } 1180 1181 O << ";\n"; 1182 return; 1183 } 1184 1185 if (GVar->hasPrivateLinkage()) { 1186 1187 if (!strncmp(GVar->getName().data(), "unrollpragma", 12)) 1188 return; 1189 1190 // FIXME - need better way (e.g. 
Metadata) to avoid generating this global 1191 if (!strncmp(GVar->getName().data(), "filename", 8)) 1192 return; 1193 if (GVar->use_empty()) 1194 return; 1195 } 1196 1197 const Function *demotedFunc = 0; 1198 if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) { 1199 O << "// " << GVar->getName().str() << " has been demoted\n"; 1200 if (localDecls.find(demotedFunc) != localDecls.end()) 1201 localDecls[demotedFunc].push_back(GVar); 1202 else { 1203 std::vector<const GlobalVariable *> temp; 1204 temp.push_back(GVar); 1205 localDecls[demotedFunc] = temp; 1206 } 1207 return; 1208 } 1209 1210 O << "."; 1211 emitPTXAddressSpace(PTy->getAddressSpace(), O); 1212 if (GVar->getAlignment() == 0) 1213 O << " .align " << (int) TD->getPrefTypeAlignment(ETy); 1214 else 1215 O << " .align " << GVar->getAlignment(); 1216 1217 if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) { 1218 O << " ."; 1219 // Special case: ABI requires that we use .u8 for predicates 1220 if (ETy->isIntegerTy(1)) 1221 O << "u8"; 1222 else 1223 O << getPTXFundamentalTypeStr(ETy, false); 1224 O << " "; 1225 O << *Mang->getSymbol(GVar); 1226 1227 // Ptx allows variable initilization only for constant and global state 1228 // spaces. 1229 if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) || 1230 (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) || 1231 (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) && 1232 GVar->hasInitializer()) { 1233 const Constant *Initializer = GVar->getInitializer(); 1234 if (!Initializer->isNullValue()) { 1235 O << " = "; 1236 printScalarConstant(Initializer, O); 1237 } 1238 } 1239 } else { 1240 unsigned int ElementSize = 0; 1241 1242 // Although PTX has direct support for struct type and array type and 1243 // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for 1244 // targets that support these high level field accesses. Structs, arrays 1245 // and vectors are lowered into arrays of bytes. 
1246 switch (ETy->getTypeID()) { 1247 case Type::StructTyID: 1248 case Type::ArrayTyID: 1249 case Type::VectorTyID: 1250 ElementSize = TD->getTypeStoreSize(ETy); 1251 // Ptx allows variable initilization only for constant and 1252 // global state spaces. 1253 if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) || 1254 (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) || 1255 (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) && 1256 GVar->hasInitializer()) { 1257 const Constant *Initializer = GVar->getInitializer(); 1258 if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) { 1259 AggBuffer aggBuffer(ElementSize, O, *this); 1260 bufferAggregateConstant(Initializer, &aggBuffer); 1261 if (aggBuffer.numSymbols) { 1262 if (nvptxSubtarget.is64Bit()) { 1263 O << " .u64 " << *Mang->getSymbol(GVar) << "["; 1264 O << ElementSize / 8; 1265 } else { 1266 O << " .u32 " << *Mang->getSymbol(GVar) << "["; 1267 O << ElementSize / 4; 1268 } 1269 O << "]"; 1270 } else { 1271 O << " .b8 " << *Mang->getSymbol(GVar) << "["; 1272 O << ElementSize; 1273 O << "]"; 1274 } 1275 O << " = {"; 1276 aggBuffer.print(); 1277 O << "}"; 1278 } else { 1279 O << " .b8 " << *Mang->getSymbol(GVar); 1280 if (ElementSize) { 1281 O << "["; 1282 O << ElementSize; 1283 O << "]"; 1284 } 1285 } 1286 } else { 1287 O << " .b8 " << *Mang->getSymbol(GVar); 1288 if (ElementSize) { 1289 O << "["; 1290 O << ElementSize; 1291 O << "]"; 1292 } 1293 } 1294 break; 1295 default: 1296 assert(0 && "type not supported yet"); 1297 } 1298 1299 } 1300 O << ";\n"; 1301} 1302 1303void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) { 1304 if (localDecls.find(f) == localDecls.end()) 1305 return; 1306 1307 std::vector<const GlobalVariable *> &gvars = localDecls[f]; 1308 1309 for (unsigned i = 0, e = gvars.size(); i != e; ++i) { 1310 O << "\t// demoted variable\n\t"; 1311 printModuleLevelGV(gvars[i], O, true); 1312 } 1313} 1314 1315void 
NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace, 1316 raw_ostream &O) const { 1317 switch (AddressSpace) { 1318 case llvm::ADDRESS_SPACE_LOCAL: 1319 O << "local"; 1320 break; 1321 case llvm::ADDRESS_SPACE_GLOBAL: 1322 O << "global"; 1323 break; 1324 case llvm::ADDRESS_SPACE_CONST: 1325 // This logic should be consistent with that in 1326 // getCodeAddrSpace() (NVPTXISelDATToDAT.cpp) 1327 if (nvptxSubtarget.hasGenericLdSt()) 1328 O << "global"; 1329 else 1330 O << "const"; 1331 break; 1332 case llvm::ADDRESS_SPACE_CONST_NOT_GEN: 1333 O << "const"; 1334 break; 1335 case llvm::ADDRESS_SPACE_SHARED: 1336 O << "shared"; 1337 break; 1338 default: 1339 report_fatal_error("Bad address space found while emitting PTX"); 1340 break; 1341 } 1342} 1343 1344std::string 1345NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const { 1346 switch (Ty->getTypeID()) { 1347 default: 1348 llvm_unreachable("unexpected type"); 1349 break; 1350 case Type::IntegerTyID: { 1351 unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); 1352 if (NumBits == 1) 1353 return "pred"; 1354 else if (NumBits <= 64) { 1355 std::string name = "u"; 1356 return name + utostr(NumBits); 1357 } else { 1358 llvm_unreachable("Integer too large"); 1359 break; 1360 } 1361 break; 1362 } 1363 case Type::FloatTyID: 1364 return "f32"; 1365 case Type::DoubleTyID: 1366 return "f64"; 1367 case Type::PointerTyID: 1368 if (nvptxSubtarget.is64Bit()) 1369 if (useB4PTR) 1370 return "b64"; 1371 else 1372 return "u64"; 1373 else if (useB4PTR) 1374 return "b32"; 1375 else 1376 return "u32"; 1377 } 1378 llvm_unreachable("unexpected type"); 1379 return NULL; 1380} 1381 1382void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, 1383 raw_ostream &O) { 1384 1385 const DataLayout *TD = TM.getDataLayout(); 1386 1387 // GlobalVariables are always constant pointers themselves. 
1388 const PointerType *PTy = GVar->getType(); 1389 Type *ETy = PTy->getElementType(); 1390 1391 O << "."; 1392 emitPTXAddressSpace(PTy->getAddressSpace(), O); 1393 if (GVar->getAlignment() == 0) 1394 O << " .align " << (int) TD->getPrefTypeAlignment(ETy); 1395 else 1396 O << " .align " << GVar->getAlignment(); 1397 1398 if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) { 1399 O << " ."; 1400 O << getPTXFundamentalTypeStr(ETy); 1401 O << " "; 1402 O << *Mang->getSymbol(GVar); 1403 return; 1404 } 1405 1406 int64_t ElementSize = 0; 1407 1408 // Although PTX has direct support for struct type and array type and LLVM IR 1409 // is very similar to PTX, the LLVM CodeGen does not support for targets that 1410 // support these high level field accesses. Structs and arrays are lowered 1411 // into arrays of bytes. 1412 switch (ETy->getTypeID()) { 1413 case Type::StructTyID: 1414 case Type::ArrayTyID: 1415 case Type::VectorTyID: 1416 ElementSize = TD->getTypeStoreSize(ETy); 1417 O << " .b8 " << *Mang->getSymbol(GVar) << "["; 1418 if (ElementSize) { 1419 O << itostr(ElementSize); 1420 } 1421 O << "]"; 1422 break; 1423 default: 1424 assert(0 && "type not supported yet"); 1425 } 1426 return; 1427} 1428 1429static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) { 1430 if (Ty->isPrimitiveType() || Ty->isIntegerTy() || isa<PointerType>(Ty)) 1431 return TD->getPrefTypeAlignment(Ty); 1432 1433 const ArrayType *ATy = dyn_cast<ArrayType>(Ty); 1434 if (ATy) 1435 return getOpenCLAlignment(TD, ATy->getElementType()); 1436 1437 const VectorType *VTy = dyn_cast<VectorType>(Ty); 1438 if (VTy) { 1439 Type *ETy = VTy->getElementType(); 1440 unsigned int numE = VTy->getNumElements(); 1441 unsigned int alignE = TD->getPrefTypeAlignment(ETy); 1442 if (numE == 3) 1443 return 4 * alignE; 1444 else 1445 return numE * alignE; 1446 } 1447 1448 const StructType *STy = dyn_cast<StructType>(Ty); 1449 if (STy) { 1450 unsigned int alignStruct = 1; 1451 // Go 
through each element of the struct and find the 1452 // largest alignment. 1453 for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) { 1454 Type *ETy = STy->getElementType(i); 1455 unsigned int align = getOpenCLAlignment(TD, ETy); 1456 if (align > alignStruct) 1457 alignStruct = align; 1458 } 1459 return alignStruct; 1460 } 1461 1462 const FunctionType *FTy = dyn_cast<FunctionType>(Ty); 1463 if (FTy) 1464 return TD->getPointerPrefAlignment(); 1465 return TD->getPrefTypeAlignment(Ty); 1466} 1467 1468void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I, 1469 int paramIndex, raw_ostream &O) { 1470 if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) || 1471 (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)) 1472 O << *Mang->getSymbol(I->getParent()) << "_param_" << paramIndex; 1473 else { 1474 std::string argName = I->getName(); 1475 const char *p = argName.c_str(); 1476 while (*p) { 1477 if (*p == '.') 1478 O << "_"; 1479 else 1480 O << *p; 1481 p++; 1482 } 1483 } 1484} 1485 1486void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) { 1487 Function::const_arg_iterator I, E; 1488 int i = 0; 1489 1490 if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) || 1491 (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)) { 1492 O << *CurrentFnSym << "_param_" << paramIndex; 1493 return; 1494 } 1495 1496 for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, i++) { 1497 if (i == paramIndex) { 1498 printParamName(I, paramIndex, O); 1499 return; 1500 } 1501 } 1502 llvm_unreachable("paramIndex out of bound"); 1503} 1504 1505void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { 1506 const DataLayout *TD = TM.getDataLayout(); 1507 const AttributeSet &PAL = F->getAttributes(); 1508 const TargetLowering *TLI = TM.getTargetLowering(); 1509 Function::const_arg_iterator I, E; 1510 unsigned paramIndex = 0; 1511 bool first = true; 1512 bool isKernelFunc = llvm::isKernelFunction(*F); 1513 bool isABI = 
(nvptxSubtarget.getSmVersion() >= 20); 1514 MVT thePointerTy = TLI->getPointerTy(); 1515 1516 O << "(\n"; 1517 1518 for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) { 1519 Type *Ty = I->getType(); 1520 1521 if (!first) 1522 O << ",\n"; 1523 1524 first = false; 1525 1526 // Handle image/sampler parameters 1527 if (llvm::isSampler(*I) || llvm::isImage(*I)) { 1528 if (llvm::isImage(*I)) { 1529 std::string sname = I->getName(); 1530 if (llvm::isImageWriteOnly(*I)) 1531 O << "\t.param .surfref " << *Mang->getSymbol(F) << "_param_" 1532 << paramIndex; 1533 else // Default image is read_only 1534 O << "\t.param .texref " << *Mang->getSymbol(F) << "_param_" 1535 << paramIndex; 1536 } else // Should be llvm::isSampler(*I) 1537 O << "\t.param .samplerref " << *Mang->getSymbol(F) << "_param_" 1538 << paramIndex; 1539 continue; 1540 } 1541 1542 if (PAL.hasAttribute(paramIndex + 1, Attribute::ByVal) == false) { 1543 if (Ty->isVectorTy()) { 1544 // Just print .param .b8 .align <a> .param[size]; 1545 // <a> = PAL.getparamalignment 1546 // size = typeallocsize of element type 1547 unsigned align = PAL.getParamAlignment(paramIndex + 1); 1548 if (align == 0) 1549 align = TD->getABITypeAlignment(Ty); 1550 1551 unsigned sz = TD->getTypeAllocSize(Ty); 1552 O << "\t.param .align " << align << " .b8 "; 1553 printParamName(I, paramIndex, O); 1554 O << "[" << sz << "]"; 1555 1556 continue; 1557 } 1558 // Just a scalar 1559 const PointerType *PTy = dyn_cast<PointerType>(Ty); 1560 if (isKernelFunc) { 1561 if (PTy) { 1562 // Special handling for pointer arguments to kernel 1563 O << "\t.param .u" << thePointerTy.getSizeInBits() << " "; 1564 1565 if (nvptxSubtarget.getDrvInterface() != NVPTX::CUDA) { 1566 Type *ETy = PTy->getElementType(); 1567 int addrSpace = PTy->getAddressSpace(); 1568 switch (addrSpace) { 1569 default: 1570 O << ".ptr "; 1571 break; 1572 case llvm::ADDRESS_SPACE_CONST_NOT_GEN: 1573 O << ".ptr .const "; 1574 break; 1575 case llvm::ADDRESS_SPACE_SHARED: 
1576 O << ".ptr .shared "; 1577 break; 1578 case llvm::ADDRESS_SPACE_GLOBAL: 1579 case llvm::ADDRESS_SPACE_CONST: 1580 O << ".ptr .global "; 1581 break; 1582 } 1583 O << ".align " << (int) getOpenCLAlignment(TD, ETy) << " "; 1584 } 1585 printParamName(I, paramIndex, O); 1586 continue; 1587 } 1588 1589 // non-pointer scalar to kernel func 1590 O << "\t.param ."; 1591 // Special case: predicate operands become .u8 types 1592 if (Ty->isIntegerTy(1)) 1593 O << "u8"; 1594 else 1595 O << getPTXFundamentalTypeStr(Ty); 1596 O << " "; 1597 printParamName(I, paramIndex, O); 1598 continue; 1599 } 1600 // Non-kernel function, just print .param .b<size> for ABI 1601 // and .reg .b<size> for non ABY 1602 unsigned sz = 0; 1603 if (isa<IntegerType>(Ty)) { 1604 sz = cast<IntegerType>(Ty)->getBitWidth(); 1605 if (sz < 32) 1606 sz = 32; 1607 } else if (isa<PointerType>(Ty)) 1608 sz = thePointerTy.getSizeInBits(); 1609 else 1610 sz = Ty->getPrimitiveSizeInBits(); 1611 if (isABI) 1612 O << "\t.param .b" << sz << " "; 1613 else 1614 O << "\t.reg .b" << sz << " "; 1615 printParamName(I, paramIndex, O); 1616 continue; 1617 } 1618 1619 // param has byVal attribute. So should be a pointer 1620 const PointerType *PTy = dyn_cast<PointerType>(Ty); 1621 assert(PTy && "Param with byval attribute should be a pointer type"); 1622 Type *ETy = PTy->getElementType(); 1623 1624 if (isABI || isKernelFunc) { 1625 // Just print .param .b8 .align <a> .param[size]; 1626 // <a> = PAL.getparamalignment 1627 // size = typeallocsize of element type 1628 unsigned align = PAL.getParamAlignment(paramIndex + 1); 1629 if (align == 0) 1630 align = TD->getABITypeAlignment(ETy); 1631 1632 unsigned sz = TD->getTypeAllocSize(ETy); 1633 O << "\t.param .align " << align << " .b8 "; 1634 printParamName(I, paramIndex, O); 1635 O << "[" << sz << "]"; 1636 continue; 1637 } else { 1638 // Split the ETy into constituent parts and 1639 // print .param .b<size> <name> for each part. 
1640 // Further, if a part is vector, print the above for 1641 // each vector element. 1642 SmallVector<EVT, 16> vtparts; 1643 ComputeValueVTs(*TLI, ETy, vtparts); 1644 for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { 1645 unsigned elems = 1; 1646 EVT elemtype = vtparts[i]; 1647 if (vtparts[i].isVector()) { 1648 elems = vtparts[i].getVectorNumElements(); 1649 elemtype = vtparts[i].getVectorElementType(); 1650 } 1651 1652 for (unsigned j = 0, je = elems; j != je; ++j) { 1653 unsigned sz = elemtype.getSizeInBits(); 1654 if (elemtype.isInteger() && (sz < 32)) 1655 sz = 32; 1656 O << "\t.reg .b" << sz << " "; 1657 printParamName(I, paramIndex, O); 1658 if (j < je - 1) 1659 O << ",\n"; 1660 ++paramIndex; 1661 } 1662 if (i < e - 1) 1663 O << ",\n"; 1664 } 1665 --paramIndex; 1666 continue; 1667 } 1668 } 1669 1670 O << "\n)\n"; 1671} 1672 1673void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF, 1674 raw_ostream &O) { 1675 const Function *F = MF.getFunction(); 1676 emitFunctionParamList(F, O); 1677} 1678 1679void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters( 1680 const MachineFunction &MF) { 1681 SmallString<128> Str; 1682 raw_svector_ostream O(Str); 1683 1684 // Map the global virtual register number to a register class specific 1685 // virtual register number starting from 1 with that class. 
1686 const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); 1687 //unsigned numRegClasses = TRI->getNumRegClasses(); 1688 1689 // Emit the Fake Stack Object 1690 const MachineFrameInfo *MFI = MF.getFrameInfo(); 1691 int NumBytes = (int) MFI->getStackSize(); 1692 if (NumBytes) { 1693 O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t" << DEPOTNAME 1694 << getFunctionNumber() << "[" << NumBytes << "];\n"; 1695 if (nvptxSubtarget.is64Bit()) { 1696 O << "\t.reg .b64 \t%SP;\n"; 1697 O << "\t.reg .b64 \t%SPL;\n"; 1698 } else { 1699 O << "\t.reg .b32 \t%SP;\n"; 1700 O << "\t.reg .b32 \t%SPL;\n"; 1701 } 1702 } 1703 1704 // Go through all virtual registers to establish the mapping between the 1705 // global virtual 1706 // register number and the per class virtual register number. 1707 // We use the per class virtual register number in the ptx output. 1708 unsigned int numVRs = MRI->getNumVirtRegs(); 1709 for (unsigned i = 0; i < numVRs; i++) { 1710 unsigned int vr = TRI->index2VirtReg(i); 1711 const TargetRegisterClass *RC = MRI->getRegClass(vr); 1712 std::map<unsigned, unsigned> ®map = VRidGlobal2LocalMap[RC->getID()]; 1713 int n = regmap.size(); 1714 regmap.insert(std::make_pair(vr, n + 1)); 1715 } 1716 1717 // Emit register declarations 1718 // @TODO: Extract out the real register usage 1719 O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n"; 1720 O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n"; 1721 O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n"; 1722 O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n"; 1723 O << "\t.reg .s64 %rl<" << NVPTXNumRegisters << ">;\n"; 1724 O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n"; 1725 O << "\t.reg .f64 %fl<" << NVPTXNumRegisters << ">;\n"; 1726 1727 // Emit declaration of the virtual registers or 'physical' registers for 1728 // each register class 1729 //for (unsigned i=0; i< numRegClasses; i++) { 1730 // std::map<unsigned, unsigned> ®map = VRidGlobal2LocalMap[i]; 1731 // const 
TargetRegisterClass *RC = TRI->getRegClass(i); 1732 // std::string rcname = getNVPTXRegClassName(RC); 1733 // std::string rcStr = getNVPTXRegClassStr(RC); 1734 // //int n = regmap.size(); 1735 // if (!isNVPTXVectorRegClass(RC)) { 1736 // O << "\t.reg " << rcname << " \t" << rcStr << "<" 1737 // << NVPTXNumRegisters << ">;\n"; 1738 // } 1739 1740 // Only declare those registers that may be used. And do not emit vector 1741 // registers as 1742 // they are all elementized to scalar registers. 1743 //if (n && !isNVPTXVectorRegClass(RC)) { 1744 // if (RegAllocNilUsed) { 1745 // O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1) 1746 // << ">;\n"; 1747 // } 1748 // else { 1749 // O << "\t.reg " << rcname << " \t" << StrToUpper(rcStr) 1750 // << "<" << 32 << ">;\n"; 1751 // } 1752 //} 1753 //} 1754 1755 OutStreamer.EmitRawText(O.str()); 1756} 1757 1758void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) { 1759 APFloat APF = APFloat(Fp->getValueAPF()); // make a copy 1760 bool ignored; 1761 unsigned int numHex; 1762 const char *lead; 1763 1764 if (Fp->getType()->getTypeID() == Type::FloatTyID) { 1765 numHex = 8; 1766 lead = "0f"; 1767 APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &ignored); 1768 } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) { 1769 numHex = 16; 1770 lead = "0d"; 1771 APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored); 1772 } else 1773 llvm_unreachable("unsupported fp type"); 1774 1775 APInt API = APF.bitcastToAPInt(); 1776 std::string hexstr(utohexstr(API.getZExtValue())); 1777 O << lead; 1778 if (hexstr.length() < numHex) 1779 O << std::string(numHex - hexstr.length(), '0'); 1780 O << utohexstr(API.getZExtValue()); 1781} 1782 1783void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) { 1784 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) { 1785 O << CI->getValue(); 1786 return; 1787 } 1788 if (const ConstantFP *CFP = 
dyn_cast<ConstantFP>(CPV)) { 1789 printFPConstant(CFP, O); 1790 return; 1791 } 1792 if (isa<ConstantPointerNull>(CPV)) { 1793 O << "0"; 1794 return; 1795 } 1796 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) { 1797 O << *Mang->getSymbol(GVar); 1798 return; 1799 } 1800 if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1801 const Value *v = Cexpr->stripPointerCasts(); 1802 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) { 1803 O << *Mang->getSymbol(GVar); 1804 return; 1805 } else { 1806 O << *LowerConstant(CPV, *this); 1807 return; 1808 } 1809 } 1810 llvm_unreachable("Not scalar type found in printScalarConstant()"); 1811} 1812 1813void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, 1814 AggBuffer *aggBuffer) { 1815 1816 const DataLayout *TD = TM.getDataLayout(); 1817 1818 if (isa<UndefValue>(CPV) || CPV->isNullValue()) { 1819 int s = TD->getTypeAllocSize(CPV->getType()); 1820 if (s < Bytes) 1821 s = Bytes; 1822 aggBuffer->addZeros(s); 1823 return; 1824 } 1825 1826 unsigned char *ptr; 1827 switch (CPV->getType()->getTypeID()) { 1828 1829 case Type::IntegerTyID: { 1830 const Type *ETy = CPV->getType(); 1831 if (ETy == Type::getInt8Ty(CPV->getContext())) { 1832 unsigned char c = 1833 (unsigned char)(dyn_cast<ConstantInt>(CPV))->getZExtValue(); 1834 ptr = &c; 1835 aggBuffer->addBytes(ptr, 1, Bytes); 1836 } else if (ETy == Type::getInt16Ty(CPV->getContext())) { 1837 short int16 = (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue(); 1838 ptr = (unsigned char *)&int16; 1839 aggBuffer->addBytes(ptr, 2, Bytes); 1840 } else if (ETy == Type::getInt32Ty(CPV->getContext())) { 1841 if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { 1842 int int32 = (int)(constInt->getZExtValue()); 1843 ptr = (unsigned char *)&int32; 1844 aggBuffer->addBytes(ptr, 4, Bytes); 1845 break; 1846 } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1847 if (const ConstantInt *constInt = dyn_cast<ConstantInt>( 1848 
ConstantFoldConstantExpression(Cexpr, TD))) { 1849 int int32 = (int)(constInt->getZExtValue()); 1850 ptr = (unsigned char *)&int32; 1851 aggBuffer->addBytes(ptr, 4, Bytes); 1852 break; 1853 } 1854 if (Cexpr->getOpcode() == Instruction::PtrToInt) { 1855 Value *v = Cexpr->getOperand(0)->stripPointerCasts(); 1856 aggBuffer->addSymbol(v); 1857 aggBuffer->addZeros(4); 1858 break; 1859 } 1860 } 1861 llvm_unreachable("unsupported integer const type"); 1862 } else if (ETy == Type::getInt64Ty(CPV->getContext())) { 1863 if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { 1864 long long int64 = (long long)(constInt->getZExtValue()); 1865 ptr = (unsigned char *)&int64; 1866 aggBuffer->addBytes(ptr, 8, Bytes); 1867 break; 1868 } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1869 if (const ConstantInt *constInt = dyn_cast<ConstantInt>( 1870 ConstantFoldConstantExpression(Cexpr, TD))) { 1871 long long int64 = (long long)(constInt->getZExtValue()); 1872 ptr = (unsigned char *)&int64; 1873 aggBuffer->addBytes(ptr, 8, Bytes); 1874 break; 1875 } 1876 if (Cexpr->getOpcode() == Instruction::PtrToInt) { 1877 Value *v = Cexpr->getOperand(0)->stripPointerCasts(); 1878 aggBuffer->addSymbol(v); 1879 aggBuffer->addZeros(8); 1880 break; 1881 } 1882 } 1883 llvm_unreachable("unsupported integer const type"); 1884 } else 1885 llvm_unreachable("unsupported integer const type"); 1886 break; 1887 } 1888 case Type::FloatTyID: 1889 case Type::DoubleTyID: { 1890 const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV); 1891 const Type *Ty = CFP->getType(); 1892 if (Ty == Type::getFloatTy(CPV->getContext())) { 1893 float float32 = (float) CFP->getValueAPF().convertToFloat(); 1894 ptr = (unsigned char *)&float32; 1895 aggBuffer->addBytes(ptr, 4, Bytes); 1896 } else if (Ty == Type::getDoubleTy(CPV->getContext())) { 1897 double float64 = CFP->getValueAPF().convertToDouble(); 1898 ptr = (unsigned char *)&float64; 1899 aggBuffer->addBytes(ptr, 8, Bytes); 1900 } else { 1901 
llvm_unreachable("unsupported fp const type"); 1902 } 1903 break; 1904 } 1905 case Type::PointerTyID: { 1906 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) { 1907 aggBuffer->addSymbol(GVar); 1908 } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1909 const Value *v = Cexpr->stripPointerCasts(); 1910 aggBuffer->addSymbol(v); 1911 } 1912 unsigned int s = TD->getTypeAllocSize(CPV->getType()); 1913 aggBuffer->addZeros(s); 1914 break; 1915 } 1916 1917 case Type::ArrayTyID: 1918 case Type::VectorTyID: 1919 case Type::StructTyID: { 1920 if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV) || 1921 isa<ConstantStruct>(CPV)) { 1922 int ElementSize = TD->getTypeAllocSize(CPV->getType()); 1923 bufferAggregateConstant(CPV, aggBuffer); 1924 if (Bytes > ElementSize) 1925 aggBuffer->addZeros(Bytes - ElementSize); 1926 } else if (isa<ConstantAggregateZero>(CPV)) 1927 aggBuffer->addZeros(Bytes); 1928 else 1929 llvm_unreachable("Unexpected Constant type"); 1930 break; 1931 } 1932 1933 default: 1934 llvm_unreachable("unsupported type"); 1935 } 1936} 1937 1938void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV, 1939 AggBuffer *aggBuffer) { 1940 const DataLayout *TD = TM.getDataLayout(); 1941 int Bytes; 1942 1943 // Old constants 1944 if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) { 1945 if (CPV->getNumOperands()) 1946 for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) 1947 bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer); 1948 return; 1949 } 1950 1951 if (const ConstantDataSequential *CDS = 1952 dyn_cast<ConstantDataSequential>(CPV)) { 1953 if (CDS->getNumElements()) 1954 for (unsigned i = 0; i < CDS->getNumElements(); ++i) 1955 bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0, 1956 aggBuffer); 1957 return; 1958 } 1959 1960 if (isa<ConstantStruct>(CPV)) { 1961 if (CPV->getNumOperands()) { 1962 StructType *ST = cast<StructType>(CPV->getType()); 1963 for (unsigned i = 0, e = 
CPV->getNumOperands(); i != e; ++i) { 1964 if (i == (e - 1)) 1965 Bytes = TD->getStructLayout(ST)->getElementOffset(0) + 1966 TD->getTypeAllocSize(ST) - 1967 TD->getStructLayout(ST)->getElementOffset(i); 1968 else 1969 Bytes = TD->getStructLayout(ST)->getElementOffset(i + 1) - 1970 TD->getStructLayout(ST)->getElementOffset(i); 1971 bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer); 1972 } 1973 } 1974 return; 1975 } 1976 llvm_unreachable("unsupported constant type in printAggregateConstant()"); 1977} 1978 1979// buildTypeNameMap - Run through symbol table looking for type names. 1980// 1981 1982bool NVPTXAsmPrinter::isImageType(const Type *Ty) { 1983 1984 std::map<const Type *, std::string>::iterator PI = TypeNameMap.find(Ty); 1985 1986 if (PI != TypeNameMap.end() && (!PI->second.compare("struct._image1d_t") || 1987 !PI->second.compare("struct._image2d_t") || 1988 !PI->second.compare("struct._image3d_t"))) 1989 return true; 1990 1991 return false; 1992} 1993 1994/// PrintAsmOperand - Print out an operand for an inline asm expression. 1995/// 1996bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, 1997 unsigned AsmVariant, 1998 const char *ExtraCode, raw_ostream &O) { 1999 if (ExtraCode && ExtraCode[0]) { 2000 if (ExtraCode[1] != 0) 2001 return true; // Unknown modifier. 
2002 2003 switch (ExtraCode[0]) { 2004 default: 2005 // See if this is a generic print operand 2006 return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O); 2007 case 'r': 2008 break; 2009 } 2010 } 2011 2012 printOperand(MI, OpNo, O); 2013 2014 return false; 2015} 2016 2017bool NVPTXAsmPrinter::PrintAsmMemoryOperand( 2018 const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, 2019 const char *ExtraCode, raw_ostream &O) { 2020 if (ExtraCode && ExtraCode[0]) 2021 return true; // Unknown modifier 2022 2023 O << '['; 2024 printMemOperand(MI, OpNo, O); 2025 O << ']'; 2026 2027 return false; 2028} 2029 2030bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) { 2031 switch (MI.getOpcode()) { 2032 default: 2033 return false; 2034 case NVPTX::CallArgBeginInst: 2035 case NVPTX::CallArgEndInst0: 2036 case NVPTX::CallArgEndInst1: 2037 case NVPTX::CallArgF32: 2038 case NVPTX::CallArgF64: 2039 case NVPTX::CallArgI16: 2040 case NVPTX::CallArgI32: 2041 case NVPTX::CallArgI32imm: 2042 case NVPTX::CallArgI64: 2043 case NVPTX::CallArgI8: 2044 case NVPTX::CallArgParam: 2045 case NVPTX::CallVoidInst: 2046 case NVPTX::CallVoidInstReg: 2047 case NVPTX::Callseq_End: 2048 case NVPTX::CallVoidInstReg64: 2049 case NVPTX::DeclareParamInst: 2050 case NVPTX::DeclareRetMemInst: 2051 case NVPTX::DeclareRetRegInst: 2052 case NVPTX::DeclareRetScalarInst: 2053 case NVPTX::DeclareScalarParamInst: 2054 case NVPTX::DeclareScalarRegInst: 2055 case NVPTX::StoreParamF32: 2056 case NVPTX::StoreParamF64: 2057 case NVPTX::StoreParamI16: 2058 case NVPTX::StoreParamI32: 2059 case NVPTX::StoreParamI64: 2060 case NVPTX::StoreParamI8: 2061 case NVPTX::StoreParamS32I8: 2062 case NVPTX::StoreParamU32I8: 2063 case NVPTX::StoreParamS32I16: 2064 case NVPTX::StoreParamU32I16: 2065 case NVPTX::StoreRetvalF32: 2066 case NVPTX::StoreRetvalF64: 2067 case NVPTX::StoreRetvalI16: 2068 case NVPTX::StoreRetvalI32: 2069 case NVPTX::StoreRetvalI64: 2070 case NVPTX::StoreRetvalI8: 2071 case 
NVPTX::LastCallArgF32: 2072 case NVPTX::LastCallArgF64: 2073 case NVPTX::LastCallArgI16: 2074 case NVPTX::LastCallArgI32: 2075 case NVPTX::LastCallArgI32imm: 2076 case NVPTX::LastCallArgI64: 2077 case NVPTX::LastCallArgI8: 2078 case NVPTX::LastCallArgParam: 2079 case NVPTX::LoadParamMemF32: 2080 case NVPTX::LoadParamMemF64: 2081 case NVPTX::LoadParamMemI16: 2082 case NVPTX::LoadParamMemI32: 2083 case NVPTX::LoadParamMemI64: 2084 case NVPTX::LoadParamMemI8: 2085 case NVPTX::LoadParamRegF32: 2086 case NVPTX::LoadParamRegF64: 2087 case NVPTX::LoadParamRegI16: 2088 case NVPTX::LoadParamRegI32: 2089 case NVPTX::LoadParamRegI64: 2090 case NVPTX::LoadParamRegI8: 2091 case NVPTX::PrototypeInst: 2092 case NVPTX::DBG_VALUE: 2093 return true; 2094 } 2095 return false; 2096} 2097 2098// Force static initialization. 2099extern "C" void LLVMInitializeNVPTXBackendAsmPrinter() { 2100 RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32); 2101 RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64); 2102} 2103 2104void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) { 2105 std::stringstream temp; 2106 LineReader *reader = this->getReader(filename.str()); 2107 temp << "\n//"; 2108 temp << filename.str(); 2109 temp << ":"; 2110 temp << line; 2111 temp << " "; 2112 temp << reader->readLine(line); 2113 temp << "\n"; 2114 this->OutStreamer.EmitRawText(Twine(temp.str())); 2115} 2116 2117LineReader *NVPTXAsmPrinter::getReader(std::string filename) { 2118 if (reader == NULL) { 2119 reader = new LineReader(filename); 2120 } 2121 2122 if (reader->fileName() != filename) { 2123 delete reader; 2124 reader = new LineReader(filename); 2125 } 2126 2127 return reader; 2128} 2129 2130std::string LineReader::readLine(unsigned lineNum) { 2131 if (lineNum < theCurLine) { 2132 theCurLine = 0; 2133 fstr.seekg(0, std::ios::beg); 2134 } 2135 while (theCurLine < lineNum) { 2136 fstr.getline(buff, 500); 2137 theCurLine++; 2138 } 2139 return buff; 2140} 2141 2142// Force static 
initialization. 2143extern "C" void LLVMInitializeNVPTXAsmPrinter() { 2144 RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32); 2145 RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64); 2146} 2147