NVPTXAsmPrinter.cpp revision 249423
1//===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file contains a printer that converts from our internal representation 11// of machine-dependent LLVM code to NVPTX assembly language. 12// 13//===----------------------------------------------------------------------===// 14 15#include "NVPTXAsmPrinter.h" 16#include "MCTargetDesc/NVPTXMCAsmInfo.h" 17#include "NVPTX.h" 18#include "NVPTXInstrInfo.h" 19#include "NVPTXNumRegisters.h" 20#include "NVPTXRegisterInfo.h" 21#include "NVPTXTargetMachine.h" 22#include "NVPTXUtilities.h" 23#include "cl_common_defines.h" 24#include "llvm/ADT/StringExtras.h" 25#include "llvm/Analysis/ConstantFolding.h" 26#include "llvm/Assembly/Writer.h" 27#include "llvm/CodeGen/Analysis.h" 28#include "llvm/CodeGen/MachineFrameInfo.h" 29#include "llvm/CodeGen/MachineModuleInfo.h" 30#include "llvm/CodeGen/MachineRegisterInfo.h" 31#include "llvm/DebugInfo.h" 32#include "llvm/IR/DerivedTypes.h" 33#include "llvm/IR/Function.h" 34#include "llvm/IR/GlobalVariable.h" 35#include "llvm/IR/Module.h" 36#include "llvm/IR/Operator.h" 37#include "llvm/MC/MCStreamer.h" 38#include "llvm/MC/MCSymbol.h" 39#include "llvm/Support/CommandLine.h" 40#include "llvm/Support/ErrorHandling.h" 41#include "llvm/Support/FormattedStream.h" 42#include "llvm/Support/Path.h" 43#include "llvm/Support/TargetRegistry.h" 44#include "llvm/Support/TimeValue.h" 45#include "llvm/Target/Mangler.h" 46#include "llvm/Target/TargetLoweringObjectFile.h" 47#include <sstream> 48using namespace llvm; 49 50#include "NVPTXGenAsmWriter.inc" 51 52bool RegAllocNilUsed = true; 53 54#define DEPOTNAME "__local_depot" 55 56static cl::opt<bool> 57EmitLineNumbers("nvptx-emit-line-numbers", 58 cl::desc("NVPTX Specific: 
Emit Line numbers even without -G"), 59 cl::init(true)); 60 61namespace llvm { bool InterleaveSrcInPtx = false; } 62 63static cl::opt<bool, true> 64InterleaveSrc("nvptx-emit-src", cl::ZeroOrMore, 65 cl::desc("NVPTX Specific: Emit source line in ptx file"), 66 cl::location(llvm::InterleaveSrcInPtx)); 67 68namespace { 69/// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V 70/// depends. 71void DiscoverDependentGlobals(Value *V, DenseSet<GlobalVariable *> &Globals) { 72 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) 73 Globals.insert(GV); 74 else { 75 if (User *U = dyn_cast<User>(V)) { 76 for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) { 77 DiscoverDependentGlobals(U->getOperand(i), Globals); 78 } 79 } 80 } 81} 82 83/// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable 84/// instances to be emitted, but only after any dependents have been added 85/// first. 86void VisitGlobalVariableForEmission( 87 GlobalVariable *GV, SmallVectorImpl<GlobalVariable *> &Order, 88 DenseSet<GlobalVariable *> &Visited, DenseSet<GlobalVariable *> &Visiting) { 89 // Have we already visited this one? 90 if (Visited.count(GV)) 91 return; 92 93 // Do we have a circular dependency? 94 if (Visiting.count(GV)) 95 report_fatal_error("Circular dependency found in global variable set"); 96 97 // Start visiting this global 98 Visiting.insert(GV); 99 100 // Make sure we visit all dependents first 101 DenseSet<GlobalVariable *> Others; 102 for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i) 103 DiscoverDependentGlobals(GV->getOperand(i), Others); 104 105 for (DenseSet<GlobalVariable *>::iterator I = Others.begin(), 106 E = Others.end(); 107 I != E; ++I) 108 VisitGlobalVariableForEmission(*I, Order, Visited, Visiting); 109 110 // Now we can visit ourself 111 Order.push_back(GV); 112 Visited.insert(GV); 113 Visiting.erase(GV); 114} 115} 116 117// @TODO: This is a copy from AsmPrinter.cpp. 
The function is static, so we 118// cannot just link to the existing version. 119/// LowerConstant - Lower the specified LLVM Constant to an MCExpr. 120/// 121using namespace nvptx; 122const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { 123 MCContext &Ctx = AP.OutContext; 124 125 if (CV->isNullValue() || isa<UndefValue>(CV)) 126 return MCConstantExpr::Create(0, Ctx); 127 128 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) 129 return MCConstantExpr::Create(CI->getZExtValue(), Ctx); 130 131 if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) 132 return MCSymbolRefExpr::Create(AP.Mang->getSymbol(GV), Ctx); 133 134 if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) 135 return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx); 136 137 const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV); 138 if (CE == 0) 139 llvm_unreachable("Unknown constant value to lower!"); 140 141 switch (CE->getOpcode()) { 142 default: 143 // If the code isn't optimized, there may be outstanding folding 144 // opportunities. Attempt to fold the expression using DataLayout as a 145 // last resort before giving up. 146 if (Constant *C = ConstantFoldConstantExpression(CE, AP.TM.getDataLayout())) 147 if (C != CE) 148 return LowerConstant(C, AP); 149 150 // Otherwise report the problem to the user. 151 { 152 std::string S; 153 raw_string_ostream OS(S); 154 OS << "Unsupported expression in static initializer: "; 155 WriteAsOperand(OS, CE, /*PrintType=*/ false, 156 !AP.MF ? 
0 : AP.MF->getFunction()->getParent()); 157 report_fatal_error(OS.str()); 158 } 159 case Instruction::GetElementPtr: { 160 const DataLayout &TD = *AP.TM.getDataLayout(); 161 // Generate a symbolic expression for the byte address 162 APInt OffsetAI(TD.getPointerSizeInBits(), 0); 163 cast<GEPOperator>(CE)->accumulateConstantOffset(TD, OffsetAI); 164 165 const MCExpr *Base = LowerConstant(CE->getOperand(0), AP); 166 if (!OffsetAI) 167 return Base; 168 169 int64_t Offset = OffsetAI.getSExtValue(); 170 return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx), 171 Ctx); 172 } 173 174 case Instruction::Trunc: 175 // We emit the value and depend on the assembler to truncate the generated 176 // expression properly. This is important for differences between 177 // blockaddress labels. Since the two labels are in the same function, it 178 // is reasonable to treat their delta as a 32-bit value. 179 // FALL THROUGH. 180 case Instruction::BitCast: 181 return LowerConstant(CE->getOperand(0), AP); 182 183 case Instruction::IntToPtr: { 184 const DataLayout &TD = *AP.TM.getDataLayout(); 185 // Handle casts to pointers by changing them into casts to the appropriate 186 // integer type. This promotes constant folding and simplifies this code. 187 Constant *Op = CE->getOperand(0); 188 Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()), 189 false /*ZExt*/); 190 return LowerConstant(Op, AP); 191 } 192 193 case Instruction::PtrToInt: { 194 const DataLayout &TD = *AP.TM.getDataLayout(); 195 // Support only foldable casts to/from pointers that can be eliminated by 196 // changing the pointer to the appropriately sized integer type. 197 Constant *Op = CE->getOperand(0); 198 Type *Ty = CE->getType(); 199 200 const MCExpr *OpExpr = LowerConstant(Op, AP); 201 202 // We can emit the pointer value into this slot if the slot is an 203 // integer slot equal to the size of the pointer. 
204 if (TD.getTypeAllocSize(Ty) == TD.getTypeAllocSize(Op->getType())) 205 return OpExpr; 206 207 // Otherwise the pointer is smaller than the resultant integer, mask off 208 // the high bits so we are sure to get a proper truncation if the input is 209 // a constant expr. 210 unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType()); 211 const MCExpr *MaskExpr = 212 MCConstantExpr::Create(~0ULL >> (64 - InBits), Ctx); 213 return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx); 214 } 215 216 // The MC library also has a right-shift operator, but it isn't consistently 217 // signed or unsigned between different targets. 218 case Instruction::Add: 219 case Instruction::Sub: 220 case Instruction::Mul: 221 case Instruction::SDiv: 222 case Instruction::SRem: 223 case Instruction::Shl: 224 case Instruction::And: 225 case Instruction::Or: 226 case Instruction::Xor: { 227 const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP); 228 const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP); 229 switch (CE->getOpcode()) { 230 default: 231 llvm_unreachable("Unknown binary operator constant cast expr"); 232 case Instruction::Add: 233 return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx); 234 case Instruction::Sub: 235 return MCBinaryExpr::CreateSub(LHS, RHS, Ctx); 236 case Instruction::Mul: 237 return MCBinaryExpr::CreateMul(LHS, RHS, Ctx); 238 case Instruction::SDiv: 239 return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx); 240 case Instruction::SRem: 241 return MCBinaryExpr::CreateMod(LHS, RHS, Ctx); 242 case Instruction::Shl: 243 return MCBinaryExpr::CreateShl(LHS, RHS, Ctx); 244 case Instruction::And: 245 return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx); 246 case Instruction::Or: 247 return MCBinaryExpr::CreateOr(LHS, RHS, Ctx); 248 case Instruction::Xor: 249 return MCBinaryExpr::CreateXor(LHS, RHS, Ctx); 250 } 251 } 252 } 253} 254 255void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) { 256 if (!EmitLineNumbers) 257 return; 258 if (ignoreLoc(MI)) 259 return; 260 261 
DebugLoc curLoc = MI.getDebugLoc(); 262 263 if (prevDebugLoc.isUnknown() && curLoc.isUnknown()) 264 return; 265 266 if (prevDebugLoc == curLoc) 267 return; 268 269 prevDebugLoc = curLoc; 270 271 if (curLoc.isUnknown()) 272 return; 273 274 const MachineFunction *MF = MI.getParent()->getParent(); 275 //const TargetMachine &TM = MF->getTarget(); 276 277 const LLVMContext &ctx = MF->getFunction()->getContext(); 278 DIScope Scope(curLoc.getScope(ctx)); 279 280 if (!Scope.Verify()) 281 return; 282 283 StringRef fileName(Scope.getFilename()); 284 StringRef dirName(Scope.getDirectory()); 285 SmallString<128> FullPathName = dirName; 286 if (!dirName.empty() && !sys::path::is_absolute(fileName)) { 287 sys::path::append(FullPathName, fileName); 288 fileName = FullPathName.str(); 289 } 290 291 if (filenameMap.find(fileName.str()) == filenameMap.end()) 292 return; 293 294 // Emit the line from the source file. 295 if (llvm::InterleaveSrcInPtx) 296 this->emitSrcInText(fileName.str(), curLoc.getLine()); 297 298 std::stringstream temp; 299 temp << "\t.loc " << filenameMap[fileName.str()] << " " << curLoc.getLine() 300 << " " << curLoc.getCol(); 301 OutStreamer.EmitRawText(Twine(temp.str().c_str())); 302} 303 304void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) { 305 SmallString<128> Str; 306 raw_svector_ostream OS(Str); 307 if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) 308 emitLineNumberAsDotLoc(*MI); 309 printInstruction(MI, OS); 310 OutStreamer.EmitRawText(OS.str()); 311} 312 313void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) { 314 const DataLayout *TD = TM.getDataLayout(); 315 const TargetLowering *TLI = TM.getTargetLowering(); 316 317 Type *Ty = F->getReturnType(); 318 319 bool isABI = (nvptxSubtarget.getSmVersion() >= 20); 320 321 if (Ty->getTypeID() == Type::VoidTyID) 322 return; 323 324 O << " ("; 325 326 if (isABI) { 327 if (Ty->isPrimitiveType() || Ty->isIntegerTy()) { 328 unsigned size = 0; 329 if (const IntegerType *ITy = 
dyn_cast<IntegerType>(Ty)) { 330 size = ITy->getBitWidth(); 331 if (size < 32) 332 size = 32; 333 } else { 334 assert(Ty->isFloatingPointTy() && "Floating point type expected here"); 335 size = Ty->getPrimitiveSizeInBits(); 336 } 337 338 O << ".param .b" << size << " func_retval0"; 339 } else if (isa<PointerType>(Ty)) { 340 O << ".param .b" << TLI->getPointerTy().getSizeInBits() 341 << " func_retval0"; 342 } else { 343 if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) { 344 SmallVector<EVT, 16> vtparts; 345 ComputeValueVTs(*TLI, Ty, vtparts); 346 unsigned totalsz = 0; 347 for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { 348 unsigned elems = 1; 349 EVT elemtype = vtparts[i]; 350 if (vtparts[i].isVector()) { 351 elems = vtparts[i].getVectorNumElements(); 352 elemtype = vtparts[i].getVectorElementType(); 353 } 354 for (unsigned j = 0, je = elems; j != je; ++j) { 355 unsigned sz = elemtype.getSizeInBits(); 356 if (elemtype.isInteger() && (sz < 8)) 357 sz = 8; 358 totalsz += sz / 8; 359 } 360 } 361 unsigned retAlignment = 0; 362 if (!llvm::getAlign(*F, 0, retAlignment)) 363 retAlignment = TD->getABITypeAlignment(Ty); 364 O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz 365 << "]"; 366 } else 367 assert(false && "Unknown return type"); 368 } 369 } else { 370 SmallVector<EVT, 16> vtparts; 371 ComputeValueVTs(*TLI, Ty, vtparts); 372 unsigned idx = 0; 373 for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { 374 unsigned elems = 1; 375 EVT elemtype = vtparts[i]; 376 if (vtparts[i].isVector()) { 377 elems = vtparts[i].getVectorNumElements(); 378 elemtype = vtparts[i].getVectorElementType(); 379 } 380 381 for (unsigned j = 0, je = elems; j != je; ++j) { 382 unsigned sz = elemtype.getSizeInBits(); 383 if (elemtype.isInteger() && (sz < 32)) 384 sz = 32; 385 O << ".reg .b" << sz << " func_retval" << idx; 386 if (j < je - 1) 387 O << ", "; 388 ++idx; 389 } 390 if (i < e - 1) 391 O << ", "; 392 } 393 } 394 O << ") "; 395 return; 
396} 397 398void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF, 399 raw_ostream &O) { 400 const Function *F = MF.getFunction(); 401 printReturnValStr(F, O); 402} 403 404void NVPTXAsmPrinter::EmitFunctionEntryLabel() { 405 SmallString<128> Str; 406 raw_svector_ostream O(Str); 407 408 // Set up 409 MRI = &MF->getRegInfo(); 410 F = MF->getFunction(); 411 emitLinkageDirective(F, O); 412 if (llvm::isKernelFunction(*F)) 413 O << ".entry "; 414 else { 415 O << ".func "; 416 printReturnValStr(*MF, O); 417 } 418 419 O << *CurrentFnSym; 420 421 emitFunctionParamList(*MF, O); 422 423 if (llvm::isKernelFunction(*F)) 424 emitKernelFunctionDirectives(*F, O); 425 426 OutStreamer.EmitRawText(O.str()); 427 428 prevDebugLoc = DebugLoc(); 429} 430 431void NVPTXAsmPrinter::EmitFunctionBodyStart() { 432 const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); 433 unsigned numRegClasses = TRI.getNumRegClasses(); 434 VRidGlobal2LocalMap = new std::map<unsigned, unsigned>[numRegClasses + 1]; 435 OutStreamer.EmitRawText(StringRef("{\n")); 436 setAndEmitFunctionVirtualRegisters(*MF); 437 438 SmallString<128> Str; 439 raw_svector_ostream O(Str); 440 emitDemotedVars(MF->getFunction(), O); 441 OutStreamer.EmitRawText(O.str()); 442} 443 444void NVPTXAsmPrinter::EmitFunctionBodyEnd() { 445 OutStreamer.EmitRawText(StringRef("}\n")); 446 delete[] VRidGlobal2LocalMap; 447} 448 449void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F, 450 raw_ostream &O) const { 451 // If the NVVM IR has some of reqntid* specified, then output 452 // the reqntid directive, and set the unspecified ones to 1. 453 // If none of reqntid* is specified, don't output reqntid directive. 
454 unsigned reqntidx, reqntidy, reqntidz; 455 bool specified = false; 456 if (llvm::getReqNTIDx(F, reqntidx) == false) 457 reqntidx = 1; 458 else 459 specified = true; 460 if (llvm::getReqNTIDy(F, reqntidy) == false) 461 reqntidy = 1; 462 else 463 specified = true; 464 if (llvm::getReqNTIDz(F, reqntidz) == false) 465 reqntidz = 1; 466 else 467 specified = true; 468 469 if (specified) 470 O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz 471 << "\n"; 472 473 // If the NVVM IR has some of maxntid* specified, then output 474 // the maxntid directive, and set the unspecified ones to 1. 475 // If none of maxntid* is specified, don't output maxntid directive. 476 unsigned maxntidx, maxntidy, maxntidz; 477 specified = false; 478 if (llvm::getMaxNTIDx(F, maxntidx) == false) 479 maxntidx = 1; 480 else 481 specified = true; 482 if (llvm::getMaxNTIDy(F, maxntidy) == false) 483 maxntidy = 1; 484 else 485 specified = true; 486 if (llvm::getMaxNTIDz(F, maxntidz) == false) 487 maxntidz = 1; 488 else 489 specified = true; 490 491 if (specified) 492 O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz 493 << "\n"; 494 495 unsigned mincta; 496 if (llvm::getMinCTASm(F, mincta)) 497 O << ".minnctapersm " << mincta << "\n"; 498} 499 500void NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec, 501 raw_ostream &O) { 502 const TargetRegisterClass *RC = MRI->getRegClass(vr); 503 unsigned id = RC->getID(); 504 505 std::map<unsigned, unsigned> ®map = VRidGlobal2LocalMap[id]; 506 unsigned mapped_vr = regmap[vr]; 507 508 if (!isVec) { 509 O << getNVPTXRegClassStr(RC) << mapped_vr; 510 return; 511 } 512 report_fatal_error("Bad register!"); 513} 514 515void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr, bool isVec, 516 raw_ostream &O) { 517 getVirtualRegisterName(vr, isVec, O); 518} 519 520void NVPTXAsmPrinter::printVecModifiedImmediate( 521 const MachineOperand &MO, const char *Modifier, raw_ostream &O) { 522 static const char vecelem[] = { 
'0', '1', '2', '3', '0', '1', '2', '3' }; 523 int Imm = (int) MO.getImm(); 524 if (0 == strcmp(Modifier, "vecelem")) 525 O << "_" << vecelem[Imm]; 526 else if (0 == strcmp(Modifier, "vecv4comm1")) { 527 if ((Imm < 0) || (Imm > 3)) 528 O << "//"; 529 } else if (0 == strcmp(Modifier, "vecv4comm2")) { 530 if ((Imm < 4) || (Imm > 7)) 531 O << "//"; 532 } else if (0 == strcmp(Modifier, "vecv4pos")) { 533 if (Imm < 0) 534 Imm = 0; 535 O << "_" << vecelem[Imm % 4]; 536 } else if (0 == strcmp(Modifier, "vecv2comm1")) { 537 if ((Imm < 0) || (Imm > 1)) 538 O << "//"; 539 } else if (0 == strcmp(Modifier, "vecv2comm2")) { 540 if ((Imm < 2) || (Imm > 3)) 541 O << "//"; 542 } else if (0 == strcmp(Modifier, "vecv2pos")) { 543 if (Imm < 0) 544 Imm = 0; 545 O << "_" << vecelem[Imm % 2]; 546 } else 547 llvm_unreachable("Unknown Modifier on immediate operand"); 548} 549 550void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum, 551 raw_ostream &O, const char *Modifier) { 552 const MachineOperand &MO = MI->getOperand(opNum); 553 switch (MO.getType()) { 554 case MachineOperand::MO_Register: 555 if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { 556 if (MO.getReg() == NVPTX::VRDepot) 557 O << DEPOTNAME << getFunctionNumber(); 558 else 559 O << getRegisterName(MO.getReg()); 560 } else { 561 if (!Modifier) 562 emitVirtualRegister(MO.getReg(), false, O); 563 else { 564 if (strcmp(Modifier, "vecfull") == 0) 565 emitVirtualRegister(MO.getReg(), true, O); 566 else 567 llvm_unreachable( 568 "Don't know how to handle the modifier on virtual register."); 569 } 570 } 571 return; 572 573 case MachineOperand::MO_Immediate: 574 if (!Modifier) 575 O << MO.getImm(); 576 else if (strstr(Modifier, "vec") == Modifier) 577 printVecModifiedImmediate(MO, Modifier, O); 578 else 579 llvm_unreachable( 580 "Don't know how to handle modifier on immediate operand"); 581 return; 582 583 case MachineOperand::MO_FPImmediate: 584 printFPConstant(MO.getFPImm(), O); 585 break; 586 587 case 
MachineOperand::MO_GlobalAddress: 588 O << *Mang->getSymbol(MO.getGlobal()); 589 break; 590 591 case MachineOperand::MO_ExternalSymbol: { 592 const char *symbname = MO.getSymbolName(); 593 if (strstr(symbname, ".PARAM") == symbname) { 594 unsigned index; 595 sscanf(symbname + 6, "%u[];", &index); 596 printParamName(index, O); 597 } else if (strstr(symbname, ".HLPPARAM") == symbname) { 598 unsigned index; 599 sscanf(symbname + 9, "%u[];", &index); 600 O << *CurrentFnSym << "_param_" << index << "_offset"; 601 } else 602 O << symbname; 603 break; 604 } 605 606 case MachineOperand::MO_MachineBasicBlock: 607 O << *MO.getMBB()->getSymbol(); 608 return; 609 610 default: 611 llvm_unreachable("Operand type not supported."); 612 } 613} 614 615void NVPTXAsmPrinter::printImplicitDef(const MachineInstr *MI, 616 raw_ostream &O) const { 617#ifndef __OPTIMIZE__ 618 O << "\t// Implicit def :"; 619 //printOperand(MI, 0); 620 O << "\n"; 621#endif 622} 623 624void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum, 625 raw_ostream &O, const char *Modifier) { 626 printOperand(MI, opNum, O); 627 628 if (Modifier && !strcmp(Modifier, "add")) { 629 O << ", "; 630 printOperand(MI, opNum + 1, O); 631 } else { 632 if (MI->getOperand(opNum + 1).isImm() && 633 MI->getOperand(opNum + 1).getImm() == 0) 634 return; // don't print ',0' or '+0' 635 O << "+"; 636 printOperand(MI, opNum + 1, O); 637 } 638} 639 640void NVPTXAsmPrinter::printLdStCode(const MachineInstr *MI, int opNum, 641 raw_ostream &O, const char *Modifier) { 642 if (Modifier) { 643 const MachineOperand &MO = MI->getOperand(opNum); 644 int Imm = (int) MO.getImm(); 645 if (!strcmp(Modifier, "volatile")) { 646 if (Imm) 647 O << ".volatile"; 648 } else if (!strcmp(Modifier, "addsp")) { 649 switch (Imm) { 650 case NVPTX::PTXLdStInstCode::GLOBAL: 651 O << ".global"; 652 break; 653 case NVPTX::PTXLdStInstCode::SHARED: 654 O << ".shared"; 655 break; 656 case NVPTX::PTXLdStInstCode::LOCAL: 657 O << ".local"; 658 break; 659 
case NVPTX::PTXLdStInstCode::PARAM: 660 O << ".param"; 661 break; 662 case NVPTX::PTXLdStInstCode::CONSTANT: 663 O << ".const"; 664 break; 665 case NVPTX::PTXLdStInstCode::GENERIC: 666 if (!nvptxSubtarget.hasGenericLdSt()) 667 O << ".global"; 668 break; 669 default: 670 llvm_unreachable("Wrong Address Space"); 671 } 672 } else if (!strcmp(Modifier, "sign")) { 673 if (Imm == NVPTX::PTXLdStInstCode::Signed) 674 O << "s"; 675 else if (Imm == NVPTX::PTXLdStInstCode::Unsigned) 676 O << "u"; 677 else 678 O << "f"; 679 } else if (!strcmp(Modifier, "vec")) { 680 if (Imm == NVPTX::PTXLdStInstCode::V2) 681 O << ".v2"; 682 else if (Imm == NVPTX::PTXLdStInstCode::V4) 683 O << ".v4"; 684 } else 685 llvm_unreachable("Unknown Modifier"); 686 } else 687 llvm_unreachable("Empty Modifier"); 688} 689 690void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) { 691 692 emitLinkageDirective(F, O); 693 if (llvm::isKernelFunction(*F)) 694 O << ".entry "; 695 else 696 O << ".func "; 697 printReturnValStr(F, O); 698 O << *CurrentFnSym << "\n"; 699 emitFunctionParamList(F, O); 700 O << ";\n"; 701} 702 703static bool usedInGlobalVarDef(const Constant *C) { 704 if (!C) 705 return false; 706 707 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) { 708 if (GV->getName().str() == "llvm.used") 709 return false; 710 return true; 711 } 712 713 for (Value::const_use_iterator ui = C->use_begin(), ue = C->use_end(); 714 ui != ue; ++ui) { 715 const Constant *C = dyn_cast<Constant>(*ui); 716 if (usedInGlobalVarDef(C)) 717 return true; 718 } 719 return false; 720} 721 722static bool usedInOneFunc(const User *U, Function const *&oneFunc) { 723 if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) { 724 if (othergv->getName().str() == "llvm.used") 725 return true; 726 } 727 728 if (const Instruction *instr = dyn_cast<Instruction>(U)) { 729 if (instr->getParent() && instr->getParent()->getParent()) { 730 const Function *curFunc = instr->getParent()->getParent(); 731 if 
(oneFunc && (curFunc != oneFunc)) 732 return false; 733 oneFunc = curFunc; 734 return true; 735 } else 736 return false; 737 } 738 739 if (const MDNode *md = dyn_cast<MDNode>(U)) 740 if (md->hasName() && ((md->getName().str() == "llvm.dbg.gv") || 741 (md->getName().str() == "llvm.dbg.sp"))) 742 return true; 743 744 for (User::const_use_iterator ui = U->use_begin(), ue = U->use_end(); 745 ui != ue; ++ui) { 746 if (usedInOneFunc(*ui, oneFunc) == false) 747 return false; 748 } 749 return true; 750} 751 752/* Find out if a global variable can be demoted to local scope. 753 * Currently, this is valid for CUDA shared variables, which have local 754 * scope and global lifetime. So the conditions to check are : 755 * 1. Is the global variable in shared address space? 756 * 2. Does it have internal linkage? 757 * 3. Is the global variable referenced only in one function? 758 */ 759static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) { 760 if (gv->hasInternalLinkage() == false) 761 return false; 762 const PointerType *Pty = gv->getType(); 763 if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED) 764 return false; 765 766 const Function *oneFunc = 0; 767 768 bool flag = usedInOneFunc(gv, oneFunc); 769 if (flag == false) 770 return false; 771 if (!oneFunc) 772 return false; 773 f = oneFunc; 774 return true; 775} 776 777static bool useFuncSeen(const Constant *C, 778 llvm::DenseMap<const Function *, bool> &seenMap) { 779 for (Value::const_use_iterator ui = C->use_begin(), ue = C->use_end(); 780 ui != ue; ++ui) { 781 if (const Constant *cu = dyn_cast<Constant>(*ui)) { 782 if (useFuncSeen(cu, seenMap)) 783 return true; 784 } else if (const Instruction *I = dyn_cast<Instruction>(*ui)) { 785 const BasicBlock *bb = I->getParent(); 786 if (!bb) 787 continue; 788 const Function *caller = bb->getParent(); 789 if (!caller) 790 continue; 791 if (seenMap.find(caller) != seenMap.end()) 792 return true; 793 } 794 } 795 return false; 796} 797 798void 
NVPTXAsmPrinter::emitDeclarations(Module &M, raw_ostream &O) { 799 llvm::DenseMap<const Function *, bool> seenMap; 800 for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) { 801 const Function *F = FI; 802 803 if (F->isDeclaration()) { 804 if (F->use_empty()) 805 continue; 806 if (F->getIntrinsicID()) 807 continue; 808 CurrentFnSym = Mang->getSymbol(F); 809 emitDeclaration(F, O); 810 continue; 811 } 812 for (Value::const_use_iterator iter = F->use_begin(), 813 iterEnd = F->use_end(); 814 iter != iterEnd; ++iter) { 815 if (const Constant *C = dyn_cast<Constant>(*iter)) { 816 if (usedInGlobalVarDef(C)) { 817 // The use is in the initialization of a global variable 818 // that is a function pointer, so print a declaration 819 // for the original function 820 CurrentFnSym = Mang->getSymbol(F); 821 emitDeclaration(F, O); 822 break; 823 } 824 // Emit a declaration of this function if the function that 825 // uses this constant expr has already been seen. 826 if (useFuncSeen(C, seenMap)) { 827 CurrentFnSym = Mang->getSymbol(F); 828 emitDeclaration(F, O); 829 break; 830 } 831 } 832 833 if (!isa<Instruction>(*iter)) 834 continue; 835 const Instruction *instr = cast<Instruction>(*iter); 836 const BasicBlock *bb = instr->getParent(); 837 if (!bb) 838 continue; 839 const Function *caller = bb->getParent(); 840 if (!caller) 841 continue; 842 843 // If a caller has already been seen, then the caller is 844 // appearing in the module before the callee. so print out 845 // a declaration for the callee. 
846 if (seenMap.find(caller) != seenMap.end()) { 847 CurrentFnSym = Mang->getSymbol(F); 848 emitDeclaration(F, O); 849 break; 850 } 851 } 852 seenMap[F] = true; 853 } 854} 855 856void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) { 857 DebugInfoFinder DbgFinder; 858 DbgFinder.processModule(M); 859 860 unsigned i = 1; 861 for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(), 862 E = DbgFinder.compile_unit_end(); 863 I != E; ++I) { 864 DICompileUnit DIUnit(*I); 865 StringRef Filename(DIUnit.getFilename()); 866 StringRef Dirname(DIUnit.getDirectory()); 867 SmallString<128> FullPathName = Dirname; 868 if (!Dirname.empty() && !sys::path::is_absolute(Filename)) { 869 sys::path::append(FullPathName, Filename); 870 Filename = FullPathName.str(); 871 } 872 if (filenameMap.find(Filename.str()) != filenameMap.end()) 873 continue; 874 filenameMap[Filename.str()] = i; 875 OutStreamer.EmitDwarfFileDirective(i, "", Filename.str()); 876 ++i; 877 } 878 879 for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(), 880 E = DbgFinder.subprogram_end(); 881 I != E; ++I) { 882 DISubprogram SP(*I); 883 StringRef Filename(SP.getFilename()); 884 StringRef Dirname(SP.getDirectory()); 885 SmallString<128> FullPathName = Dirname; 886 if (!Dirname.empty() && !sys::path::is_absolute(Filename)) { 887 sys::path::append(FullPathName, Filename); 888 Filename = FullPathName.str(); 889 } 890 if (filenameMap.find(Filename.str()) != filenameMap.end()) 891 continue; 892 filenameMap[Filename.str()] = i; 893 ++i; 894 } 895} 896 897bool NVPTXAsmPrinter::doInitialization(Module &M) { 898 899 SmallString<128> Str1; 900 raw_svector_ostream OS1(Str1); 901 902 MMI = getAnalysisIfAvailable<MachineModuleInfo>(); 903 MMI->AnalyzeModule(M); 904 905 // We need to call the parent's one explicitly. 906 //bool Result = AsmPrinter::doInitialization(M); 907 908 // Initialize TargetLoweringObjectFile. 
909 const_cast<TargetLoweringObjectFile &>(getObjFileLowering()) 910 .Initialize(OutContext, TM); 911 912 Mang = new Mangler(OutContext, *TM.getDataLayout()); 913 914 // Emit header before any dwarf directives are emitted below. 915 emitHeader(M, OS1); 916 OutStreamer.EmitRawText(OS1.str()); 917 918 // Already commented out 919 //bool Result = AsmPrinter::doInitialization(M); 920 921 if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) 922 recordAndEmitFilenames(M); 923 924 SmallString<128> Str2; 925 raw_svector_ostream OS2(Str2); 926 927 emitDeclarations(M, OS2); 928 929 // As ptxas does not support forward references of globals, we need to first 930 // sort the list of module-level globals in def-use order. We visit each 931 // global variable in order, and ensure that we emit it *after* its dependent 932 // globals. We use a little extra memory maintaining both a set and a list to 933 // have fast searches while maintaining a strict ordering. 934 SmallVector<GlobalVariable *, 8> Globals; 935 DenseSet<GlobalVariable *> GVVisited; 936 DenseSet<GlobalVariable *> GVVisiting; 937 938 // Visit each global variable, in order 939 for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; 940 ++I) 941 VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting); 942 943 assert(GVVisited.size() == M.getGlobalList().size() && 944 "Missed a global variable"); 945 assert(GVVisiting.size() == 0 && "Did not fully process a global variable"); 946 947 // Print out module-level global variables in proper order 948 for (unsigned i = 0, e = Globals.size(); i != e; ++i) 949 printModuleLevelGV(Globals[i], OS2); 950 951 OS2 << '\n'; 952 953 OutStreamer.EmitRawText(OS2.str()); 954 return false; // success 955} 956 957void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) { 958 O << "//\n"; 959 O << "// Generated by LLVM NVPTX Back-End\n"; 960 O << "//\n"; 961 O << "\n"; 962 963 unsigned PTXVersion = nvptxSubtarget.getPTXVersion(); 964 O << ".version " << 
(PTXVersion / 10) << "." << (PTXVersion % 10) << "\n"; 965 966 O << ".target "; 967 O << nvptxSubtarget.getTargetName(); 968 969 if (nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) 970 O << ", texmode_independent"; 971 if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) { 972 if (!nvptxSubtarget.hasDouble()) 973 O << ", map_f64_to_f32"; 974 } 975 976 if (MAI->doesSupportDebugInformation()) 977 O << ", debug"; 978 979 O << "\n"; 980 981 O << ".address_size "; 982 if (nvptxSubtarget.is64Bit()) 983 O << "64"; 984 else 985 O << "32"; 986 O << "\n"; 987 988 O << "\n"; 989} 990 991bool NVPTXAsmPrinter::doFinalization(Module &M) { 992 // XXX Temproarily remove global variables so that doFinalization() will not 993 // emit them again (global variables are emitted at beginning). 994 995 Module::GlobalListType &global_list = M.getGlobalList(); 996 int i, n = global_list.size(); 997 GlobalVariable **gv_array = new GlobalVariable *[n]; 998 999 // first, back-up GlobalVariable in gv_array 1000 i = 0; 1001 for (Module::global_iterator I = global_list.begin(), E = global_list.end(); 1002 I != E; ++I) 1003 gv_array[i++] = &*I; 1004 1005 // second, empty global_list 1006 while (!global_list.empty()) 1007 global_list.remove(global_list.begin()); 1008 1009 // call doFinalization 1010 bool ret = AsmPrinter::doFinalization(M); 1011 1012 // now we restore global variables 1013 for (i = 0; i < n; i++) 1014 global_list.insert(global_list.end(), gv_array[i]); 1015 1016 delete[] gv_array; 1017 return ret; 1018 1019 //bool Result = AsmPrinter::doFinalization(M); 1020 // Instead of calling the parents doFinalization, we may 1021 // clone parents doFinalization and customize here. 1022 // Currently, we if NVISA out the EmitGlobals() in 1023 // parent's doFinalization, which is too intrusive. 1024 // 1025 // Same for the doInitialization. 1026 //return Result; 1027} 1028 1029// This function emits appropriate linkage directives for 1030// functions and global variables. 
1031// 1032// extern function declaration -> .extern 1033// extern function definition -> .visible 1034// external global variable with init -> .visible 1035// external without init -> .extern 1036// appending -> not allowed, assert. 1037 1038void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V, 1039 raw_ostream &O) { 1040 if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) { 1041 if (V->hasExternalLinkage()) { 1042 if (isa<GlobalVariable>(V)) { 1043 const GlobalVariable *GVar = cast<GlobalVariable>(V); 1044 if (GVar) { 1045 if (GVar->hasInitializer()) 1046 O << ".visible "; 1047 else 1048 O << ".extern "; 1049 } 1050 } else if (V->isDeclaration()) 1051 O << ".extern "; 1052 else 1053 O << ".visible "; 1054 } else if (V->hasAppendingLinkage()) { 1055 std::string msg; 1056 msg.append("Error: "); 1057 msg.append("Symbol "); 1058 if (V->hasName()) 1059 msg.append(V->getName().str()); 1060 msg.append("has unsupported appending linkage type"); 1061 llvm_unreachable(msg.c_str()); 1062 } 1063 } 1064} 1065 1066void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O, 1067 bool processDemoted) { 1068 1069 // Skip meta data 1070 if (GVar->hasSection()) { 1071 if (GVar->getSection() == "llvm.metadata") 1072 return; 1073 } 1074 1075 const DataLayout *TD = TM.getDataLayout(); 1076 1077 // GlobalVariables are always constant pointers themselves. 
const PointerType *PTy = GVar->getType();
  Type *ETy = PTy->getElementType();

  // Linkage prefix for externally visible variables: initialized ones are
  // definitions (.visible), the rest are references (.extern).
  if (GVar->hasExternalLinkage()) {
    if (GVar->hasInitializer())
      O << ".visible ";
    else
      O << ".extern ";
  }

  // Textures and surfaces are emitted as opaque references and need nothing
  // else.
  if (llvm::isTexture(*GVar)) {
    O << ".global .texref " << llvm::getTextureName(*GVar) << ";\n";
    return;
  }

  if (llvm::isSurface(*GVar)) {
    O << ".global .surfref " << llvm::getSurfaceName(*GVar) << ";\n";
    return;
  }

  if (GVar->isDeclaration()) {
    // (extern) declarations, no definition or initializer
    // Currently the only known declaration is for an automatic __local
    // (.shared) promoted to global.
    emitPTXGlobalVariable(GVar, O);
    O << ";\n";
    return;
  }

  if (llvm::isSampler(*GVar)) {
    O << ".global .samplerref " << llvm::getSamplerName(*GVar);

    // A sampler gets a `= { ... }` initializer only when its IR initializer
    // is a ConstantInt; otherwise just the bare reference is printed.
    Constant *Initializer = NULL;
    if (GVar->hasInitializer())
      Initializer = GVar->getInitializer();
    ConstantInt *CI = NULL;
    if (Initializer)
      CI = dyn_cast<ConstantInt>(Initializer);
    if (CI) {
      // The integer packs the address mode, filter mode and normalized-coords
      // flag; the fields are extracted with the __CLK_* masks/shifts.
      unsigned sample = CI->getZExtValue();

      O << " = { ";

      // addr is computed once in the loop initializer, so the same addressing
      // mode is printed for addr_mode_0, addr_mode_1 and addr_mode_2.
      for (int i = 0,
               addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
           i < 3; i++) {
        O << "addr_mode_" << i << " = ";
        // No default case: an addr value outside 0..4 prints no mode name.
        switch (addr) {
        case 0:
          O << "wrap";
          break;
        case 1:
          O << "clamp_to_border";
          break;
        case 2:
          O << "clamp_to_edge";
          break;
        case 3:
          O << "wrap";
          break;
        case 4:
          O << "mirror";
          break;
        }
        O << ", ";
      }
      O << "filter_mode = ";
      switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
      case 0:
        O << "nearest";
        break;
      case 1:
        O << "linear";
        break;
      case 2:
        assert(0 && "Anisotropic filtering is not supported");
        // No break here: when the assert is compiled out, control falls
        // through to the default and "nearest" is printed.
      default:
        O << "nearest";
        break;
      }
      // A cleared normalized-coords field requests unnormalized coordinates.
      if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
O << ", force_unnormalized_coords = 1"; 1160 } 1161 O << " }"; 1162 } 1163 1164 O << ";\n"; 1165 return; 1166 } 1167 1168 if (GVar->hasPrivateLinkage()) { 1169 1170 if (!strncmp(GVar->getName().data(), "unrollpragma", 12)) 1171 return; 1172 1173 // FIXME - need better way (e.g. Metadata) to avoid generating this global 1174 if (!strncmp(GVar->getName().data(), "filename", 8)) 1175 return; 1176 if (GVar->use_empty()) 1177 return; 1178 } 1179 1180 const Function *demotedFunc = 0; 1181 if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) { 1182 O << "// " << GVar->getName().str() << " has been demoted\n"; 1183 if (localDecls.find(demotedFunc) != localDecls.end()) 1184 localDecls[demotedFunc].push_back(GVar); 1185 else { 1186 std::vector<GlobalVariable *> temp; 1187 temp.push_back(GVar); 1188 localDecls[demotedFunc] = temp; 1189 } 1190 return; 1191 } 1192 1193 O << "."; 1194 emitPTXAddressSpace(PTy->getAddressSpace(), O); 1195 if (GVar->getAlignment() == 0) 1196 O << " .align " << (int) TD->getPrefTypeAlignment(ETy); 1197 else 1198 O << " .align " << GVar->getAlignment(); 1199 1200 if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) { 1201 O << " ."; 1202 O << getPTXFundamentalTypeStr(ETy, false); 1203 O << " "; 1204 O << *Mang->getSymbol(GVar); 1205 1206 // Ptx allows variable initilization only for constant and global state 1207 // spaces. 
1208 if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) || 1209 (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) || 1210 (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) && 1211 GVar->hasInitializer()) { 1212 Constant *Initializer = GVar->getInitializer(); 1213 if (!Initializer->isNullValue()) { 1214 O << " = "; 1215 printScalarConstant(Initializer, O); 1216 } 1217 } 1218 } else { 1219 unsigned int ElementSize = 0; 1220 1221 // Although PTX has direct support for struct type and array type and 1222 // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for 1223 // targets that support these high level field accesses. Structs, arrays 1224 // and vectors are lowered into arrays of bytes. 1225 switch (ETy->getTypeID()) { 1226 case Type::StructTyID: 1227 case Type::ArrayTyID: 1228 case Type::VectorTyID: 1229 ElementSize = TD->getTypeStoreSize(ETy); 1230 // Ptx allows variable initilization only for constant and 1231 // global state spaces. 1232 if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) || 1233 (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) || 1234 (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) && 1235 GVar->hasInitializer()) { 1236 Constant *Initializer = GVar->getInitializer(); 1237 if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) { 1238 AggBuffer aggBuffer(ElementSize, O, *this); 1239 bufferAggregateConstant(Initializer, &aggBuffer); 1240 if (aggBuffer.numSymbols) { 1241 if (nvptxSubtarget.is64Bit()) { 1242 O << " .u64 " << *Mang->getSymbol(GVar) << "["; 1243 O << ElementSize / 8; 1244 } else { 1245 O << " .u32 " << *Mang->getSymbol(GVar) << "["; 1246 O << ElementSize / 4; 1247 } 1248 O << "]"; 1249 } else { 1250 O << " .b8 " << *Mang->getSymbol(GVar) << "["; 1251 O << ElementSize; 1252 O << "]"; 1253 } 1254 O << " = {"; 1255 aggBuffer.print(); 1256 O << "}"; 1257 } else { 1258 O << " .b8 " << *Mang->getSymbol(GVar); 1259 if (ElementSize) { 1260 O << "["; 1261 O << 
ElementSize; 1262 O << "]"; 1263 } 1264 } 1265 } else { 1266 O << " .b8 " << *Mang->getSymbol(GVar); 1267 if (ElementSize) { 1268 O << "["; 1269 O << ElementSize; 1270 O << "]"; 1271 } 1272 } 1273 break; 1274 default: 1275 assert(0 && "type not supported yet"); 1276 } 1277 1278 } 1279 O << ";\n"; 1280} 1281 1282void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) { 1283 if (localDecls.find(f) == localDecls.end()) 1284 return; 1285 1286 std::vector<GlobalVariable *> &gvars = localDecls[f]; 1287 1288 for (unsigned i = 0, e = gvars.size(); i != e; ++i) { 1289 O << "\t// demoted variable\n\t"; 1290 printModuleLevelGV(gvars[i], O, true); 1291 } 1292} 1293 1294void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace, 1295 raw_ostream &O) const { 1296 switch (AddressSpace) { 1297 case llvm::ADDRESS_SPACE_LOCAL: 1298 O << "local"; 1299 break; 1300 case llvm::ADDRESS_SPACE_GLOBAL: 1301 O << "global"; 1302 break; 1303 case llvm::ADDRESS_SPACE_CONST: 1304 // This logic should be consistent with that in 1305 // getCodeAddrSpace() (NVPTXISelDATToDAT.cpp) 1306 if (nvptxSubtarget.hasGenericLdSt()) 1307 O << "global"; 1308 else 1309 O << "const"; 1310 break; 1311 case llvm::ADDRESS_SPACE_CONST_NOT_GEN: 1312 O << "const"; 1313 break; 1314 case llvm::ADDRESS_SPACE_SHARED: 1315 O << "shared"; 1316 break; 1317 default: 1318 report_fatal_error("Bad address space found while emitting PTX"); 1319 break; 1320 } 1321} 1322 1323std::string 1324NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const { 1325 switch (Ty->getTypeID()) { 1326 default: 1327 llvm_unreachable("unexpected type"); 1328 break; 1329 case Type::IntegerTyID: { 1330 unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); 1331 if (NumBits == 1) 1332 return "pred"; 1333 else if (NumBits <= 64) { 1334 std::string name = "u"; 1335 return name + utostr(NumBits); 1336 } else { 1337 llvm_unreachable("Integer too large"); 1338 break; 1339 } 1340 break; 1341 } 1342 case 
Type::FloatTyID: 1343 return "f32"; 1344 case Type::DoubleTyID: 1345 return "f64"; 1346 case Type::PointerTyID: 1347 if (nvptxSubtarget.is64Bit()) 1348 if (useB4PTR) 1349 return "b64"; 1350 else 1351 return "u64"; 1352 else if (useB4PTR) 1353 return "b32"; 1354 else 1355 return "u32"; 1356 } 1357 llvm_unreachable("unexpected type"); 1358 return NULL; 1359} 1360 1361void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, 1362 raw_ostream &O) { 1363 1364 const DataLayout *TD = TM.getDataLayout(); 1365 1366 // GlobalVariables are always constant pointers themselves. 1367 const PointerType *PTy = GVar->getType(); 1368 Type *ETy = PTy->getElementType(); 1369 1370 O << "."; 1371 emitPTXAddressSpace(PTy->getAddressSpace(), O); 1372 if (GVar->getAlignment() == 0) 1373 O << " .align " << (int) TD->getPrefTypeAlignment(ETy); 1374 else 1375 O << " .align " << GVar->getAlignment(); 1376 1377 if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) { 1378 O << " ."; 1379 O << getPTXFundamentalTypeStr(ETy); 1380 O << " "; 1381 O << *Mang->getSymbol(GVar); 1382 return; 1383 } 1384 1385 int64_t ElementSize = 0; 1386 1387 // Although PTX has direct support for struct type and array type and LLVM IR 1388 // is very similar to PTX, the LLVM CodeGen does not support for targets that 1389 // support these high level field accesses. Structs and arrays are lowered 1390 // into arrays of bytes. 
switch (ETy->getTypeID()) {
  case Type::StructTyID:
  case Type::ArrayTyID:
  case Type::VectorTyID:
    ElementSize = TD->getTypeStoreSize(ETy);
    // The brackets are always printed; the size is omitted only when the
    // store size is zero, which yields `name[]`.
    O << " .b8 " << *Mang->getSymbol(GVar) << "[";
    if (ElementSize) {
      O << itostr(ElementSize);
    }
    O << "]";
    break;
  default:
    assert(0 && "type not supported yet");
  }
  return;
}

/// Compute the alignment, in the units returned by DataLayout, that is
/// printed after `.align` for the pointee type \p Ty of a pointer parameter.
///
///   scalar / integer / pointer -> preferred alignment of the type
///   array                      -> alignment of its element type (recursive)
///   vector                     -> element count * preferred element
///                                 alignment, with 3-element vectors treated
///                                 as 4 elements
///   struct                     -> largest alignment of any member, at
///                                 least 1
///   function type              -> preferred pointer alignment
///   anything else              -> preferred alignment of the type
static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) {
  if (Ty->isPrimitiveType() || Ty->isIntegerTy() || isa<PointerType>(Ty))
    return TD->getPrefTypeAlignment(Ty);

  const ArrayType *ATy = dyn_cast<ArrayType>(Ty);
  if (ATy)
    return getOpenCLAlignment(TD, ATy->getElementType());

  const VectorType *VTy = dyn_cast<VectorType>(Ty);
  if (VTy) {
    Type *ETy = VTy->getElementType();
    unsigned int numE = VTy->getNumElements();
    unsigned int alignE = TD->getPrefTypeAlignment(ETy);
    if (numE == 3)
      return 4 * alignE;
    else
      return numE * alignE;
  }

  const StructType *STy = dyn_cast<StructType>(Ty);
  if (STy) {
    unsigned int alignStruct = 1;
    // Go through each element of the struct and find the
    // largest alignment.
1432 for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) { 1433 Type *ETy = STy->getElementType(i); 1434 unsigned int align = getOpenCLAlignment(TD, ETy); 1435 if (align > alignStruct) 1436 alignStruct = align; 1437 } 1438 return alignStruct; 1439 } 1440 1441 const FunctionType *FTy = dyn_cast<FunctionType>(Ty); 1442 if (FTy) 1443 return TD->getPointerPrefAlignment(); 1444 return TD->getPrefTypeAlignment(Ty); 1445} 1446 1447void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I, 1448 int paramIndex, raw_ostream &O) { 1449 if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) || 1450 (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)) 1451 O << *CurrentFnSym << "_param_" << paramIndex; 1452 else { 1453 std::string argName = I->getName(); 1454 const char *p = argName.c_str(); 1455 while (*p) { 1456 if (*p == '.') 1457 O << "_"; 1458 else 1459 O << *p; 1460 p++; 1461 } 1462 } 1463} 1464 1465void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) { 1466 Function::const_arg_iterator I, E; 1467 int i = 0; 1468 1469 if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) || 1470 (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)) { 1471 O << *CurrentFnSym << "_param_" << paramIndex; 1472 return; 1473 } 1474 1475 for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, i++) { 1476 if (i == paramIndex) { 1477 printParamName(I, paramIndex, O); 1478 return; 1479 } 1480 } 1481 llvm_unreachable("paramIndex out of bound"); 1482} 1483 1484void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { 1485 const DataLayout *TD = TM.getDataLayout(); 1486 const AttributeSet &PAL = F->getAttributes(); 1487 const TargetLowering *TLI = TM.getTargetLowering(); 1488 Function::const_arg_iterator I, E; 1489 unsigned paramIndex = 0; 1490 bool first = true; 1491 bool isKernelFunc = llvm::isKernelFunction(*F); 1492 bool isABI = (nvptxSubtarget.getSmVersion() >= 20); 1493 MVT thePointerTy = TLI->getPointerTy(); 1494 1495 O << "(\n"; 1496 1497 for 
(I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) { 1498 Type *Ty = I->getType(); 1499 1500 if (!first) 1501 O << ",\n"; 1502 1503 first = false; 1504 1505 // Handle image/sampler parameters 1506 if (llvm::isSampler(*I) || llvm::isImage(*I)) { 1507 if (llvm::isImage(*I)) { 1508 std::string sname = I->getName(); 1509 if (llvm::isImageWriteOnly(*I)) 1510 O << "\t.param .surfref " << *CurrentFnSym << "_param_" << paramIndex; 1511 else // Default image is read_only 1512 O << "\t.param .texref " << *CurrentFnSym << "_param_" << paramIndex; 1513 } else // Should be llvm::isSampler(*I) 1514 O << "\t.param .samplerref " << *CurrentFnSym << "_param_" 1515 << paramIndex; 1516 continue; 1517 } 1518 1519 if (PAL.hasAttribute(paramIndex + 1, Attribute::ByVal) == false) { 1520 if (Ty->isVectorTy()) { 1521 // Just print .param .b8 .align <a> .param[size]; 1522 // <a> = PAL.getparamalignment 1523 // size = typeallocsize of element type 1524 unsigned align = PAL.getParamAlignment(paramIndex + 1); 1525 if (align == 0) 1526 align = TD->getABITypeAlignment(Ty); 1527 1528 unsigned sz = TD->getTypeAllocSize(Ty); 1529 O << "\t.param .align " << align << " .b8 "; 1530 printParamName(I, paramIndex, O); 1531 O << "[" << sz << "]"; 1532 1533 continue; 1534 } 1535 // Just a scalar 1536 const PointerType *PTy = dyn_cast<PointerType>(Ty); 1537 if (isKernelFunc) { 1538 if (PTy) { 1539 // Special handling for pointer arguments to kernel 1540 O << "\t.param .u" << thePointerTy.getSizeInBits() << " "; 1541 1542 if (nvptxSubtarget.getDrvInterface() != NVPTX::CUDA) { 1543 Type *ETy = PTy->getElementType(); 1544 int addrSpace = PTy->getAddressSpace(); 1545 switch (addrSpace) { 1546 default: 1547 O << ".ptr "; 1548 break; 1549 case llvm::ADDRESS_SPACE_CONST_NOT_GEN: 1550 O << ".ptr .const "; 1551 break; 1552 case llvm::ADDRESS_SPACE_SHARED: 1553 O << ".ptr .shared "; 1554 break; 1555 case llvm::ADDRESS_SPACE_GLOBAL: 1556 case llvm::ADDRESS_SPACE_CONST: 1557 O << ".ptr .global "; 1558 
break;
            }
            O << ".align " << (int) getOpenCLAlignment(TD, ETy) << " ";
          }
          printParamName(I, paramIndex, O);
          continue;
        }

        // non-pointer scalar to kernel func
        O << "\t.param ." << getPTXFundamentalTypeStr(Ty) << " ";
        printParamName(I, paramIndex, O);
        continue;
      }
      // Non-kernel function, just print .param .b<size> for ABI
      // and .reg .b<size> for non-ABI.
      // Integers narrower than 32 bits are widened to 32 bits, pointers use
      // the target pointer width, everything else its primitive size.
      unsigned sz = 0;
      if (isa<IntegerType>(Ty)) {
        sz = cast<IntegerType>(Ty)->getBitWidth();
        if (sz < 32)
          sz = 32;
      } else if (isa<PointerType>(Ty))
        sz = thePointerTy.getSizeInBits();
      else
        sz = Ty->getPrimitiveSizeInBits();
      if (isABI)
        O << "\t.param .b" << sz << " ";
      else
        O << "\t.reg .b" << sz << " ";
      printParamName(I, paramIndex, O);
      continue;
    }

    // param has byVal attribute. So should be a pointer
    const PointerType *PTy = dyn_cast<PointerType>(Ty);
    assert(PTy && "Param with byval attribute should be a pointer type");
    Type *ETy = PTy->getElementType();

    if (isABI || isKernelFunc) {
      // Just print .param .align <a> .b8 <name>[size];
      // <a> = PAL.getParamAlignment, or the ABI alignment of the pointee
      //       type when no explicit alignment is given
      // size = typeallocsize of element type
      unsigned align = PAL.getParamAlignment(paramIndex + 1);
      if (align == 0)
        align = TD->getABITypeAlignment(ETy);

      unsigned sz = TD->getTypeAllocSize(ETy);
      O << "\t.param .align " << align << " .b8 ";
      printParamName(I, paramIndex, O);
      O << "[" << sz << "]";
      continue;
    } else {
      // Split the ETy into constituent parts and
      // print .reg .b<size> <name> for each part.
      // Further, if a part is vector, print the above for
      // each vector element.
SmallVector<EVT, 16> vtparts;
      ComputeValueVTs(*TLI, ETy, vtparts);
      for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
        // A vector part contributes one register per element; a scalar part
        // contributes exactly one.
        unsigned elems = 1;
        EVT elemtype = vtparts[i];
        if (vtparts[i].isVector()) {
          elems = vtparts[i].getVectorNumElements();
          elemtype = vtparts[i].getVectorElementType();
        }

        for (unsigned j = 0, je = elems; j != je; ++j) {
          unsigned sz = elemtype.getSizeInBits();
          // Integer parts narrower than 32 bits are widened to 32 bits.
          if (elemtype.isInteger() && (sz < 32))
            sz = 32;
          O << "\t.reg .b" << sz << " ";
          printParamName(I, paramIndex, O);
          if (j < je - 1)
            O << ",\n";
          // Every emitted scalar consumes its own parameter index.
          ++paramIndex;
        }
        if (i < e - 1)
          O << ",\n";
      }
      // Step back by one after the last scalar; NOTE(review): presumably this
      // offsets the increment applied by the enclosing argument loop - confirm.
      --paramIndex;
      continue;
    }
  }

  O << "\n)\n";
}

/// Convenience overload: print the parameter list of the IR function that
/// \p MF was created from.
void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF,
                                            raw_ostream &O) {
  const Function *F = MF.getFunction();
  emitFunctionParamList(F, O);
}

/// Emit the per-function prologue declarations as raw text: the local stack
/// depot with %SP/%SPL (only when the frame is non-empty) and the register
/// declarations. It also fills VRidGlobal2LocalMap with a per-register-class
/// numbering of the function's virtual registers.
void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
    const MachineFunction &MF) {
  SmallString<128> Str;
  raw_svector_ostream O(Str);

  // Map the global virtual register number to a register class specific
  // virtual register number starting from 1 with that class.
1657 const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); 1658 //unsigned numRegClasses = TRI->getNumRegClasses(); 1659 1660 // Emit the Fake Stack Object 1661 const MachineFrameInfo *MFI = MF.getFrameInfo(); 1662 int NumBytes = (int) MFI->getStackSize(); 1663 if (NumBytes) { 1664 O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t" << DEPOTNAME 1665 << getFunctionNumber() << "[" << NumBytes << "];\n"; 1666 if (nvptxSubtarget.is64Bit()) { 1667 O << "\t.reg .b64 \t%SP;\n"; 1668 O << "\t.reg .b64 \t%SPL;\n"; 1669 } else { 1670 O << "\t.reg .b32 \t%SP;\n"; 1671 O << "\t.reg .b32 \t%SPL;\n"; 1672 } 1673 } 1674 1675 // Go through all virtual registers to establish the mapping between the 1676 // global virtual 1677 // register number and the per class virtual register number. 1678 // We use the per class virtual register number in the ptx output. 1679 unsigned int numVRs = MRI->getNumVirtRegs(); 1680 for (unsigned i = 0; i < numVRs; i++) { 1681 unsigned int vr = TRI->index2VirtReg(i); 1682 const TargetRegisterClass *RC = MRI->getRegClass(vr); 1683 std::map<unsigned, unsigned> ®map = VRidGlobal2LocalMap[RC->getID()]; 1684 int n = regmap.size(); 1685 regmap.insert(std::make_pair(vr, n + 1)); 1686 } 1687 1688 // Emit register declarations 1689 // @TODO: Extract out the real register usage 1690 O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n"; 1691 O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n"; 1692 O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n"; 1693 O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n"; 1694 O << "\t.reg .s64 %rl<" << NVPTXNumRegisters << ">;\n"; 1695 O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n"; 1696 O << "\t.reg .f64 %fl<" << NVPTXNumRegisters << ">;\n"; 1697 1698 // Emit declaration of the virtual registers or 'physical' registers for 1699 // each register class 1700 //for (unsigned i=0; i< numRegClasses; i++) { 1701 // std::map<unsigned, unsigned> ®map = VRidGlobal2LocalMap[i]; 1702 // const 
TargetRegisterClass *RC = TRI->getRegClass(i); 1703 // std::string rcname = getNVPTXRegClassName(RC); 1704 // std::string rcStr = getNVPTXRegClassStr(RC); 1705 // //int n = regmap.size(); 1706 // if (!isNVPTXVectorRegClass(RC)) { 1707 // O << "\t.reg " << rcname << " \t" << rcStr << "<" 1708 // << NVPTXNumRegisters << ">;\n"; 1709 // } 1710 1711 // Only declare those registers that may be used. And do not emit vector 1712 // registers as 1713 // they are all elementized to scalar registers. 1714 //if (n && !isNVPTXVectorRegClass(RC)) { 1715 // if (RegAllocNilUsed) { 1716 // O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1) 1717 // << ">;\n"; 1718 // } 1719 // else { 1720 // O << "\t.reg " << rcname << " \t" << StrToUpper(rcStr) 1721 // << "<" << 32 << ">;\n"; 1722 // } 1723 //} 1724 //} 1725 1726 OutStreamer.EmitRawText(O.str()); 1727} 1728 1729void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) { 1730 APFloat APF = APFloat(Fp->getValueAPF()); // make a copy 1731 bool ignored; 1732 unsigned int numHex; 1733 const char *lead; 1734 1735 if (Fp->getType()->getTypeID() == Type::FloatTyID) { 1736 numHex = 8; 1737 lead = "0f"; 1738 APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &ignored); 1739 } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) { 1740 numHex = 16; 1741 lead = "0d"; 1742 APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored); 1743 } else 1744 llvm_unreachable("unsupported fp type"); 1745 1746 APInt API = APF.bitcastToAPInt(); 1747 std::string hexstr(utohexstr(API.getZExtValue())); 1748 O << lead; 1749 if (hexstr.length() < numHex) 1750 O << std::string(numHex - hexstr.length(), '0'); 1751 O << utohexstr(API.getZExtValue()); 1752} 1753 1754void NVPTXAsmPrinter::printScalarConstant(Constant *CPV, raw_ostream &O) { 1755 if (ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) { 1756 O << CI->getValue(); 1757 return; 1758 } 1759 if (ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) { 1760 
printFPConstant(CFP, O); 1761 return; 1762 } 1763 if (isa<ConstantPointerNull>(CPV)) { 1764 O << "0"; 1765 return; 1766 } 1767 if (GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) { 1768 O << *Mang->getSymbol(GVar); 1769 return; 1770 } 1771 if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1772 Value *v = Cexpr->stripPointerCasts(); 1773 if (GlobalValue *GVar = dyn_cast<GlobalValue>(v)) { 1774 O << *Mang->getSymbol(GVar); 1775 return; 1776 } else { 1777 O << *LowerConstant(CPV, *this); 1778 return; 1779 } 1780 } 1781 llvm_unreachable("Not scalar type found in printScalarConstant()"); 1782} 1783 1784void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, 1785 AggBuffer *aggBuffer) { 1786 1787 const DataLayout *TD = TM.getDataLayout(); 1788 1789 if (isa<UndefValue>(CPV) || CPV->isNullValue()) { 1790 int s = TD->getTypeAllocSize(CPV->getType()); 1791 if (s < Bytes) 1792 s = Bytes; 1793 aggBuffer->addZeros(s); 1794 return; 1795 } 1796 1797 unsigned char *ptr; 1798 switch (CPV->getType()->getTypeID()) { 1799 1800 case Type::IntegerTyID: { 1801 const Type *ETy = CPV->getType(); 1802 if (ETy == Type::getInt8Ty(CPV->getContext())) { 1803 unsigned char c = 1804 (unsigned char)(dyn_cast<ConstantInt>(CPV))->getZExtValue(); 1805 ptr = &c; 1806 aggBuffer->addBytes(ptr, 1, Bytes); 1807 } else if (ETy == Type::getInt16Ty(CPV->getContext())) { 1808 short int16 = (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue(); 1809 ptr = (unsigned char *)&int16; 1810 aggBuffer->addBytes(ptr, 2, Bytes); 1811 } else if (ETy == Type::getInt32Ty(CPV->getContext())) { 1812 if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { 1813 int int32 = (int)(constInt->getZExtValue()); 1814 ptr = (unsigned char *)&int32; 1815 aggBuffer->addBytes(ptr, 4, Bytes); 1816 break; 1817 } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1818 if (ConstantInt *constInt = dyn_cast<ConstantInt>( 1819 ConstantFoldConstantExpression(Cexpr, TD))) { 1820 int int32 = (int)(constInt->getZExtValue()); 
1821 ptr = (unsigned char *)&int32; 1822 aggBuffer->addBytes(ptr, 4, Bytes); 1823 break; 1824 } 1825 if (Cexpr->getOpcode() == Instruction::PtrToInt) { 1826 Value *v = Cexpr->getOperand(0)->stripPointerCasts(); 1827 aggBuffer->addSymbol(v); 1828 aggBuffer->addZeros(4); 1829 break; 1830 } 1831 } 1832 llvm_unreachable("unsupported integer const type"); 1833 } else if (ETy == Type::getInt64Ty(CPV->getContext())) { 1834 if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { 1835 long long int64 = (long long)(constInt->getZExtValue()); 1836 ptr = (unsigned char *)&int64; 1837 aggBuffer->addBytes(ptr, 8, Bytes); 1838 break; 1839 } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1840 if (ConstantInt *constInt = dyn_cast<ConstantInt>( 1841 ConstantFoldConstantExpression(Cexpr, TD))) { 1842 long long int64 = (long long)(constInt->getZExtValue()); 1843 ptr = (unsigned char *)&int64; 1844 aggBuffer->addBytes(ptr, 8, Bytes); 1845 break; 1846 } 1847 if (Cexpr->getOpcode() == Instruction::PtrToInt) { 1848 Value *v = Cexpr->getOperand(0)->stripPointerCasts(); 1849 aggBuffer->addSymbol(v); 1850 aggBuffer->addZeros(8); 1851 break; 1852 } 1853 } 1854 llvm_unreachable("unsupported integer const type"); 1855 } else 1856 llvm_unreachable("unsupported integer const type"); 1857 break; 1858 } 1859 case Type::FloatTyID: 1860 case Type::DoubleTyID: { 1861 ConstantFP *CFP = dyn_cast<ConstantFP>(CPV); 1862 const Type *Ty = CFP->getType(); 1863 if (Ty == Type::getFloatTy(CPV->getContext())) { 1864 float float32 = (float) CFP->getValueAPF().convertToFloat(); 1865 ptr = (unsigned char *)&float32; 1866 aggBuffer->addBytes(ptr, 4, Bytes); 1867 } else if (Ty == Type::getDoubleTy(CPV->getContext())) { 1868 double float64 = CFP->getValueAPF().convertToDouble(); 1869 ptr = (unsigned char *)&float64; 1870 aggBuffer->addBytes(ptr, 8, Bytes); 1871 } else { 1872 llvm_unreachable("unsupported fp const type"); 1873 } 1874 break; 1875 } 1876 case Type::PointerTyID: { 1877 if (GlobalValue 
*GVar = dyn_cast<GlobalValue>(CPV)) { 1878 aggBuffer->addSymbol(GVar); 1879 } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1880 Value *v = Cexpr->stripPointerCasts(); 1881 aggBuffer->addSymbol(v); 1882 } 1883 unsigned int s = TD->getTypeAllocSize(CPV->getType()); 1884 aggBuffer->addZeros(s); 1885 break; 1886 } 1887 1888 case Type::ArrayTyID: 1889 case Type::VectorTyID: 1890 case Type::StructTyID: { 1891 if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV) || 1892 isa<ConstantStruct>(CPV)) { 1893 int ElementSize = TD->getTypeAllocSize(CPV->getType()); 1894 bufferAggregateConstant(CPV, aggBuffer); 1895 if (Bytes > ElementSize) 1896 aggBuffer->addZeros(Bytes - ElementSize); 1897 } else if (isa<ConstantAggregateZero>(CPV)) 1898 aggBuffer->addZeros(Bytes); 1899 else 1900 llvm_unreachable("Unexpected Constant type"); 1901 break; 1902 } 1903 1904 default: 1905 llvm_unreachable("unsupported type"); 1906 } 1907} 1908 1909void NVPTXAsmPrinter::bufferAggregateConstant(Constant *CPV, 1910 AggBuffer *aggBuffer) { 1911 const DataLayout *TD = TM.getDataLayout(); 1912 int Bytes; 1913 1914 // Old constants 1915 if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) { 1916 if (CPV->getNumOperands()) 1917 for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) 1918 bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer); 1919 return; 1920 } 1921 1922 if (const ConstantDataSequential *CDS = 1923 dyn_cast<ConstantDataSequential>(CPV)) { 1924 if (CDS->getNumElements()) 1925 for (unsigned i = 0; i < CDS->getNumElements(); ++i) 1926 bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0, 1927 aggBuffer); 1928 return; 1929 } 1930 1931 if (isa<ConstantStruct>(CPV)) { 1932 if (CPV->getNumOperands()) { 1933 StructType *ST = cast<StructType>(CPV->getType()); 1934 for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) { 1935 if (i == (e - 1)) 1936 Bytes = TD->getStructLayout(ST)->getElementOffset(0) + 1937 TD->getTypeAllocSize(ST) - 1938 
TD->getStructLayout(ST)->getElementOffset(i); 1939 else 1940 Bytes = TD->getStructLayout(ST)->getElementOffset(i + 1) - 1941 TD->getStructLayout(ST)->getElementOffset(i); 1942 bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer); 1943 } 1944 } 1945 return; 1946 } 1947 llvm_unreachable("unsupported constant type in printAggregateConstant()"); 1948} 1949 1950// buildTypeNameMap - Run through symbol table looking for type names. 1951// 1952 1953bool NVPTXAsmPrinter::isImageType(const Type *Ty) { 1954 1955 std::map<const Type *, std::string>::iterator PI = TypeNameMap.find(Ty); 1956 1957 if (PI != TypeNameMap.end() && (!PI->second.compare("struct._image1d_t") || 1958 !PI->second.compare("struct._image2d_t") || 1959 !PI->second.compare("struct._image3d_t"))) 1960 return true; 1961 1962 return false; 1963} 1964 1965/// PrintAsmOperand - Print out an operand for an inline asm expression. 1966/// 1967bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, 1968 unsigned AsmVariant, 1969 const char *ExtraCode, raw_ostream &O) { 1970 if (ExtraCode && ExtraCode[0]) { 1971 if (ExtraCode[1] != 0) 1972 return true; // Unknown modifier. 
1973 1974 switch (ExtraCode[0]) { 1975 default: 1976 // See if this is a generic print operand 1977 return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O); 1978 case 'r': 1979 break; 1980 } 1981 } 1982 1983 printOperand(MI, OpNo, O); 1984 1985 return false; 1986} 1987 1988bool NVPTXAsmPrinter::PrintAsmMemoryOperand( 1989 const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, 1990 const char *ExtraCode, raw_ostream &O) { 1991 if (ExtraCode && ExtraCode[0]) 1992 return true; // Unknown modifier 1993 1994 O << '['; 1995 printMemOperand(MI, OpNo, O); 1996 O << ']'; 1997 1998 return false; 1999} 2000 2001bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) { 2002 switch (MI.getOpcode()) { 2003 default: 2004 return false; 2005 case NVPTX::CallArgBeginInst: 2006 case NVPTX::CallArgEndInst0: 2007 case NVPTX::CallArgEndInst1: 2008 case NVPTX::CallArgF32: 2009 case NVPTX::CallArgF64: 2010 case NVPTX::CallArgI16: 2011 case NVPTX::CallArgI32: 2012 case NVPTX::CallArgI32imm: 2013 case NVPTX::CallArgI64: 2014 case NVPTX::CallArgI8: 2015 case NVPTX::CallArgParam: 2016 case NVPTX::CallVoidInst: 2017 case NVPTX::CallVoidInstReg: 2018 case NVPTX::Callseq_End: 2019 case NVPTX::CallVoidInstReg64: 2020 case NVPTX::DeclareParamInst: 2021 case NVPTX::DeclareRetMemInst: 2022 case NVPTX::DeclareRetRegInst: 2023 case NVPTX::DeclareRetScalarInst: 2024 case NVPTX::DeclareScalarParamInst: 2025 case NVPTX::DeclareScalarRegInst: 2026 case NVPTX::StoreParamF32: 2027 case NVPTX::StoreParamF64: 2028 case NVPTX::StoreParamI16: 2029 case NVPTX::StoreParamI32: 2030 case NVPTX::StoreParamI64: 2031 case NVPTX::StoreParamI8: 2032 case NVPTX::StoreParamS32I8: 2033 case NVPTX::StoreParamU32I8: 2034 case NVPTX::StoreParamS32I16: 2035 case NVPTX::StoreParamU32I16: 2036 case NVPTX::StoreRetvalF32: 2037 case NVPTX::StoreRetvalF64: 2038 case NVPTX::StoreRetvalI16: 2039 case NVPTX::StoreRetvalI32: 2040 case NVPTX::StoreRetvalI64: 2041 case NVPTX::StoreRetvalI8: 2042 case 
NVPTX::LastCallArgF32: 2043 case NVPTX::LastCallArgF64: 2044 case NVPTX::LastCallArgI16: 2045 case NVPTX::LastCallArgI32: 2046 case NVPTX::LastCallArgI32imm: 2047 case NVPTX::LastCallArgI64: 2048 case NVPTX::LastCallArgI8: 2049 case NVPTX::LastCallArgParam: 2050 case NVPTX::LoadParamMemF32: 2051 case NVPTX::LoadParamMemF64: 2052 case NVPTX::LoadParamMemI16: 2053 case NVPTX::LoadParamMemI32: 2054 case NVPTX::LoadParamMemI64: 2055 case NVPTX::LoadParamMemI8: 2056 case NVPTX::LoadParamRegF32: 2057 case NVPTX::LoadParamRegF64: 2058 case NVPTX::LoadParamRegI16: 2059 case NVPTX::LoadParamRegI32: 2060 case NVPTX::LoadParamRegI64: 2061 case NVPTX::LoadParamRegI8: 2062 case NVPTX::PrototypeInst: 2063 case NVPTX::DBG_VALUE: 2064 return true; 2065 } 2066 return false; 2067} 2068 2069// Force static initialization. 2070extern "C" void LLVMInitializeNVPTXBackendAsmPrinter() { 2071 RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32); 2072 RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64); 2073} 2074 2075void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) { 2076 std::stringstream temp; 2077 LineReader *reader = this->getReader(filename.str()); 2078 temp << "\n//"; 2079 temp << filename.str(); 2080 temp << ":"; 2081 temp << line; 2082 temp << " "; 2083 temp << reader->readLine(line); 2084 temp << "\n"; 2085 this->OutStreamer.EmitRawText(Twine(temp.str())); 2086} 2087 2088LineReader *NVPTXAsmPrinter::getReader(std::string filename) { 2089 if (reader == NULL) { 2090 reader = new LineReader(filename); 2091 } 2092 2093 if (reader->fileName() != filename) { 2094 delete reader; 2095 reader = new LineReader(filename); 2096 } 2097 2098 return reader; 2099} 2100 2101std::string LineReader::readLine(unsigned lineNum) { 2102 if (lineNum < theCurLine) { 2103 theCurLine = 0; 2104 fstr.seekg(0, std::ios::beg); 2105 } 2106 while (theCurLine < lineNum) { 2107 fstr.getline(buff, 500); 2108 theCurLine++; 2109 } 2110 return buff; 2111} 2112 2113// Force static 
initialization. 2114extern "C" void LLVMInitializeNVPTXAsmPrinter() { 2115 RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32); 2116 RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64); 2117} 2118