1//===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file contains a printer that converts from our internal representation 11// of machine-dependent LLVM code to NVPTX assembly language. 12// 13//===----------------------------------------------------------------------===// 14 15#include "NVPTXAsmPrinter.h" 16#include "NVPTX.h" 17#include "NVPTXInstrInfo.h" 18#include "NVPTXTargetMachine.h" 19#include "NVPTXRegisterInfo.h" 20#include "NVPTXUtilities.h" 21#include "MCTargetDesc/NVPTXMCAsmInfo.h" 22#include "NVPTXNumRegisters.h" 23#include "llvm/ADT/StringExtras.h" 24#include "llvm/DebugInfo.h" 25#include "llvm/Function.h" 26#include "llvm/GlobalVariable.h" 27#include "llvm/Module.h" 28#include "llvm/CodeGen/Analysis.h" 29#include "llvm/CodeGen/MachineRegisterInfo.h" 30#include "llvm/CodeGen/MachineFrameInfo.h" 31#include "llvm/CodeGen/MachineModuleInfo.h" 32#include "llvm/MC/MCStreamer.h" 33#include "llvm/MC/MCSymbol.h" 34#include "llvm/Target/Mangler.h" 35#include "llvm/Target/TargetLoweringObjectFile.h" 36#include "llvm/Support/TargetRegistry.h" 37#include "llvm/Support/ErrorHandling.h" 38#include "llvm/Support/FormattedStream.h" 39#include "llvm/DerivedTypes.h" 40#include "llvm/Support/TimeValue.h" 41#include "llvm/Support/CommandLine.h" 42#include "llvm/Analysis/ConstantFolding.h" 43#include "llvm/Support/Path.h" 44#include "llvm/Assembly/Writer.h" 45#include "cl_common_defines.h" 46#include <sstream> 47using namespace llvm; 48 49 50#include "NVPTXGenAsmWriter.inc" 51 52bool RegAllocNilUsed = true; 53 54#define DEPOTNAME "__local_depot" 55 56static cl::opt<bool> 57EmitLineNumbers("nvptx-emit-line-numbers", 58 cl::desc("NVPTX Specific: Emit Line numbers even without -G"), 59 cl::init(true)); 60 61namespace llvm { 62bool InterleaveSrcInPtx = false; 63} 64 65static cl::opt<bool, true>InterleaveSrc("nvptx-emit-src", 66 cl::ZeroOrMore, 67 cl::desc("NVPTX Specific: Emit source line in ptx file"), 68 cl::location(llvm::InterleaveSrcInPtx)); 69 70 71 72 73// @TODO: This is a copy from AsmPrinter.cpp. The function is static, so we 74// cannot just link to the existing version. 75/// LowerConstant - Lower the specified LLVM Constant to an MCExpr. 76/// 77using namespace nvptx; 78const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { 79 MCContext &Ctx = AP.OutContext; 80 81 if (CV->isNullValue() || isa<UndefValue>(CV)) 82 return MCConstantExpr::Create(0, Ctx); 83 84 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) 85 return MCConstantExpr::Create(CI->getZExtValue(), Ctx); 86 87 if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) 88 return MCSymbolRefExpr::Create(AP.Mang->getSymbol(GV), Ctx); 89 90 if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) 91 return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx); 92 93 const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV); 94 if (CE == 0) 95 llvm_unreachable("Unknown constant value to lower!"); 96 97 98 switch (CE->getOpcode()) { 99 default: 100 // If the code isn't optimized, there may be outstanding folding 101 // opportunities. Attempt to fold the expression using TargetData as a 102 // last resort before giving up. 103 if (Constant *C = 104 ConstantFoldConstantExpression(CE, AP.TM.getTargetData())) 105 if (C != CE) 106 return LowerConstant(C, AP); 107 108 // Otherwise report the problem to the user. 109 { 110 std::string S; 111 raw_string_ostream OS(S); 112 OS << "Unsupported expression in static initializer: "; 113 WriteAsOperand(OS, CE, /*PrintType=*/false, 114 !AP.MF ? 0 : AP.MF->getFunction()->getParent()); 115 report_fatal_error(OS.str()); 116 } 117 case Instruction::GetElementPtr: { 118 const TargetData &TD = *AP.TM.getTargetData(); 119 // Generate a symbolic expression for the byte address 120 const Constant *PtrVal = CE->getOperand(0); 121 SmallVector<Value*, 8> IdxVec(CE->op_begin()+1, CE->op_end()); 122 int64_t Offset = TD.getIndexedOffset(PtrVal->getType(), IdxVec); 123 124 const MCExpr *Base = LowerConstant(CE->getOperand(0), AP); 125 if (Offset == 0) 126 return Base; 127 128 // Truncate/sext the offset to the pointer size. 129 if (TD.getPointerSizeInBits() != 64) { 130 int SExtAmount = 64-TD.getPointerSizeInBits(); 131 Offset = (Offset << SExtAmount) >> SExtAmount; 132 } 133 134 return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx), 135 Ctx); 136 } 137 138 case Instruction::Trunc: 139 // We emit the value and depend on the assembler to truncate the generated 140 // expression properly. This is important for differences between 141 // blockaddress labels. Since the two labels are in the same function, it 142 // is reasonable to treat their delta as a 32-bit value. 143 // FALL THROUGH. 144 case Instruction::BitCast: 145 return LowerConstant(CE->getOperand(0), AP); 146 147 case Instruction::IntToPtr: { 148 const TargetData &TD = *AP.TM.getTargetData(); 149 // Handle casts to pointers by changing them into casts to the appropriate 150 // integer type. This promotes constant folding and simplifies this code. 151 Constant *Op = CE->getOperand(0); 152 Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()), 153 false/*ZExt*/); 154 return LowerConstant(Op, AP); 155 } 156 157 case Instruction::PtrToInt: { 158 const TargetData &TD = *AP.TM.getTargetData(); 159 // Support only foldable casts to/from pointers that can be eliminated by 160 // changing the pointer to the appropriately sized integer type. 161 Constant *Op = CE->getOperand(0); 162 Type *Ty = CE->getType(); 163 164 const MCExpr *OpExpr = LowerConstant(Op, AP); 165 166 // We can emit the pointer value into this slot if the slot is an 167 // integer slot equal to the size of the pointer. 168 if (TD.getTypeAllocSize(Ty) == TD.getTypeAllocSize(Op->getType())) 169 return OpExpr; 170 171 // Otherwise the pointer is smaller than the resultant integer, mask off 172 // the high bits so we are sure to get a proper truncation if the input is 173 // a constant expr. 174 unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType()); 175 const MCExpr *MaskExpr = MCConstantExpr::Create(~0ULL >> (64-InBits), Ctx); 176 return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx); 177 } 178 179 // The MC library also has a right-shift operator, but it isn't consistently 180 // signed or unsigned between different targets. 181 case Instruction::Add: 182 case Instruction::Sub: 183 case Instruction::Mul: 184 case Instruction::SDiv: 185 case Instruction::SRem: 186 case Instruction::Shl: 187 case Instruction::And: 188 case Instruction::Or: 189 case Instruction::Xor: { 190 const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP); 191 const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP); 192 switch (CE->getOpcode()) { 193 default: llvm_unreachable("Unknown binary operator constant cast expr"); 194 case Instruction::Add: return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx); 195 case Instruction::Sub: return MCBinaryExpr::CreateSub(LHS, RHS, Ctx); 196 case Instruction::Mul: return MCBinaryExpr::CreateMul(LHS, RHS, Ctx); 197 case Instruction::SDiv: return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx); 198 case Instruction::SRem: return MCBinaryExpr::CreateMod(LHS, RHS, Ctx); 199 case Instruction::Shl: return MCBinaryExpr::CreateShl(LHS, RHS, Ctx); 200 case Instruction::And: return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx); 201 case Instruction::Or: return MCBinaryExpr::CreateOr (LHS, RHS, Ctx); 202 case Instruction::Xor: return MCBinaryExpr::CreateXor(LHS, RHS, Ctx); 203 } 204 } 205 } 206} 207 208 209void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) 210{ 211 if (!EmitLineNumbers) 212 return; 213 if (ignoreLoc(MI)) 214 return; 215 216 DebugLoc curLoc = MI.getDebugLoc(); 217 218 if (prevDebugLoc.isUnknown() && curLoc.isUnknown()) 219 return; 220 221 if (prevDebugLoc == curLoc) 222 return; 223 224 prevDebugLoc = curLoc; 225 226 if (curLoc.isUnknown()) 227 return; 228 229 230 const MachineFunction *MF = MI.getParent()->getParent(); 231 //const TargetMachine &TM = MF->getTarget(); 232 233 const LLVMContext &ctx = MF->getFunction()->getContext(); 234 DIScope Scope(curLoc.getScope(ctx)); 235 236 if (!Scope.Verify()) 237 return; 238 239 StringRef fileName(Scope.getFilename()); 240 StringRef dirName(Scope.getDirectory()); 241 SmallString<128> FullPathName = dirName; 242 if (!dirName.empty() && !sys::path::is_absolute(fileName)) { 243 sys::path::append(FullPathName, fileName); 244 fileName = FullPathName.str(); 245 } 246 247 if (filenameMap.find(fileName.str()) == filenameMap.end()) 248 return; 249 250 251 // Emit the line from the source file. 252 if (llvm::InterleaveSrcInPtx) 253 this->emitSrcInText(fileName.str(), curLoc.getLine()); 254 255 std::stringstream temp; 256 temp << "\t.loc " << filenameMap[fileName.str()] 257 << " " << curLoc.getLine() << " " << curLoc.getCol(); 258 OutStreamer.EmitRawText(Twine(temp.str().c_str())); 259} 260 261void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) { 262 SmallString<128> Str; 263 raw_svector_ostream OS(Str); 264 if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) 265 emitLineNumberAsDotLoc(*MI); 266 printInstruction(MI, OS); 267 OutStreamer.EmitRawText(OS.str()); 268} 269 270void NVPTXAsmPrinter::printReturnValStr(const Function *F, 271 raw_ostream &O) 272{ 273 const TargetData *TD = TM.getTargetData(); 274 const TargetLowering *TLI = TM.getTargetLowering(); 275 276 Type *Ty = F->getReturnType(); 277 278 bool isABI = (nvptxSubtarget.getSmVersion() >= 20); 279 280 if (Ty->getTypeID() == Type::VoidTyID) 281 return; 282 283 O << " ("; 284 285 if (isABI) { 286 if (Ty->isPrimitiveType() || Ty->isIntegerTy()) { 287 unsigned size = 0; 288 if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) { 289 size = ITy->getBitWidth(); 290 if (size < 32) size = 32; 291 } else { 292 assert(Ty->isFloatingPointTy() && 293 "Floating point type expected here"); 294 size = Ty->getPrimitiveSizeInBits(); 295 } 296 297 O << ".param .b" << size << " func_retval0"; 298 } 299 else if (isa<PointerType>(Ty)) { 300 O << ".param .b" << TLI->getPointerTy().getSizeInBits() 301 << " func_retval0"; 302 } else { 303 if ((Ty->getTypeID() == Type::StructTyID) || 304 isa<VectorType>(Ty)) { 305 SmallVector<EVT, 16> vtparts; 306 ComputeValueVTs(*TLI, Ty, vtparts); 307 unsigned totalsz = 0; 308 for (unsigned i=0,e=vtparts.size(); i!=e; ++i) { 309 unsigned elems = 1; 310 EVT elemtype = vtparts[i]; 311 if (vtparts[i].isVector()) { 312 elems = vtparts[i].getVectorNumElements(); 313 elemtype = vtparts[i].getVectorElementType(); 314 } 315 for (unsigned j=0, je=elems; j!=je; ++j) { 316 unsigned sz = elemtype.getSizeInBits(); 317 if (elemtype.isInteger() && (sz < 8)) sz = 8; 318 totalsz += sz/8; 319 } 320 } 321 unsigned retAlignment = 0; 322 if (!llvm::getAlign(*F, 0, retAlignment)) 323 retAlignment = TD->getABITypeAlignment(Ty); 324 O << ".param .align " 325 << retAlignment 326 << " .b8 func_retval0[" 327 << totalsz << "]"; 328 } else 329 assert(false && 330 "Unknown return type"); 331 } 332 } else { 333 SmallVector<EVT, 16> vtparts; 334 ComputeValueVTs(*TLI, Ty, vtparts); 335 unsigned idx = 0; 336 for (unsigned i=0,e=vtparts.size(); i!=e; ++i) { 337 unsigned elems = 1; 338 EVT elemtype = vtparts[i]; 339 if (vtparts[i].isVector()) { 340 elems = vtparts[i].getVectorNumElements(); 341 elemtype = vtparts[i].getVectorElementType(); 342 } 343 344 for (unsigned j=0, je=elems; j!=je; ++j) { 345 unsigned sz = elemtype.getSizeInBits(); 346 if (elemtype.isInteger() && (sz < 32)) sz = 32; 347 O << ".reg .b" << sz << " func_retval" << idx; 348 if (j<je-1) O << ", "; 349 ++idx; 350 } 351 if (i < e-1) 352 O << ", "; 353 } 354 } 355 O << ") "; 356 return; 357} 358 359void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF, 360 raw_ostream &O) { 361 const Function *F = MF.getFunction(); 362 printReturnValStr(F, O); 363} 364 365void NVPTXAsmPrinter::EmitFunctionEntryLabel() { 366 SmallString<128> Str; 367 raw_svector_ostream O(Str); 368 369 // Set up 370 MRI = &MF->getRegInfo(); 371 F = MF->getFunction(); 372 emitLinkageDirective(F,O); 373 if (llvm::isKernelFunction(*F)) 374 O << ".entry "; 375 else { 376 O << ".func "; 377 printReturnValStr(*MF, O); 378 } 379 380 O << *CurrentFnSym; 381 382 emitFunctionParamList(*MF, O); 383 384 if (llvm::isKernelFunction(*F)) 385 emitKernelFunctionDirectives(*F, O); 386 387 OutStreamer.EmitRawText(O.str()); 388 389 prevDebugLoc = DebugLoc(); 390} 391 392void NVPTXAsmPrinter::EmitFunctionBodyStart() { 393 const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); 394 unsigned numRegClasses = TRI.getNumRegClasses(); 395 VRidGlobal2LocalMap = new std::map<unsigned, unsigned>[numRegClasses+1]; 396 OutStreamer.EmitRawText(StringRef("{\n")); 397 setAndEmitFunctionVirtualRegisters(*MF); 398 399 SmallString<128> Str; 400 raw_svector_ostream O(Str); 401 emitDemotedVars(MF->getFunction(), O); 402 OutStreamer.EmitRawText(O.str()); 403} 404 405void NVPTXAsmPrinter::EmitFunctionBodyEnd() { 406 OutStreamer.EmitRawText(StringRef("}\n")); 407 delete []VRidGlobal2LocalMap; 408} 409 410 411void 412NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function& F, 413 raw_ostream &O) const { 414 // If the NVVM IR has some of reqntid* specified, then output 415 // the reqntid directive, and set the unspecified ones to 1. 416 // If none of reqntid* is specified, don't output reqntid directive. 417 unsigned reqntidx, reqntidy, reqntidz; 418 bool specified = false; 419 if (llvm::getReqNTIDx(F, reqntidx) == false) reqntidx = 1; 420 else specified = true; 421 if (llvm::getReqNTIDy(F, reqntidy) == false) reqntidy = 1; 422 else specified = true; 423 if (llvm::getReqNTIDz(F, reqntidz) == false) reqntidz = 1; 424 else specified = true; 425 426 if (specified) 427 O << ".reqntid " << reqntidx << ", " 428 << reqntidy << ", " << reqntidz << "\n"; 429 430 // If the NVVM IR has some of maxntid* specified, then output 431 // the maxntid directive, and set the unspecified ones to 1. 432 // If none of maxntid* is specified, don't output maxntid directive. 433 unsigned maxntidx, maxntidy, maxntidz; 434 specified = false; 435 if (llvm::getMaxNTIDx(F, maxntidx) == false) maxntidx = 1; 436 else specified = true; 437 if (llvm::getMaxNTIDy(F, maxntidy) == false) maxntidy = 1; 438 else specified = true; 439 if (llvm::getMaxNTIDz(F, maxntidz) == false) maxntidz = 1; 440 else specified = true; 441 442 if (specified) 443 O << ".maxntid " << maxntidx << ", " 444 << maxntidy << ", " << maxntidz << "\n"; 445 446 unsigned mincta; 447 if (llvm::getMinCTASm(F, mincta)) 448 O << ".minnctapersm " << mincta << "\n"; 449} 450 451void 452NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec, 453 raw_ostream &O) { 454 const TargetRegisterClass * RC = MRI->getRegClass(vr); 455 unsigned id = RC->getID(); 456 457 std::map<unsigned, unsigned> ®map = VRidGlobal2LocalMap[id]; 458 unsigned mapped_vr = regmap[vr]; 459 460 if (!isVec) { 461 O << getNVPTXRegClassStr(RC) << mapped_vr; 462 return; 463 } 464 // Vector virtual register 465 if (getNVPTXVectorSize(RC) == 4) 466 O << "{" 467 << getNVPTXRegClassStr(RC) << mapped_vr << "_0, " 468 << getNVPTXRegClassStr(RC) << mapped_vr << "_1, " 469 << getNVPTXRegClassStr(RC) << mapped_vr << "_2, " 470 << getNVPTXRegClassStr(RC) << mapped_vr << "_3" 471 << "}"; 472 else if (getNVPTXVectorSize(RC) == 2) 473 O << "{" 474 << getNVPTXRegClassStr(RC) << mapped_vr << "_0, " 475 << getNVPTXRegClassStr(RC) << mapped_vr << "_1" 476 << "}"; 477 else 478 llvm_unreachable("Unsupported vector size"); 479} 480 481void 482NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr, bool isVec, 483 raw_ostream &O) { 484 getVirtualRegisterName(vr, isVec, O); 485} 486 487void NVPTXAsmPrinter::printVecModifiedImmediate(const MachineOperand &MO, 488 const char *Modifier, 489 raw_ostream &O) { 490 static const char vecelem[] = {'0', '1', '2', '3', '0', '1', '2', '3'}; 491 int Imm = (int)MO.getImm(); 492 if(0 == strcmp(Modifier, "vecelem")) 493 O << "_" << vecelem[Imm]; 494 else if(0 == strcmp(Modifier, "vecv4comm1")) { 495 if((Imm < 0) || (Imm > 3)) 496 O << "//"; 497 } 498 else if(0 == strcmp(Modifier, "vecv4comm2")) { 499 if((Imm < 4) || (Imm > 7)) 500 O << "//"; 501 } 502 else if(0 == strcmp(Modifier, "vecv4pos")) { 503 if(Imm < 0) Imm = 0; 504 O << "_" << vecelem[Imm%4]; 505 } 506 else if(0 == strcmp(Modifier, "vecv2comm1")) { 507 if((Imm < 0) || (Imm > 1)) 508 O << "//"; 509 } 510 else if(0 == strcmp(Modifier, "vecv2comm2")) { 511 if((Imm < 2) || (Imm > 3)) 512 O << "//"; 513 } 514 else if(0 == strcmp(Modifier, "vecv2pos")) { 515 if(Imm < 0) Imm = 0; 516 O << "_" << vecelem[Imm%2]; 517 } 518 else 519 llvm_unreachable("Unknown Modifier on immediate operand"); 520} 521 522void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum, 523 raw_ostream &O, const char *Modifier) { 524 const MachineOperand &MO = MI->getOperand(opNum); 525 switch (MO.getType()) { 526 case MachineOperand::MO_Register: 527 if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { 528 if (MO.getReg() == NVPTX::VRDepot) 529 O << DEPOTNAME << getFunctionNumber(); 530 else 531 O << getRegisterName(MO.getReg()); 532 } else { 533 if (!Modifier) 534 emitVirtualRegister(MO.getReg(), false, O); 535 else { 536 if (strcmp(Modifier, "vecfull") == 0) 537 emitVirtualRegister(MO.getReg(), true, O); 538 else 539 llvm_unreachable( 540 "Don't know how to handle the modifier on virtual register."); 541 } 542 } 543 return; 544 545 case MachineOperand::MO_Immediate: 546 if (!Modifier) 547 O << MO.getImm(); 548 else if (strstr(Modifier, "vec") == Modifier) 549 printVecModifiedImmediate(MO, Modifier, O); 550 else 551 llvm_unreachable("Don't know how to handle modifier on immediate operand"); 552 return; 553 554 case MachineOperand::MO_FPImmediate: 555 printFPConstant(MO.getFPImm(), O); 556 break; 557 558 case MachineOperand::MO_GlobalAddress: 559 O << *Mang->getSymbol(MO.getGlobal()); 560 break; 561 562 case MachineOperand::MO_ExternalSymbol: { 563 const char * symbname = MO.getSymbolName(); 564 if (strstr(symbname, ".PARAM") == symbname) { 565 unsigned index; 566 sscanf(symbname+6, "%u[];", &index); 567 printParamName(index, O); 568 } 569 else if (strstr(symbname, ".HLPPARAM") == symbname) { 570 unsigned index; 571 sscanf(symbname+9, "%u[];", &index); 572 O << *CurrentFnSym << "_param_" << index << "_offset"; 573 } 574 else 575 O << symbname; 576 break; 577 } 578 579 case MachineOperand::MO_MachineBasicBlock: 580 O << *MO.getMBB()->getSymbol(); 581 return; 582 583 default: 584 llvm_unreachable("Operand type not supported."); 585 } 586} 587 588void NVPTXAsmPrinter:: 589printImplicitDef(const MachineInstr *MI, raw_ostream &O) const { 590#ifndef __OPTIMIZE__ 591 O << "\t// Implicit def :"; 592 //printOperand(MI, 0); 593 O << "\n"; 594#endif 595} 596 597void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum, 598 raw_ostream &O, const char *Modifier) { 599 printOperand(MI, opNum, O); 600 601 if (Modifier && !strcmp(Modifier, "add")) { 602 O << ", "; 603 printOperand(MI, opNum+1, O); 604 } else { 605 if (MI->getOperand(opNum+1).isImm() && 606 MI->getOperand(opNum+1).getImm() == 0) 607 return; // don't print ',0' or '+0' 608 O << "+"; 609 printOperand(MI, opNum+1, O); 610 } 611} 612 613void NVPTXAsmPrinter::printLdStCode(const MachineInstr *MI, int opNum, 614 raw_ostream &O, const char *Modifier) 615{ 616 if (Modifier) { 617 const MachineOperand &MO = MI->getOperand(opNum); 618 int Imm = (int)MO.getImm(); 619 if (!strcmp(Modifier, "volatile")) { 620 if (Imm) 621 O << ".volatile"; 622 } else if (!strcmp(Modifier, "addsp")) { 623 switch (Imm) { 624 case NVPTX::PTXLdStInstCode::GLOBAL: O << ".global"; break; 625 case NVPTX::PTXLdStInstCode::SHARED: O << ".shared"; break; 626 case NVPTX::PTXLdStInstCode::LOCAL: O << ".local"; break; 627 case NVPTX::PTXLdStInstCode::PARAM: O << ".param"; break; 628 case NVPTX::PTXLdStInstCode::CONSTANT: O << ".const"; break; 629 case NVPTX::PTXLdStInstCode::GENERIC: 630 if (!nvptxSubtarget.hasGenericLdSt()) 631 O << ".global"; 632 break; 633 default: 634 assert("wrong value"); 635 } 636 } 637 else if (!strcmp(Modifier, "sign")) { 638 if (Imm==NVPTX::PTXLdStInstCode::Signed) 639 O << "s"; 640 else if (Imm==NVPTX::PTXLdStInstCode::Unsigned) 641 O << "u"; 642 else 643 O << "f"; 644 } 645 else if (!strcmp(Modifier, "vec")) { 646 if (Imm==NVPTX::PTXLdStInstCode::V2) 647 O << ".v2"; 648 else if (Imm==NVPTX::PTXLdStInstCode::V4) 649 O << ".v4"; 650 } 651 else 652 assert("unknown modifier"); 653 } 654 else 655 assert("unknown modifier"); 656} 657 658void NVPTXAsmPrinter::emitDeclaration (const Function *F, raw_ostream &O) { 659 660 emitLinkageDirective(F,O); 661 if (llvm::isKernelFunction(*F)) 662 O << ".entry "; 663 else 664 O << ".func "; 665 printReturnValStr(F, O); 666 O << *CurrentFnSym << "\n"; 667 emitFunctionParamList(F, O); 668 O << ";\n"; 669} 670 671static bool usedInGlobalVarDef(const Constant *C) 672{ 673 if (!C) 674 return false; 675 676 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) { 677 if (GV->getName().str() == "llvm.used") 678 return false; 679 return true; 680 } 681 682 for (Value::const_use_iterator ui=C->use_begin(), ue=C->use_end(); 683 ui!=ue; ++ui) { 684 const Constant *C = dyn_cast<Constant>(*ui); 685 if (usedInGlobalVarDef(C)) 686 return true; 687 } 688 return false; 689} 690 691static bool usedInOneFunc(const User *U, Function const *&oneFunc) 692{ 693 if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) { 694 if (othergv->getName().str() == "llvm.used") 695 return true; 696 } 697 698 if (const Instruction *instr = dyn_cast<Instruction>(U)) { 699 if (instr->getParent() && instr->getParent()->getParent()) { 700 const Function *curFunc = instr->getParent()->getParent(); 701 if (oneFunc && (curFunc != oneFunc)) 702 return false; 703 oneFunc = curFunc; 704 return true; 705 } 706 else 707 return false; 708 } 709 710 if (const MDNode *md = dyn_cast<MDNode>(U)) 711 if (md->hasName() && ((md->getName().str() == "llvm.dbg.gv") || 712 (md->getName().str() == "llvm.dbg.sp"))) 713 return true; 714 715 716 for (User::const_use_iterator ui=U->use_begin(), ue=U->use_end(); 717 ui!=ue; ++ui) { 718 if (usedInOneFunc(*ui, oneFunc) == false) 719 return false; 720 } 721 return true; 722} 723 724/* Find out if a global variable can be demoted to local scope. 725 * Currently, this is valid for CUDA shared variables, which have local 726 * scope and global lifetime. So the conditions to check are : 727 * 1. Is the global variable in shared address space? 728 * 2. Does it have internal linkage? 729 * 3. Is the global variable referenced only in one function? 730 */ 731static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) { 732 if (gv->hasInternalLinkage() == false) 733 return false; 734 const PointerType *Pty = gv->getType(); 735 if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED) 736 return false; 737 738 const Function *oneFunc = 0; 739 740 bool flag = usedInOneFunc(gv, oneFunc); 741 if (flag == false) 742 return false; 743 if (!oneFunc) 744 return false; 745 f = oneFunc; 746 return true; 747} 748 749static bool useFuncSeen(const Constant *C, 750 llvm::DenseMap<const Function *, bool> &seenMap) { 751 for (Value::const_use_iterator ui=C->use_begin(), ue=C->use_end(); 752 ui!=ue; ++ui) { 753 if (const Constant *cu = dyn_cast<Constant>(*ui)) { 754 if (useFuncSeen(cu, seenMap)) 755 return true; 756 } else if (const Instruction *I = dyn_cast<Instruction>(*ui)) { 757 const BasicBlock *bb = I->getParent(); 758 if (!bb) continue; 759 const Function *caller = bb->getParent(); 760 if (!caller) continue; 761 if (seenMap.find(caller) != seenMap.end()) 762 return true; 763 } 764 } 765 return false; 766} 767 768void NVPTXAsmPrinter::emitDeclarations (Module &M, raw_ostream &O) { 769 llvm::DenseMap<const Function *, bool> seenMap; 770 for (Module::const_iterator FI=M.begin(), FE=M.end(); 771 FI!=FE; ++FI) { 772 const Function *F = FI; 773 774 if (F->isDeclaration()) { 775 if (F->use_empty()) 776 continue; 777 if (F->getIntrinsicID()) 778 continue; 779 CurrentFnSym = Mang->getSymbol(F); 780 emitDeclaration(F, O); 781 continue; 782 } 783 for (Value::const_use_iterator iter=F->use_begin(), 784 iterEnd=F->use_end(); iter!=iterEnd; ++iter) { 785 if (const Constant *C = dyn_cast<Constant>(*iter)) { 786 if (usedInGlobalVarDef(C)) { 787 // The use is in the initialization of a global variable 788 // that is a function pointer, so print a declaration 789 // for the original function 790 CurrentFnSym = Mang->getSymbol(F); 791 emitDeclaration(F, O); 792 break; 793 } 794 // Emit a declaration of this function if the function that 795 // uses this constant expr has already been seen. 796 if (useFuncSeen(C, seenMap)) { 797 CurrentFnSym = Mang->getSymbol(F); 798 emitDeclaration(F, O); 799 break; 800 } 801 } 802 803 if (!isa<Instruction>(*iter)) continue; 804 const Instruction *instr = cast<Instruction>(*iter); 805 const BasicBlock *bb = instr->getParent(); 806 if (!bb) continue; 807 const Function *caller = bb->getParent(); 808 if (!caller) continue; 809 810 // If a caller has already been seen, then the caller is 811 // appearing in the module before the callee. so print out 812 // a declaration for the callee. 813 if (seenMap.find(caller) != seenMap.end()) { 814 CurrentFnSym = Mang->getSymbol(F); 815 emitDeclaration(F, O); 816 break; 817 } 818 } 819 seenMap[F] = true; 820 } 821} 822 823void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) { 824 DebugInfoFinder DbgFinder; 825 DbgFinder.processModule(M); 826 827 unsigned i=1; 828 for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(), 829 E = DbgFinder.compile_unit_end(); I != E; ++I) { 830 DICompileUnit DIUnit(*I); 831 StringRef Filename(DIUnit.getFilename()); 832 StringRef Dirname(DIUnit.getDirectory()); 833 SmallString<128> FullPathName = Dirname; 834 if (!Dirname.empty() && !sys::path::is_absolute(Filename)) { 835 sys::path::append(FullPathName, Filename); 836 Filename = FullPathName.str(); 837 } 838 if (filenameMap.find(Filename.str()) != filenameMap.end()) 839 continue; 840 filenameMap[Filename.str()] = i; 841 OutStreamer.EmitDwarfFileDirective(i, "", Filename.str()); 842 ++i; 843 } 844 845 for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(), 846 E = DbgFinder.subprogram_end(); I != E; ++I) { 847 DISubprogram SP(*I); 848 StringRef Filename(SP.getFilename()); 849 StringRef Dirname(SP.getDirectory()); 850 SmallString<128> FullPathName = Dirname; 851 if (!Dirname.empty() && !sys::path::is_absolute(Filename)) { 852 sys::path::append(FullPathName, Filename); 853 Filename = FullPathName.str(); 854 } 855 if (filenameMap.find(Filename.str()) != filenameMap.end()) 856 continue; 857 filenameMap[Filename.str()] = i; 858 ++i; 859 } 860} 861 862bool NVPTXAsmPrinter::doInitialization (Module &M) { 863 864 SmallString<128> Str1; 865 raw_svector_ostream OS1(Str1); 866 867 MMI = getAnalysisIfAvailable<MachineModuleInfo>(); 868 MMI->AnalyzeModule(M); 869 870 // We need to call the parent's one explicitly. 871 //bool Result = AsmPrinter::doInitialization(M); 872 873 // Initialize TargetLoweringObjectFile. 874 const_cast<TargetLoweringObjectFile&>(getObjFileLowering()) 875 .Initialize(OutContext, TM); 876 877 Mang = new Mangler(OutContext, *TM.getTargetData()); 878 879 // Emit header before any dwarf directives are emitted below. 880 emitHeader(M, OS1); 881 OutStreamer.EmitRawText(OS1.str()); 882 883 884 // Already commented out 885 //bool Result = AsmPrinter::doInitialization(M); 886 887 888 if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) 889 recordAndEmitFilenames(M); 890 891 SmallString<128> Str2; 892 raw_svector_ostream OS2(Str2); 893 894 emitDeclarations(M, OS2); 895 896 // Print out module-level global variables here. 897 for (Module::global_iterator I = M.global_begin(), E = M.global_end(); 898 I != E; ++I) 899 printModuleLevelGV(I, OS2); 900 901 OS2 << '\n'; 902 903 OutStreamer.EmitRawText(OS2.str()); 904 return false; // success 905} 906 907void NVPTXAsmPrinter::emitHeader (Module &M, raw_ostream &O) { 908 O << "//\n"; 909 O << "// Generated by LLVM NVPTX Back-End\n"; 910 O << "//\n"; 911 O << "\n"; 912 913 O << ".version 3.0\n"; 914 915 O << ".target "; 916 O << nvptxSubtarget.getTargetName(); 917 918 if (nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) 919 O << ", texmode_independent"; 920 if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) { 921 if (!nvptxSubtarget.hasDouble()) 922 O << ", map_f64_to_f32"; 923 } 924 925 if (MAI->doesSupportDebugInformation()) 926 O << ", debug"; 927 928 O << "\n"; 929 930 O << ".address_size "; 931 if (nvptxSubtarget.is64Bit()) 932 O << "64"; 933 else 934 O << "32"; 935 O << "\n"; 936 937 O << "\n"; 938} 939 940bool NVPTXAsmPrinter::doFinalization(Module &M) { 941 // XXX Temproarily remove global variables so that doFinalization() will not 942 // emit them again (global variables are emitted at beginning). 943 944 Module::GlobalListType &global_list = M.getGlobalList(); 945 int i, n = global_list.size(); 946 GlobalVariable **gv_array = new GlobalVariable* [n]; 947 948 // first, back-up GlobalVariable in gv_array 949 i = 0; 950 for (Module::global_iterator I = global_list.begin(), E = global_list.end(); 951 I != E; ++I) 952 gv_array[i++] = &*I; 953 954 // second, empty global_list 955 while (!global_list.empty()) 956 global_list.remove(global_list.begin()); 957 958 // call doFinalization 959 bool ret = AsmPrinter::doFinalization(M); 960 961 // now we restore global variables 962 for (i = 0; i < n; i ++) 963 global_list.insert(global_list.end(), gv_array[i]); 964 965 delete[] gv_array; 966 return ret; 967 968 969 //bool Result = AsmPrinter::doFinalization(M); 970 // Instead of calling the parents doFinalization, we may 971 // clone parents doFinalization and customize here. 972 // Currently, we if NVISA out the EmitGlobals() in 973 // parent's doFinalization, which is too intrusive. 974 // 975 // Same for the doInitialization. 976 //return Result; 977} 978 979// This function emits appropriate linkage directives for 980// functions and global variables. 981// 982// extern function declaration -> .extern 983// extern function definition -> .visible 984// external global variable with init -> .visible 985// external without init -> .extern 986// appending -> not allowed, assert. 987 988void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue* V, raw_ostream &O) 989{ 990 if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) { 991 if (V->hasExternalLinkage()) { 992 if (isa<GlobalVariable>(V)) { 993 const GlobalVariable *GVar = cast<GlobalVariable>(V); 994 if (GVar) { 995 if (GVar->hasInitializer()) 996 O << ".visible "; 997 else 998 O << ".extern "; 999 } 1000 } else if (V->isDeclaration()) 1001 O << ".extern "; 1002 else 1003 O << ".visible "; 1004 } else if (V->hasAppendingLinkage()) { 1005 std::string msg; 1006 msg.append("Error: "); 1007 msg.append("Symbol "); 1008 if (V->hasName()) 1009 msg.append(V->getName().str()); 1010 msg.append("has unsupported appending linkage type"); 1011 llvm_unreachable(msg.c_str()); 1012 } 1013 } 1014} 1015 1016 1017void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O, 1018 bool processDemoted) { 1019 1020 // Skip meta data 1021 if (GVar->hasSection()) { 1022 if (GVar->getSection() == "llvm.metadata") 1023 return; 1024 } 1025 1026 const TargetData *TD = TM.getTargetData(); 1027 1028 // GlobalVariables are always constant pointers themselves. 1029 const PointerType *PTy = GVar->getType(); 1030 Type *ETy = PTy->getElementType(); 1031 1032 if (GVar->hasExternalLinkage()) { 1033 if (GVar->hasInitializer()) 1034 O << ".visible "; 1035 else 1036 O << ".extern "; 1037 } 1038 1039 if (llvm::isTexture(*GVar)) { 1040 O << ".global .texref " << llvm::getTextureName(*GVar) << ";\n"; 1041 return; 1042 } 1043 1044 if (llvm::isSurface(*GVar)) { 1045 O << ".global .surfref " << llvm::getSurfaceName(*GVar) << ";\n"; 1046 return; 1047 } 1048 1049 if (GVar->isDeclaration()) { 1050 // (extern) declarations, no definition or initializer 1051 // Currently the only known declaration is for an automatic __local 1052 // (.shared) promoted to global. 1053 emitPTXGlobalVariable(GVar, O); 1054 O << ";\n"; 1055 return; 1056 } 1057 1058 if (llvm::isSampler(*GVar)) { 1059 O << ".global .samplerref " << llvm::getSamplerName(*GVar); 1060 1061 Constant *Initializer = NULL; 1062 if (GVar->hasInitializer()) 1063 Initializer = GVar->getInitializer(); 1064 ConstantInt *CI = NULL; 1065 if (Initializer) 1066 CI = dyn_cast<ConstantInt>(Initializer); 1067 if (CI) { 1068 unsigned sample=CI->getZExtValue(); 1069 1070 O << " = { "; 1071 1072 for (int i =0, addr=((sample & __CLK_ADDRESS_MASK ) >> 1073 __CLK_ADDRESS_BASE) ; i < 3 ; i++) { 1074 O << "addr_mode_" << i << " = "; 1075 switch (addr) { 1076 case 0: O << "wrap"; break; 1077 case 1: O << "clamp_to_border"; break; 1078 case 2: O << "clamp_to_edge"; break; 1079 case 3: O << "wrap"; break; 1080 case 4: O << "mirror"; break; 1081 } 1082 O <<", "; 1083 } 1084 O << "filter_mode = "; 1085 switch (( sample & __CLK_FILTER_MASK ) >> __CLK_FILTER_BASE ) { 1086 case 0: O << "nearest"; break; 1087 case 1: O << "linear"; break; 1088 case 2: assert ( 0 && "Anisotropic filtering is not supported"); 1089 default: O << "nearest"; break; 1090 } 1091 if (!(( sample &__CLK_NORMALIZED_MASK ) >> __CLK_NORMALIZED_BASE)) { 1092 O << ", force_unnormalized_coords = 1"; 1093 } 1094 O << " }"; 1095 } 1096 1097 O << ";\n"; 1098 return; 1099 } 1100 1101 if (GVar->hasPrivateLinkage()) { 1102 1103 if (!strncmp(GVar->getName().data(), "unrollpragma", 12)) 1104 return; 1105 1106 // FIXME - need better way (e.g. Metadata) to avoid generating this global 1107 if (!strncmp(GVar->getName().data(), "filename", 8)) 1108 return; 1109 if (GVar->use_empty()) 1110 return; 1111 } 1112 1113 const Function *demotedFunc = 0; 1114 if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) { 1115 O << "// " << GVar->getName().str() << " has been demoted\n"; 1116 if (localDecls.find(demotedFunc) != localDecls.end()) 1117 localDecls[demotedFunc].push_back(GVar); 1118 else { 1119 std::vector<GlobalVariable *> temp; 1120 temp.push_back(GVar); 1121 localDecls[demotedFunc] = temp; 1122 } 1123 return; 1124 } 1125 1126 O << "."; 1127 emitPTXAddressSpace(PTy->getAddressSpace(), O); 1128 if (GVar->getAlignment() == 0) 1129 O << " .align " << (int) TD->getPrefTypeAlignment(ETy); 1130 else 1131 O << " .align " << GVar->getAlignment(); 1132 1133 1134 if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) { 1135 O << " ."; 1136 O << getPTXFundamentalTypeStr(ETy, false); 1137 O << " "; 1138 O << *Mang->getSymbol(GVar); 1139 1140 // Ptx allows variable initilization only for constant and global state 1141 // spaces. 1142 if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) || 1143 (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) || 1144 (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) 1145 && GVar->hasInitializer()) { 1146 Constant *Initializer = GVar->getInitializer(); 1147 if (!Initializer->isNullValue()) { 1148 O << " = " ; 1149 printScalarConstant(Initializer, O); 1150 } 1151 } 1152 } else { 1153 unsigned int ElementSize =0; 1154 1155 // Although PTX has direct support for struct type and array type and 1156 // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for 1157 // targets that support these high level field accesses. Structs, arrays 1158 // and vectors are lowered into arrays of bytes. 1159 switch (ETy->getTypeID()) { 1160 case Type::StructTyID: 1161 case Type::ArrayTyID: 1162 case Type::VectorTyID: 1163 ElementSize = TD->getTypeStoreSize(ETy); 1164 // Ptx allows variable initilization only for constant and 1165 // global state spaces. 1166 if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) || 1167 (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) || 1168 (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) 1169 && GVar->hasInitializer()) { 1170 Constant *Initializer = GVar->getInitializer(); 1171 if (!isa<UndefValue>(Initializer) && 1172 !Initializer->isNullValue()) { 1173 AggBuffer aggBuffer(ElementSize, O, *this); 1174 bufferAggregateConstant(Initializer, &aggBuffer); 1175 if (aggBuffer.numSymbols) { 1176 if (nvptxSubtarget.is64Bit()) { 1177 O << " .u64 " << *Mang->getSymbol(GVar) <<"[" ; 1178 O << ElementSize/8; 1179 } 1180 else { 1181 O << " .u32 " << *Mang->getSymbol(GVar) <<"[" ; 1182 O << ElementSize/4; 1183 } 1184 O << "]"; 1185 } 1186 else { 1187 O << " .b8 " << *Mang->getSymbol(GVar) <<"[" ; 1188 O << ElementSize; 1189 O << "]"; 1190 } 1191 O << " = {" ; 1192 aggBuffer.print(); 1193 O << "}"; 1194 } 1195 else { 1196 O << " .b8 " << *Mang->getSymbol(GVar) ; 1197 if (ElementSize) { 1198 O <<"[" ; 1199 O << ElementSize; 1200 O << "]"; 1201 } 1202 } 1203 } 1204 else { 1205 O << " .b8 " << *Mang->getSymbol(GVar); 1206 if (ElementSize) { 1207 O <<"[" ; 1208 O << ElementSize; 1209 O << "]"; 1210 } 1211 } 1212 break; 1213 default: 1214 assert( 0 && "type not supported yet"); 1215 } 1216 1217 } 1218 O << ";\n"; 1219} 1220 1221void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) { 1222 if (localDecls.find(f) == localDecls.end()) 1223 return; 1224 1225 std::vector<GlobalVariable *> &gvars = localDecls[f]; 1226 1227 for (unsigned i=0, e=gvars.size(); i!=e; ++i) { 1228 O << "\t// demoted variable\n\t"; 1229 printModuleLevelGV(gvars[i], O, true); 1230 } 1231} 1232 1233void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace, 1234 raw_ostream &O) const { 1235 switch (AddressSpace) { 1236 case llvm::ADDRESS_SPACE_LOCAL: 1237 O << "local" ; 1238 break; 1239 case llvm::ADDRESS_SPACE_GLOBAL: 1240 O << "global" ; 1241 break; 1242 case llvm::ADDRESS_SPACE_CONST: 1243 // This logic should be consistent with that in 1244 // getCodeAddrSpace() (NVPTXISelDATToDAT.cpp) 1245 if (nvptxSubtarget.hasGenericLdSt()) 1246 O << "global" ; 1247 else 1248 O << "const" ; 1249 break; 1250 case llvm::ADDRESS_SPACE_CONST_NOT_GEN: 1251 O << "const" ; 1252 break; 1253 case llvm::ADDRESS_SPACE_SHARED: 1254 O << "shared" ; 1255 break; 1256 default: 1257 llvm_unreachable("unexpected address space"); 1258 } 1259} 1260 1261std::string NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, 1262 bool useB4PTR) const { 1263 switch (Ty->getTypeID()) { 1264 default: 1265 llvm_unreachable("unexpected type"); 1266 break; 1267 case Type::IntegerTyID: { 1268 unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); 1269 if (NumBits == 1) 1270 return "pred"; 1271 else if (NumBits <= 64) { 1272 std::string name = "u"; 1273 return name + utostr(NumBits); 1274 } else { 1275 llvm_unreachable("Integer too large"); 1276 break; 1277 } 1278 break; 1279 } 1280 case Type::FloatTyID: 1281 return "f32"; 1282 case Type::DoubleTyID: 1283 return "f64"; 1284 case Type::PointerTyID: 1285 if (nvptxSubtarget.is64Bit()) 1286 if (useB4PTR) return "b64"; 1287 else return "u64"; 1288 else 1289 if (useB4PTR) return "b32"; 1290 else return "u32"; 1291 } 1292 llvm_unreachable("unexpected type"); 1293 return NULL; 1294} 1295 1296void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable* GVar, 1297 raw_ostream &O) { 1298 1299 const TargetData *TD = TM.getTargetData(); 1300 1301 // GlobalVariables are always constant pointers themselves. 1302 const PointerType *PTy = GVar->getType(); 1303 Type *ETy = PTy->getElementType(); 1304 1305 O << "."; 1306 emitPTXAddressSpace(PTy->getAddressSpace(), O); 1307 if (GVar->getAlignment() == 0) 1308 O << " .align " << (int) TD->getPrefTypeAlignment(ETy); 1309 else 1310 O << " .align " << GVar->getAlignment(); 1311 1312 if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) { 1313 O << " ."; 1314 O << getPTXFundamentalTypeStr(ETy); 1315 O << " "; 1316 O << *Mang->getSymbol(GVar); 1317 return; 1318 } 1319 1320 int64_t ElementSize =0; 1321 1322 // Although PTX has direct support for struct type and array type and LLVM IR 1323 // is very similar to PTX, the LLVM CodeGen does not support for targets that 1324 // support these high level field accesses. Structs and arrays are lowered 1325 // into arrays of bytes. 1326 switch (ETy->getTypeID()) { 1327 case Type::StructTyID: 1328 case Type::ArrayTyID: 1329 case Type::VectorTyID: 1330 ElementSize = TD->getTypeStoreSize(ETy); 1331 O << " .b8 " << *Mang->getSymbol(GVar) <<"[" ; 1332 if (ElementSize) { 1333 O << itostr(ElementSize) ; 1334 } 1335 O << "]"; 1336 break; 1337 default: 1338 assert( 0 && "type not supported yet"); 1339 } 1340 return ; 1341} 1342 1343 1344static unsigned int 1345getOpenCLAlignment(const TargetData *TD, 1346 Type *Ty) { 1347 if (Ty->isPrimitiveType() || Ty->isIntegerTy() || isa<PointerType>(Ty)) 1348 return TD->getPrefTypeAlignment(Ty); 1349 1350 const ArrayType *ATy = dyn_cast<ArrayType>(Ty); 1351 if (ATy) 1352 return getOpenCLAlignment(TD, ATy->getElementType()); 1353 1354 const VectorType *VTy = dyn_cast<VectorType>(Ty); 1355 if (VTy) { 1356 Type *ETy = VTy->getElementType(); 1357 unsigned int numE = VTy->getNumElements(); 1358 unsigned int alignE = TD->getPrefTypeAlignment(ETy); 1359 if (numE == 3) 1360 return 4*alignE; 1361 else 1362 return numE*alignE; 1363 } 1364 1365 const StructType *STy = dyn_cast<StructType>(Ty); 1366 if (STy) { 1367 unsigned int alignStruct = 1; 1368 // Go through each element of the struct and find the 1369 // largest alignment. 1370 for (unsigned i=0, e=STy->getNumElements(); i != e; i++) { 1371 Type *ETy = STy->getElementType(i); 1372 unsigned int align = getOpenCLAlignment(TD, ETy); 1373 if (align > alignStruct) 1374 alignStruct = align; 1375 } 1376 return alignStruct; 1377 } 1378 1379 const FunctionType *FTy = dyn_cast<FunctionType>(Ty); 1380 if (FTy) 1381 return TD->getPointerPrefAlignment(); 1382 return TD->getPrefTypeAlignment(Ty); 1383} 1384 1385void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I, 1386 int paramIndex, raw_ostream &O) { 1387 if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) || 1388 (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)) 1389 O << *CurrentFnSym << "_param_" << paramIndex; 1390 else { 1391 std::string argName = I->getName(); 1392 const char *p = argName.c_str(); 1393 while (*p) { 1394 if (*p == '.') 1395 O << "_"; 1396 else 1397 O << *p; 1398 p++; 1399 } 1400 } 1401} 1402 1403void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) { 1404 Function::const_arg_iterator I, E; 1405 int i = 0; 1406 1407 if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) || 1408 (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)) { 1409 O << *CurrentFnSym << "_param_" << paramIndex; 1410 return; 1411 } 1412 1413 for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, i++) { 1414 if (i==paramIndex) { 1415 printParamName(I, paramIndex, O); 1416 return; 1417 } 1418 } 1419 llvm_unreachable("paramIndex out of bound"); 1420} 1421 1422void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, 1423 raw_ostream &O) { 1424 const TargetData *TD = TM.getTargetData(); 1425 const AttrListPtr &PAL = F->getAttributes(); 1426 const TargetLowering *TLI = TM.getTargetLowering(); 1427 Function::const_arg_iterator I, E; 1428 unsigned paramIndex = 0; 1429 bool first = true; 1430 bool isKernelFunc = llvm::isKernelFunction(*F); 1431 bool isABI = (nvptxSubtarget.getSmVersion() >= 20); 1432 MVT thePointerTy = TLI->getPointerTy(); 1433 1434 O << "(\n"; 1435 1436 for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) { 1437 const Type *Ty = I->getType(); 1438 1439 if (!first) 1440 O << ",\n"; 1441 1442 first = false; 1443 1444 // Handle image/sampler parameters 1445 if (llvm::isSampler(*I) || llvm::isImage(*I)) { 1446 if (llvm::isImage(*I)) { 1447 std::string sname = I->getName(); 1448 if (llvm::isImageWriteOnly(*I)) 1449 O << "\t.param .surfref " << *CurrentFnSym << "_param_" << paramIndex; 1450 else // Default image is read_only 1451 O << "\t.param .texref " << *CurrentFnSym << "_param_" << paramIndex; 1452 } 1453 else // Should be llvm::isSampler(*I) 1454 O << "\t.param .samplerref " << *CurrentFnSym << "_param_" 1455 << paramIndex; 1456 continue; 1457 } 1458 1459 if (PAL.paramHasAttr(paramIndex+1, Attribute::ByVal) == false) { 1460 // Just a scalar 1461 const PointerType *PTy = dyn_cast<PointerType>(Ty); 1462 if (isKernelFunc) { 1463 if (PTy) { 1464 // Special handling for pointer arguments to kernel 1465 O << "\t.param .u" << thePointerTy.getSizeInBits() << " "; 1466 1467 if (nvptxSubtarget.getDrvInterface() != NVPTX::CUDA) { 1468 Type *ETy = PTy->getElementType(); 1469 int addrSpace = PTy->getAddressSpace(); 1470 switch(addrSpace) { 1471 default: 1472 O << ".ptr "; 1473 break; 1474 case llvm::ADDRESS_SPACE_CONST_NOT_GEN: 1475 O << ".ptr .const "; 1476 break; 1477 case llvm::ADDRESS_SPACE_SHARED: 1478 O << ".ptr .shared "; 1479 break; 1480 case llvm::ADDRESS_SPACE_GLOBAL: 1481 case llvm::ADDRESS_SPACE_CONST: 1482 O << ".ptr .global "; 1483 break; 1484 } 1485 O << ".align " << (int)getOpenCLAlignment(TD, ETy) << " "; 1486 } 1487 printParamName(I, paramIndex, O); 1488 continue; 1489 } 1490 1491 // non-pointer scalar to kernel func 1492 O << "\t.param ." 1493 << getPTXFundamentalTypeStr(Ty) << " "; 1494 printParamName(I, paramIndex, O); 1495 continue; 1496 } 1497 // Non-kernel function, just print .param .b<size> for ABI 1498 // and .reg .b<size> for non ABY 1499 unsigned sz = 0; 1500 if (isa<IntegerType>(Ty)) { 1501 sz = cast<IntegerType>(Ty)->getBitWidth(); 1502 if (sz < 32) sz = 32; 1503 } 1504 else if (isa<PointerType>(Ty)) 1505 sz = thePointerTy.getSizeInBits(); 1506 else 1507 sz = Ty->getPrimitiveSizeInBits(); 1508 if (isABI) 1509 O << "\t.param .b" << sz << " "; 1510 else 1511 O << "\t.reg .b" << sz << " "; 1512 printParamName(I, paramIndex, O); 1513 continue; 1514 } 1515 1516 // param has byVal attribute. So should be a pointer 1517 const PointerType *PTy = dyn_cast<PointerType>(Ty); 1518 assert(PTy && 1519 "Param with byval attribute should be a pointer type"); 1520 Type *ETy = PTy->getElementType(); 1521 1522 if (isABI || isKernelFunc) { 1523 // Just print .param .b8 .align <a> .param[size]; 1524 // <a> = PAL.getparamalignment 1525 // size = typeallocsize of element type 1526 unsigned align = PAL.getParamAlignment(paramIndex+1); 1527 unsigned sz = TD->getTypeAllocSize(ETy); 1528 O << "\t.param .align " << align 1529 << " .b8 "; 1530 printParamName(I, paramIndex, O); 1531 O << "[" << sz << "]"; 1532 continue; 1533 } else { 1534 // Split the ETy into constituent parts and 1535 // print .param .b<size> <name> for each part. 1536 // Further, if a part is vector, print the above for 1537 // each vector element. 1538 SmallVector<EVT, 16> vtparts; 1539 ComputeValueVTs(*TLI, ETy, vtparts); 1540 for (unsigned i=0,e=vtparts.size(); i!=e; ++i) { 1541 unsigned elems = 1; 1542 EVT elemtype = vtparts[i]; 1543 if (vtparts[i].isVector()) { 1544 elems = vtparts[i].getVectorNumElements(); 1545 elemtype = vtparts[i].getVectorElementType(); 1546 } 1547 1548 for (unsigned j=0,je=elems; j!=je; ++j) { 1549 unsigned sz = elemtype.getSizeInBits(); 1550 if (elemtype.isInteger() && (sz < 32)) sz = 32; 1551 O << "\t.reg .b" << sz << " "; 1552 printParamName(I, paramIndex, O); 1553 if (j<je-1) O << ",\n"; 1554 ++paramIndex; 1555 } 1556 if (i<e-1) 1557 O << ",\n"; 1558 } 1559 --paramIndex; 1560 continue; 1561 } 1562 } 1563 1564 O << "\n)\n"; 1565} 1566 1567void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF, 1568 raw_ostream &O) { 1569 const Function *F = MF.getFunction(); 1570 emitFunctionParamList(F, O); 1571} 1572 1573 1574void NVPTXAsmPrinter:: 1575setAndEmitFunctionVirtualRegisters(const MachineFunction &MF) { 1576 SmallString<128> Str; 1577 raw_svector_ostream O(Str); 1578 1579 // Map the global virtual register number to a register class specific 1580 // virtual register number starting from 1 with that class. 1581 const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); 1582 //unsigned numRegClasses = TRI->getNumRegClasses(); 1583 1584 // Emit the Fake Stack Object 1585 const MachineFrameInfo *MFI = MF.getFrameInfo(); 1586 int NumBytes = (int) MFI->getStackSize(); 1587 if (NumBytes) { 1588 O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t" 1589 << DEPOTNAME 1590 << getFunctionNumber() << "[" << NumBytes << "];\n"; 1591 if (nvptxSubtarget.is64Bit()) { 1592 O << "\t.reg .b64 \t%SP;\n"; 1593 O << "\t.reg .b64 \t%SPL;\n"; 1594 } 1595 else { 1596 O << "\t.reg .b32 \t%SP;\n"; 1597 O << "\t.reg .b32 \t%SPL;\n"; 1598 } 1599 } 1600 1601 // Go through all virtual registers to establish the mapping between the 1602 // global virtual 1603 // register number and the per class virtual register number. 1604 // We use the per class virtual register number in the ptx output. 1605 unsigned int numVRs = MRI->getNumVirtRegs(); 1606 for (unsigned i=0; i< numVRs; i++) { 1607 unsigned int vr = TRI->index2VirtReg(i); 1608 const TargetRegisterClass *RC = MRI->getRegClass(vr); 1609 std::map<unsigned, unsigned> ®map = VRidGlobal2LocalMap[RC->getID()]; 1610 int n = regmap.size(); 1611 regmap.insert(std::make_pair(vr, n+1)); 1612 } 1613 1614 // Emit register declarations 1615 // @TODO: Extract out the real register usage 1616 O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n"; 1617 O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n"; 1618 O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n"; 1619 O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n"; 1620 O << "\t.reg .s64 %rl<" << NVPTXNumRegisters << ">;\n"; 1621 O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n"; 1622 O << "\t.reg .f64 %fl<" << NVPTXNumRegisters << ">;\n"; 1623 1624 // Emit declaration of the virtual registers or 'physical' registers for 1625 // each register class 1626 //for (unsigned i=0; i< numRegClasses; i++) { 1627 // std::map<unsigned, unsigned> ®map = VRidGlobal2LocalMap[i]; 1628 // const TargetRegisterClass *RC = TRI->getRegClass(i); 1629 // std::string rcname = getNVPTXRegClassName(RC); 1630 // std::string rcStr = getNVPTXRegClassStr(RC); 1631 // //int n = regmap.size(); 1632 // if (!isNVPTXVectorRegClass(RC)) { 1633 // O << "\t.reg " << rcname << " \t" << rcStr << "<" 1634 // << NVPTXNumRegisters << ">;\n"; 1635 // } 1636 1637 // Only declare those registers that may be used. And do not emit vector 1638 // registers as 1639 // they are all elementized to scalar registers. 1640 //if (n && !isNVPTXVectorRegClass(RC)) { 1641 // if (RegAllocNilUsed) { 1642 // O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1) 1643 // << ">;\n"; 1644 // } 1645 // else { 1646 // O << "\t.reg " << rcname << " \t" << StrToUpper(rcStr) 1647 // << "<" << 32 << ">;\n"; 1648 // } 1649 //} 1650 //} 1651 1652 OutStreamer.EmitRawText(O.str()); 1653} 1654 1655 1656void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) { 1657 APFloat APF = APFloat(Fp->getValueAPF()); // make a copy 1658 bool ignored; 1659 unsigned int numHex; 1660 const char *lead; 1661 1662 if (Fp->getType()->getTypeID()==Type::FloatTyID) { 1663 numHex = 8; 1664 lead = "0f"; 1665 APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, 1666 &ignored); 1667 } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) { 1668 numHex = 16; 1669 lead = "0d"; 1670 APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, 1671 &ignored); 1672 } else 1673 llvm_unreachable("unsupported fp type"); 1674 1675 APInt API = APF.bitcastToAPInt(); 1676 std::string hexstr(utohexstr(API.getZExtValue())); 1677 O << lead; 1678 if (hexstr.length() < numHex) 1679 O << std::string(numHex - hexstr.length(), '0'); 1680 O << utohexstr(API.getZExtValue()); 1681} 1682 1683void NVPTXAsmPrinter::printScalarConstant(Constant *CPV, raw_ostream &O) { 1684 if (ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) { 1685 O << CI->getValue(); 1686 return; 1687 } 1688 if (ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) { 1689 printFPConstant(CFP, O); 1690 return; 1691 } 1692 if (isa<ConstantPointerNull>(CPV)) { 1693 O << "0"; 1694 return; 1695 } 1696 if (GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) { 1697 O << *Mang->getSymbol(GVar); 1698 return; 1699 } 1700 if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1701 Value *v = Cexpr->stripPointerCasts(); 1702 if (GlobalValue *GVar = dyn_cast<GlobalValue>(v)) { 1703 O << *Mang->getSymbol(GVar); 1704 return; 1705 } else { 1706 O << *LowerConstant(CPV, *this); 1707 return; 1708 } 1709 } 1710 llvm_unreachable("Not scalar type found in printScalarConstant()"); 1711} 1712 1713 1714void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, 1715 AggBuffer *aggBuffer) { 1716 1717 const TargetData *TD = TM.getTargetData(); 1718 1719 if (isa<UndefValue>(CPV) || CPV->isNullValue()) { 1720 int s = TD->getTypeAllocSize(CPV->getType()); 1721 if (s<Bytes) 1722 s = Bytes; 1723 aggBuffer->addZeros(s); 1724 return; 1725 } 1726 1727 unsigned char *ptr; 1728 switch (CPV->getType()->getTypeID()) { 1729 1730 case Type::IntegerTyID: { 1731 const Type *ETy = CPV->getType(); 1732 if ( ETy == Type::getInt8Ty(CPV->getContext()) ){ 1733 unsigned char c = 1734 (unsigned char)(dyn_cast<ConstantInt>(CPV))->getZExtValue(); 1735 ptr = &c; 1736 aggBuffer->addBytes(ptr, 1, Bytes); 1737 } else if ( ETy == Type::getInt16Ty(CPV->getContext()) ) { 1738 short int16 = 1739 (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue(); 1740 ptr = (unsigned char*)&int16; 1741 aggBuffer->addBytes(ptr, 2, Bytes); 1742 } else if ( ETy == Type::getInt32Ty(CPV->getContext()) ) { 1743 if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { 1744 int int32 =(int)(constInt->getZExtValue()); 1745 ptr = (unsigned char*)&int32; 1746 aggBuffer->addBytes(ptr, 4, Bytes); 1747 break; 1748 } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1749 if (ConstantInt *constInt = 1750 dyn_cast<ConstantInt>(ConstantFoldConstantExpression( 1751 Cexpr, TD))) { 1752 int int32 =(int)(constInt->getZExtValue()); 1753 ptr = (unsigned char*)&int32; 1754 aggBuffer->addBytes(ptr, 4, Bytes); 1755 break; 1756 } 1757 if (Cexpr->getOpcode() == Instruction::PtrToInt) { 1758 Value *v = Cexpr->getOperand(0)->stripPointerCasts(); 1759 aggBuffer->addSymbol(v); 1760 aggBuffer->addZeros(4); 1761 break; 1762 } 1763 } 1764 llvm_unreachable("unsupported integer const type"); 1765 } else if (ETy == Type::getInt64Ty(CPV->getContext()) ) { 1766 if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { 1767 long long int64 =(long long)(constInt->getZExtValue()); 1768 ptr = (unsigned char*)&int64; 1769 aggBuffer->addBytes(ptr, 8, Bytes); 1770 break; 1771 } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1772 if (ConstantInt *constInt = dyn_cast<ConstantInt>( 1773 ConstantFoldConstantExpression(Cexpr, TD))) { 1774 long long int64 =(long long)(constInt->getZExtValue()); 1775 ptr = (unsigned char*)&int64; 1776 aggBuffer->addBytes(ptr, 8, Bytes); 1777 break; 1778 } 1779 if (Cexpr->getOpcode() == Instruction::PtrToInt) { 1780 Value *v = Cexpr->getOperand(0)->stripPointerCasts(); 1781 aggBuffer->addSymbol(v); 1782 aggBuffer->addZeros(8); 1783 break; 1784 } 1785 } 1786 llvm_unreachable("unsupported integer const type"); 1787 } else 1788 llvm_unreachable("unsupported integer const type"); 1789 break; 1790 } 1791 case Type::FloatTyID: 1792 case Type::DoubleTyID: { 1793 ConstantFP *CFP = dyn_cast<ConstantFP>(CPV); 1794 const Type* Ty = CFP->getType(); 1795 if (Ty == Type::getFloatTy(CPV->getContext())) { 1796 float float32 = (float)CFP->getValueAPF().convertToFloat(); 1797 ptr = (unsigned char*)&float32; 1798 aggBuffer->addBytes(ptr, 4, Bytes); 1799 } else if (Ty == Type::getDoubleTy(CPV->getContext())) { 1800 double float64 = CFP->getValueAPF().convertToDouble(); 1801 ptr = (unsigned char*)&float64; 1802 aggBuffer->addBytes(ptr, 8, Bytes); 1803 } 1804 else { 1805 llvm_unreachable("unsupported fp const type"); 1806 } 1807 break; 1808 } 1809 case Type::PointerTyID: { 1810 if (GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) { 1811 aggBuffer->addSymbol(GVar); 1812 } 1813 else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1814 Value *v = Cexpr->stripPointerCasts(); 1815 aggBuffer->addSymbol(v); 1816 } 1817 unsigned int s = TD->getTypeAllocSize(CPV->getType()); 1818 aggBuffer->addZeros(s); 1819 break; 1820 } 1821 1822 case Type::ArrayTyID: 1823 case Type::VectorTyID: 1824 case Type::StructTyID: { 1825 if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV) || 1826 isa<ConstantStruct>(CPV)) { 1827 int ElementSize = TD->getTypeAllocSize(CPV->getType()); 1828 bufferAggregateConstant(CPV, aggBuffer); 1829 if ( Bytes > ElementSize ) 1830 aggBuffer->addZeros(Bytes-ElementSize); 1831 } 1832 else if (isa<ConstantAggregateZero>(CPV)) 1833 aggBuffer->addZeros(Bytes); 1834 else 1835 llvm_unreachable("Unexpected Constant type"); 1836 break; 1837 } 1838 1839 default: 1840 llvm_unreachable("unsupported type"); 1841 } 1842} 1843 1844void NVPTXAsmPrinter::bufferAggregateConstant(Constant *CPV, 1845 AggBuffer *aggBuffer) { 1846 const TargetData *TD = TM.getTargetData(); 1847 int Bytes; 1848 1849 // Old constants 1850 if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) { 1851 if (CPV->getNumOperands()) 1852 for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) 1853 bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer); 1854 return; 1855 } 1856 1857 if (const ConstantDataSequential *CDS = 1858 dyn_cast<ConstantDataSequential>(CPV)) { 1859 if (CDS->getNumElements()) 1860 for (unsigned i = 0; i < CDS->getNumElements(); ++i) 1861 bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0, 1862 aggBuffer); 1863 return; 1864 } 1865 1866 1867 if (isa<ConstantStruct>(CPV)) { 1868 if (CPV->getNumOperands()) { 1869 StructType *ST = cast<StructType>(CPV->getType()); 1870 for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) { 1871 if ( i == (e - 1)) 1872 Bytes = TD->getStructLayout(ST)->getElementOffset(0) + 1873 TD->getTypeAllocSize(ST) 1874 - TD->getStructLayout(ST)->getElementOffset(i); 1875 else 1876 Bytes = TD->getStructLayout(ST)->getElementOffset(i+1) - 1877 TD->getStructLayout(ST)->getElementOffset(i); 1878 bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, 1879 aggBuffer); 1880 } 1881 } 1882 return; 1883 } 1884 llvm_unreachable("unsupported constant type in printAggregateConstant()"); 1885} 1886 1887// buildTypeNameMap - Run through symbol table looking for type names. 1888// 1889 1890 1891bool NVPTXAsmPrinter::isImageType(const Type *Ty) { 1892 1893 std::map<const Type *, std::string>::iterator PI = TypeNameMap.find(Ty); 1894 1895 if (PI != TypeNameMap.end() && 1896 (!PI->second.compare("struct._image1d_t") || 1897 !PI->second.compare("struct._image2d_t") || 1898 !PI->second.compare("struct._image3d_t"))) 1899 return true; 1900 1901 return false; 1902} 1903 1904/// PrintAsmOperand - Print out an operand for an inline asm expression. 1905/// 1906bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, 1907 unsigned AsmVariant, 1908 const char *ExtraCode, 1909 raw_ostream &O) { 1910 if (ExtraCode && ExtraCode[0]) { 1911 if (ExtraCode[1] != 0) return true; // Unknown modifier. 1912 1913 switch (ExtraCode[0]) { 1914 default: 1915 // See if this is a generic print operand 1916 return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O); 1917 case 'r': 1918 break; 1919 } 1920 } 1921 1922 printOperand(MI, OpNo, O); 1923 1924 return false; 1925} 1926 1927bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, 1928 unsigned OpNo, 1929 unsigned AsmVariant, 1930 const char *ExtraCode, 1931 raw_ostream &O) { 1932 if (ExtraCode && ExtraCode[0]) 1933 return true; // Unknown modifier 1934 1935 O << '['; 1936 printMemOperand(MI, OpNo, O); 1937 O << ']'; 1938 1939 return false; 1940} 1941 1942bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) 1943{ 1944 switch(MI.getOpcode()) { 1945 default: 1946 return false; 1947 case NVPTX::CallArgBeginInst: case NVPTX::CallArgEndInst0: 1948 case NVPTX::CallArgEndInst1: case NVPTX::CallArgF32: 1949 case NVPTX::CallArgF64: case NVPTX::CallArgI16: 1950 case NVPTX::CallArgI32: case NVPTX::CallArgI32imm: 1951 case NVPTX::CallArgI64: case NVPTX::CallArgI8: 1952 case NVPTX::CallArgParam: case NVPTX::CallVoidInst: 1953 case NVPTX::CallVoidInstReg: case NVPTX::Callseq_End: 1954 case NVPTX::CallVoidInstReg64: 1955 case NVPTX::DeclareParamInst: case NVPTX::DeclareRetMemInst: 1956 case NVPTX::DeclareRetRegInst: case NVPTX::DeclareRetScalarInst: 1957 case NVPTX::DeclareScalarParamInst: case NVPTX::DeclareScalarRegInst: 1958 case NVPTX::StoreParamF32: case NVPTX::StoreParamF64: 1959 case NVPTX::StoreParamI16: case NVPTX::StoreParamI32: 1960 case NVPTX::StoreParamI64: case NVPTX::StoreParamI8: 1961 case NVPTX::StoreParamS32I8: case NVPTX::StoreParamU32I8: 1962 case NVPTX::StoreParamS32I16: case NVPTX::StoreParamU32I16: 1963 case NVPTX::StoreParamScalar2F32: case NVPTX::StoreParamScalar2F64: 1964 case NVPTX::StoreParamScalar2I16: case NVPTX::StoreParamScalar2I32: 1965 case NVPTX::StoreParamScalar2I64: case NVPTX::StoreParamScalar2I8: 1966 case NVPTX::StoreParamScalar4F32: case NVPTX::StoreParamScalar4I16: 1967 case NVPTX::StoreParamScalar4I32: case NVPTX::StoreParamScalar4I8: 1968 case NVPTX::StoreParamV2F32: case NVPTX::StoreParamV2F64: 1969 case NVPTX::StoreParamV2I16: case NVPTX::StoreParamV2I32: 1970 case NVPTX::StoreParamV2I64: case NVPTX::StoreParamV2I8: 1971 case NVPTX::StoreParamV4F32: case NVPTX::StoreParamV4I16: 1972 case NVPTX::StoreParamV4I32: case NVPTX::StoreParamV4I8: 1973 case NVPTX::StoreRetvalF32: case NVPTX::StoreRetvalF64: 1974 case NVPTX::StoreRetvalI16: case NVPTX::StoreRetvalI32: 1975 case NVPTX::StoreRetvalI64: case NVPTX::StoreRetvalI8: 1976 case NVPTX::StoreRetvalScalar2F32: case NVPTX::StoreRetvalScalar2F64: 1977 case NVPTX::StoreRetvalScalar2I16: case NVPTX::StoreRetvalScalar2I32: 1978 case NVPTX::StoreRetvalScalar2I64: case NVPTX::StoreRetvalScalar2I8: 1979 case NVPTX::StoreRetvalScalar4F32: case NVPTX::StoreRetvalScalar4I16: 1980 case NVPTX::StoreRetvalScalar4I32: case NVPTX::StoreRetvalScalar4I8: 1981 case NVPTX::StoreRetvalV2F32: case NVPTX::StoreRetvalV2F64: 1982 case NVPTX::StoreRetvalV2I16: case NVPTX::StoreRetvalV2I32: 1983 case NVPTX::StoreRetvalV2I64: case NVPTX::StoreRetvalV2I8: 1984 case NVPTX::StoreRetvalV4F32: case NVPTX::StoreRetvalV4I16: 1985 case NVPTX::StoreRetvalV4I32: case NVPTX::StoreRetvalV4I8: 1986 case NVPTX::LastCallArgF32: case NVPTX::LastCallArgF64: 1987 case NVPTX::LastCallArgI16: case NVPTX::LastCallArgI32: 1988 case NVPTX::LastCallArgI32imm: case NVPTX::LastCallArgI64: 1989 case NVPTX::LastCallArgI8: case NVPTX::LastCallArgParam: 1990 case NVPTX::LoadParamMemF32: case NVPTX::LoadParamMemF64: 1991 case NVPTX::LoadParamMemI16: case NVPTX::LoadParamMemI32: 1992 case NVPTX::LoadParamMemI64: case NVPTX::LoadParamMemI8: 1993 case NVPTX::LoadParamRegF32: case NVPTX::LoadParamRegF64: 1994 case NVPTX::LoadParamRegI16: case NVPTX::LoadParamRegI32: 1995 case NVPTX::LoadParamRegI64: case NVPTX::LoadParamRegI8: 1996 case NVPTX::LoadParamScalar2F32: case NVPTX::LoadParamScalar2F64: 1997 case NVPTX::LoadParamScalar2I16: case NVPTX::LoadParamScalar2I32: 1998 case NVPTX::LoadParamScalar2I64: case NVPTX::LoadParamScalar2I8: 1999 case NVPTX::LoadParamScalar4F32: case NVPTX::LoadParamScalar4I16: 2000 case NVPTX::LoadParamScalar4I32: case NVPTX::LoadParamScalar4I8: 2001 case NVPTX::LoadParamV2F32: case NVPTX::LoadParamV2F64: 2002 case NVPTX::LoadParamV2I16: case NVPTX::LoadParamV2I32: 2003 case NVPTX::LoadParamV2I64: case NVPTX::LoadParamV2I8: 2004 case NVPTX::LoadParamV4F32: case NVPTX::LoadParamV4I16: 2005 case NVPTX::LoadParamV4I32: case NVPTX::LoadParamV4I8: 2006 case NVPTX::PrototypeInst: case NVPTX::DBG_VALUE: 2007 return true; 2008 } 2009 return false; 2010} 2011 2012// Force static initialization. 2013extern "C" void LLVMInitializeNVPTXBackendAsmPrinter() { 2014 RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32); 2015 RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64); 2016} 2017 2018 2019void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) { 2020 std::stringstream temp; 2021 LineReader * reader = this->getReader(filename.str()); 2022 temp << "\n//"; 2023 temp << filename.str(); 2024 temp << ":"; 2025 temp << line; 2026 temp << " "; 2027 temp << reader->readLine(line); 2028 temp << "\n"; 2029 this->OutStreamer.EmitRawText(Twine(temp.str())); 2030} 2031 2032 2033LineReader *NVPTXAsmPrinter::getReader(std::string filename) { 2034 if (reader == NULL) { 2035 reader = new LineReader(filename); 2036 } 2037 2038 if (reader->fileName() != filename) { 2039 delete reader; 2040 reader = new LineReader(filename); 2041 } 2042 2043 return reader; 2044} 2045 2046 2047std::string 2048LineReader::readLine(unsigned lineNum) { 2049 if (lineNum < theCurLine) { 2050 theCurLine = 0; 2051 fstr.seekg(0,std::ios::beg); 2052 } 2053 while (theCurLine < lineNum) { 2054 fstr.getline(buff,500); 2055 theCurLine++; 2056 } 2057 return buff; 2058} 2059 2060// Force static initialization. 2061extern "C" void LLVMInitializeNVPTXAsmPrinter() { 2062 RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32); 2063 RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64); 2064} 2065