1//===-- VectorElementize.cpp - Remove unreachable blocks for codegen --===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This pass converts operations on vector types to operations on their 11// element types. 12// 13// For generic binary and unary vector instructions, the conversion is simple. 14// Suppose we have 15// av = bv Vop cv 16// where av, bv, and cv are vector virtual registers, and Vop is a vector op. 17// This gets converted to the following : 18// a1 = b1 Sop c1 19// a2 = b2 Sop c2 20// 21// VectorToScalarMap maintains the vector vreg to scalar vreg mapping. 22// For the above example, the map will look as follows: 23// av => [a1, a2] 24// bv => [b1, b2] 25// 26// In addition, initVectorInfo creates the following opcode->opcode map. 27// Vop => Sop 28// OtherVop => OtherSop 29// ... 30// 31// For vector specific instructions like vecbuild, vecshuffle etc, the 32// conversion is different. Look at comments near the functions with 33// prefix createVec<...>. 34// 35//===----------------------------------------------------------------------===// 36 37#include "llvm/CodeGen/Passes.h" 38#include "llvm/Constant.h" 39#include "llvm/Instructions.h" 40#include "llvm/Function.h" 41#include "llvm/Pass.h" 42#include "llvm/Type.h" 43#include "llvm/Support/CommandLine.h" 44#include "llvm/CodeGen/MachineFunctionPass.h" 45#include "llvm/CodeGen/MachineModuleInfo.h" 46#include "llvm/CodeGen/MachineRegisterInfo.h" 47#include "llvm/CodeGen/MachineInstrBuilder.h" 48#include "llvm/Support/CFG.h" 49#include "llvm/Support/Compiler.h" 50#include "llvm/Target/TargetInstrInfo.h" 51#include "llvm/ADT/DepthFirstIterator.h" 52#include "llvm/ADT/SmallPtrSet.h" 53#include "NVPTX.h" 54#include "NVPTXTargetMachine.h" 55 56using namespace llvm; 57 58namespace { 59 60class LLVM_LIBRARY_VISIBILITY VectorElementize : public MachineFunctionPass { 61 virtual bool runOnMachineFunction(MachineFunction &F); 62 63 NVPTXTargetMachine &TM; 64 MachineRegisterInfo *MRI; 65 const NVPTXRegisterInfo *RegInfo; 66 const NVPTXInstrInfo *InstrInfo; 67 68 llvm::DenseMap<const TargetRegisterClass *, const TargetRegisterClass *> 69 RegClassMap; 70 llvm::DenseMap<unsigned, bool> SimpleMoveMap; 71 72 llvm::DenseMap<unsigned, SmallVector<unsigned, 4> > VectorToScalarMap; 73 74 bool isVectorInstr(MachineInstr *); 75 76 SmallVector<unsigned, 4> getScalarRegisters(unsigned); 77 unsigned getScalarVersion(unsigned); 78 unsigned getScalarVersion(MachineInstr *); 79 80 bool isVectorRegister(unsigned); 81 const TargetRegisterClass *getScalarRegClass(const TargetRegisterClass *RC); 82 unsigned numCopiesNeeded(MachineInstr *); 83 84 void createLoadCopy(MachineFunction&, MachineInstr *, 85 std::vector<MachineInstr *>&); 86 void createStoreCopy(MachineFunction&, MachineInstr *, 87 std::vector<MachineInstr *>&); 88 89 void createVecDest(MachineFunction&, MachineInstr *, 90 std::vector<MachineInstr *>&); 91 92 void createCopies(MachineFunction&, MachineInstr *, 93 std::vector<MachineInstr *>&); 94 95 unsigned copyProp(MachineFunction&); 96 unsigned removeDeadMoves(MachineFunction&); 97 98 void elementize(MachineFunction&); 99 100 bool isSimpleMove(MachineInstr *); 101 102 void createVecShuffle(MachineFunction& F, MachineInstr *Instr, 103 std::vector<MachineInstr *>& copies); 104 105 void createVecExtract(MachineFunction& F, MachineInstr *Instr, 106 std::vector<MachineInstr *>& copies); 107 108 void createVecInsert(MachineFunction& F, MachineInstr *Instr, 109 std::vector<MachineInstr *>& copies); 110 111 void createVecBuild(MachineFunction& F, MachineInstr *Instr, 112 std::vector<MachineInstr *>& copies); 113 114public: 115 116 static char ID; // Pass identification, replacement for typeid 117 VectorElementize(NVPTXTargetMachine &tm) 118 : MachineFunctionPass(ID), TM(tm) {} 119 120 virtual const char *getPassName() const { 121 return "Convert LLVM vector types to their element types"; 122 } 123}; 124 125char VectorElementize::ID = 1; 126} 127 128static cl::opt<bool> 129RemoveRedundantMoves("nvptx-remove-redundant-moves", 130 cl::desc("NVPTX: Remove redundant moves introduced by vector lowering"), 131 cl::init(true)); 132 133#define VECINST(x) ((((x)->getDesc().TSFlags) & NVPTX::VecInstTypeMask) \ 134 >> NVPTX::VecInstTypeShift) 135#define ISVECINST(x) (VECINST(x) != NVPTX::VecNOP) 136#define ISVECLOAD(x) (VECINST(x) == NVPTX::VecLoad) 137#define ISVECSTORE(x) (VECINST(x) == NVPTX::VecStore) 138#define ISVECBUILD(x) (VECINST(x) == NVPTX::VecBuild) 139#define ISVECSHUFFLE(x) (VECINST(x) == NVPTX::VecShuffle) 140#define ISVECEXTRACT(x) (VECINST(x) == NVPTX::VecExtract) 141#define ISVECINSERT(x) (VECINST(x) == NVPTX::VecInsert) 142#define ISVECDEST(x) (VECINST(x) == NVPTX::VecDest) 143 144bool VectorElementize::isSimpleMove(MachineInstr *mi) { 145 if (mi->isCopy()) 146 return true; 147 unsigned TSFlags = (mi->getDesc().TSFlags & NVPTX::SimpleMoveMask) 148 >> NVPTX::SimpleMoveShift; 149 return (TSFlags == 1); 150} 151 152bool VectorElementize::isVectorInstr(MachineInstr *mi) { 153 if ((mi->getOpcode() == NVPTX::PHI) || 154 (mi->getOpcode() == NVPTX::IMPLICIT_DEF) || mi->isCopy()) { 155 MachineOperand dest = mi->getOperand(0); 156 return isVectorRegister(dest.getReg()); 157 } 158 return ISVECINST(mi); 159} 160 161unsigned VectorElementize::getScalarVersion(MachineInstr *mi) { 162 return getScalarVersion(mi->getOpcode()); 163} 164 165///============================================================================= 166///Instr is assumed to be a vector instruction. For most vector instructions, 167///the size of the destination vector register gives the number of scalar copies 168///needed. For VecStore, size of getOperand(1) gives the number of scalar copies 169///needed. For VecExtract, the dest is a scalar. So getOperand(1) gives the 170///number of scalar copies needed. 171///============================================================================= 172unsigned VectorElementize::numCopiesNeeded(MachineInstr *Instr) { 173 unsigned numDefs=0; 174 unsigned def; 175 for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) { 176 MachineOperand oper = Instr->getOperand(i); 177 178 if (!oper.isReg()) continue; 179 if (!oper.isDef()) continue; 180 def = i; 181 numDefs++; 182 } 183 assert((numDefs <= 1) && "Only 0 or 1 defs supported"); 184 185 if (numDefs == 1) { 186 unsigned regnum = Instr->getOperand(def).getReg(); 187 if (ISVECEXTRACT(Instr)) 188 regnum = Instr->getOperand(1).getReg(); 189 return getNVPTXVectorSize(MRI->getRegClass(regnum)); 190 } 191 else if (numDefs == 0) { 192 assert(ISVECSTORE(Instr) 193 && "Only 0 def instruction supported is vector store"); 194 195 unsigned regnum = Instr->getOperand(0).getReg(); 196 return getNVPTXVectorSize(MRI->getRegClass(regnum)); 197 } 198 return 1; 199} 200 201const TargetRegisterClass *VectorElementize:: 202getScalarRegClass(const TargetRegisterClass *RC) { 203 assert(isNVPTXVectorRegClass(RC) && 204 "Not a vector register class"); 205 return getNVPTXElemClass(RC); 206} 207 208bool VectorElementize::isVectorRegister(unsigned reg) { 209 const TargetRegisterClass *RC=MRI->getRegClass(reg); 210 return isNVPTXVectorRegClass(RC); 211} 212 213///============================================================================= 214///For every vector register 'v' that is not already in the VectorToScalarMap, 215///create n scalar registers of the corresponding element type, where n 216///is 2 or 4 (getNVPTXVectorSize) and add it VectorToScalarMap. 217///============================================================================= 218SmallVector<unsigned, 4> VectorElementize::getScalarRegisters(unsigned regnum) { 219 assert(isVectorRegister(regnum) && "Expecting a vector register here"); 220 // Create the scalar registers and put them in the map, if not already there. 221 if (VectorToScalarMap.find(regnum) == VectorToScalarMap.end()) { 222 const TargetRegisterClass *vecClass = MRI->getRegClass(regnum); 223 const TargetRegisterClass *scalarClass = getScalarRegClass(vecClass); 224 225 SmallVector<unsigned, 4> temp; 226 227 for (unsigned i=0, e=getNVPTXVectorSize(vecClass); i!=e; ++i) 228 temp.push_back(MRI->createVirtualRegister(scalarClass)); 229 230 VectorToScalarMap[regnum] = temp; 231 } 232 return VectorToScalarMap[regnum]; 233} 234 235///============================================================================= 236///For a vector load of the form 237///va <= ldv2 [addr] 238///the following multi output instruction is created : 239///[v1, v2] <= LD [addr] 240///Look at NVPTXVector.td for the definitions of multi output loads. 241///============================================================================= 242void VectorElementize::createLoadCopy(MachineFunction& F, MachineInstr *Instr, 243 std::vector<MachineInstr *>& copies) { 244 copies.push_back(F.CloneMachineInstr(Instr)); 245 246 MachineInstr *copy=copies[0]; 247 copy->setDesc(InstrInfo->get(getScalarVersion(copy))); 248 249 // Remove the dest, that should be a vector operand. 250 MachineOperand dest = copy->getOperand(0); 251 unsigned regnum = dest.getReg(); 252 253 SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum); 254 copy->RemoveOperand(0); 255 256 std::vector<MachineOperand> otherOperands; 257 for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i) 258 otherOperands.push_back(copy->getOperand(i)); 259 260 for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i) 261 copy->RemoveOperand(0); 262 263 for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i) { 264 copy->addOperand(MachineOperand::CreateReg(scalarRegs[i], true)); 265 } 266 267 for (unsigned i=0, e=otherOperands.size(); i!=e; ++i) 268 copy->addOperand(otherOperands[i]); 269 270} 271 272///============================================================================= 273///For a vector store of the form 274///stv2 va, [addr] 275///the following multi input instruction is created : 276///ST v1, v2, [addr] 277///Look at NVPTXVector.td for the definitions of multi input stores. 278///============================================================================= 279void VectorElementize::createStoreCopy(MachineFunction& F, MachineInstr *Instr, 280 std::vector<MachineInstr *>& copies) { 281 copies.push_back(F.CloneMachineInstr(Instr)); 282 283 MachineInstr *copy=copies[0]; 284 copy->setDesc(InstrInfo->get(getScalarVersion(copy))); 285 286 MachineOperand src = copy->getOperand(0); 287 unsigned regnum = src.getReg(); 288 289 SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum); 290 copy->RemoveOperand(0); 291 292 std::vector<MachineOperand> otherOperands; 293 for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i) 294 otherOperands.push_back(copy->getOperand(i)); 295 296 for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i) 297 copy->RemoveOperand(0); 298 299 for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i) 300 copy->addOperand(MachineOperand::CreateReg(scalarRegs[i], false)); 301 302 for (unsigned i=0, e=otherOperands.size(); i!=e; ++i) 303 copy->addOperand(otherOperands[i]); 304} 305 306///============================================================================= 307///va <= shufflev2 vb, vc, <i1>, <i2> 308///gets converted to 2 moves into a1 and a2. The source of the moves depend on 309///i1 and i2. i1, i2 can belong to the set {0, 1, 2, 3} for shufflev2. For 310///shufflev4 the set is {0,..7}. For example, if i1=3, i2=0, the move 311///instructions will be 312///a1 <= c2 313///a2 <= b1 314///============================================================================= 315void VectorElementize::createVecShuffle(MachineFunction& F, MachineInstr *Instr, 316 std::vector<MachineInstr *>& copies) { 317 unsigned numcopies=numCopiesNeeded(Instr); 318 319 unsigned destregnum = Instr->getOperand(0).getReg(); 320 unsigned src1regnum = Instr->getOperand(1).getReg(); 321 unsigned src2regnum = Instr->getOperand(2).getReg(); 322 323 SmallVector<unsigned, 4> dest = getScalarRegisters(destregnum); 324 SmallVector<unsigned, 4> src1 = getScalarRegisters(src1regnum); 325 SmallVector<unsigned, 4> src2 = getScalarRegisters(src2regnum); 326 327 DebugLoc DL = Instr->getDebugLoc(); 328 329 for (unsigned i=0; i<numcopies; i++) { 330 MachineInstr *copy = BuildMI(F, DL, 331 InstrInfo->get(getScalarVersion(Instr)), dest[i]); 332 MachineOperand which=Instr->getOperand(3+i); 333 assert(which.isImm() && "Shuffle operand not a constant"); 334 335 int src=which.getImm(); 336 int elem=src%numcopies; 337 338 if (which.getImm() < numcopies) 339 copy->addOperand(MachineOperand::CreateReg(src1[elem], false)); 340 else 341 copy->addOperand(MachineOperand::CreateReg(src2[elem], false)); 342 copies.push_back(copy); 343 } 344} 345 346///============================================================================= 347///a <= extractv2 va, <i1> 348///gets turned into a simple move to the scalar register a. The source depends 349///on i1. 350///============================================================================= 351void VectorElementize::createVecExtract(MachineFunction& F, MachineInstr *Instr, 352 std::vector<MachineInstr *>& copies) { 353 unsigned srcregnum = Instr->getOperand(1).getReg(); 354 355 SmallVector<unsigned, 4> src = getScalarRegisters(srcregnum); 356 357 MachineOperand which = Instr->getOperand(2); 358 assert(which.isImm() && "Extract operand not a constant"); 359 360 DebugLoc DL = Instr->getDebugLoc(); 361 362 MachineInstr *copy = BuildMI(F, DL, InstrInfo->get(getScalarVersion(Instr)), 363 Instr->getOperand(0).getReg()); 364 copy->addOperand(MachineOperand::CreateReg(src[which.getImm()], false)); 365 366 copies.push_back(copy); 367} 368 369///============================================================================= 370///va <= vecinsertv2 vb, c, <i1> 371///This instruction copies all elements of vb to va, except the 'i1'th element. 372///The scalar value c becomes the 'i1'th element of va. 373///This gets translated to 2 (4 for vecinsertv4) moves. 374///============================================================================= 375void VectorElementize::createVecInsert(MachineFunction& F, MachineInstr *Instr, 376 std::vector<MachineInstr *>& copies) { 377 unsigned numcopies=numCopiesNeeded(Instr); 378 379 unsigned destregnum = Instr->getOperand(0).getReg(); 380 unsigned srcregnum = Instr->getOperand(1).getReg(); 381 382 SmallVector<unsigned, 4> dest = getScalarRegisters(destregnum); 383 SmallVector<unsigned, 4> src = getScalarRegisters(srcregnum); 384 385 MachineOperand which=Instr->getOperand(3); 386 assert(which.isImm() && "Insert operand not a constant"); 387 unsigned int elem=which.getImm(); 388 389 DebugLoc DL = Instr->getDebugLoc(); 390 391 for (unsigned i=0; i<numcopies; i++) { 392 MachineInstr *copy = BuildMI(F, DL, 393 InstrInfo->get(getScalarVersion(Instr)), dest[i]); 394 395 if (i != elem) 396 copy->addOperand(MachineOperand::CreateReg(src[i], false)); 397 else 398 copy->addOperand(Instr->getOperand(2)); 399 400 copies.push_back(copy); 401 } 402 403} 404 405///============================================================================= 406///va <= buildv2 b1, b2 407///gets translated to 408///a1 <= b1 409///a2 <= b2 410///============================================================================= 411void VectorElementize::createVecBuild(MachineFunction& F, MachineInstr *Instr, 412 std::vector<MachineInstr *>& copies) { 413 unsigned numcopies=numCopiesNeeded(Instr); 414 415 unsigned destregnum = Instr->getOperand(0).getReg(); 416 417 SmallVector<unsigned, 4> dest = getScalarRegisters(destregnum); 418 419 DebugLoc DL = Instr->getDebugLoc(); 420 421 for (unsigned i=0; i<numcopies; i++) { 422 MachineInstr *copy = BuildMI(F, DL, 423 InstrInfo->get(getScalarVersion(Instr)), dest[i]); 424 425 copy->addOperand(Instr->getOperand(1+i)); 426 427 copies.push_back(copy); 428 } 429 430} 431 432///============================================================================= 433///For a tex inst of the form 434///va <= op [scalar operands] 435///the following multi output instruction is created : 436///[v1, v2] <= op' [scalar operands] 437///============================================================================= 438void VectorElementize::createVecDest(MachineFunction& F, MachineInstr *Instr, 439 std::vector<MachineInstr *>& copies) { 440 copies.push_back(F.CloneMachineInstr(Instr)); 441 442 MachineInstr *copy=copies[0]; 443 copy->setDesc(InstrInfo->get(getScalarVersion(copy))); 444 445 // Remove the dest, that should be a vector operand. 446 MachineOperand dest = copy->getOperand(0); 447 unsigned regnum = dest.getReg(); 448 449 SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum); 450 copy->RemoveOperand(0); 451 452 std::vector<MachineOperand> otherOperands; 453 for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i) 454 otherOperands.push_back(copy->getOperand(i)); 455 456 for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i) 457 copy->RemoveOperand(0); 458 459 for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i) 460 copy->addOperand(MachineOperand::CreateReg(scalarRegs[i], true)); 461 462 for (unsigned i=0, e=otherOperands.size(); i!=e; ++i) 463 copy->addOperand(otherOperands[i]); 464} 465 466///============================================================================= 467///Look at the vector instruction type and dispatch to the createVec<...> 468///function that creates the scalar copies. 469///============================================================================= 470void VectorElementize::createCopies(MachineFunction& F, MachineInstr *Instr, 471 std::vector<MachineInstr *>& copies) { 472 if (ISVECLOAD(Instr)) { 473 createLoadCopy(F, Instr, copies); 474 return; 475 } 476 if (ISVECSTORE(Instr)) { 477 createStoreCopy(F, Instr, copies); 478 return; 479 } 480 if (ISVECSHUFFLE(Instr)) { 481 createVecShuffle(F, Instr, copies); 482 return; 483 } 484 if (ISVECEXTRACT(Instr)) { 485 createVecExtract(F, Instr, copies); 486 return; 487 } 488 if (ISVECINSERT(Instr)) { 489 createVecInsert(F, Instr, copies); 490 return; 491 } 492 if (ISVECDEST(Instr)) { 493 createVecDest(F, Instr, copies); 494 return; 495 } 496 if (ISVECBUILD(Instr)) { 497 createVecBuild(F, Instr, copies); 498 return; 499 } 500 501 unsigned numcopies=numCopiesNeeded(Instr); 502 503 for (unsigned i=0; i<numcopies; ++i) 504 copies.push_back(F.CloneMachineInstr(Instr)); 505 506 for (unsigned i=0; i<numcopies; ++i) { 507 MachineInstr *copy = copies[i]; 508 509 std::vector<MachineOperand> allOperands; 510 std::vector<bool> isDef; 511 512 for (unsigned j=0, e=copy->getNumOperands(); j!=e; ++j) { 513 MachineOperand oper = copy->getOperand(j); 514 allOperands.push_back(oper); 515 if (oper.isReg()) 516 isDef.push_back(oper.isDef()); 517 else 518 isDef.push_back(false); 519 } 520 521 for (unsigned j=0, e=copy->getNumOperands(); j!=e; ++j) 522 copy->RemoveOperand(0); 523 524 copy->setDesc(InstrInfo->get(getScalarVersion(Instr))); 525 526 for (unsigned j=0, e=allOperands.size(); j!=e; ++j) { 527 MachineOperand oper=allOperands[j]; 528 if (oper.isReg()) { 529 unsigned regnum = oper.getReg(); 530 if (isVectorRegister(regnum)) { 531 532 SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum); 533 copy->addOperand(MachineOperand::CreateReg(scalarRegs[i], isDef[j])); 534 } 535 else 536 copy->addOperand(oper); 537 } 538 else 539 copy->addOperand(oper); 540 } 541 } 542} 543 544///============================================================================= 545///Scan through all basic blocks, looking for vector instructions. 546///For each vector instruction I, insert the scalar copies before I, and 547///add I into toRemove vector. Finally remove all instructions in toRemove. 548///============================================================================= 549void VectorElementize::elementize(MachineFunction &F) { 550 for (MachineFunction::reverse_iterator BI=F.rbegin(), BE=F.rend(); 551 BI!=BE; ++BI) { 552 MachineBasicBlock *BB = &*BI; 553 554 std::vector<MachineInstr *> copies; 555 std::vector<MachineInstr *> toRemove; 556 557 for (MachineBasicBlock::iterator II=BB->begin(), IE=BB->end(); 558 II!=IE; ++II) { 559 MachineInstr *Instr = &*II; 560 561 if (!isVectorInstr(Instr)) 562 continue; 563 564 copies.clear(); 565 createCopies(F, Instr, copies); 566 for (unsigned i=0, e=copies.size(); i!=e; ++i) 567 BB->insert(II, copies[i]); 568 569 assert((copies.size() > 0) && "Problem in createCopies"); 570 toRemove.push_back(Instr); 571 } 572 for (unsigned i=0, e=toRemove.size(); i!=e; ++i) 573 F.DeleteMachineInstr(toRemove[i]->getParent()->remove(toRemove[i])); 574 } 575} 576 577///============================================================================= 578///a <= b 579///... 580///... 581///x <= op(a, ...) 582///gets converted to 583/// 584///x <= op(b, ...) 585///The original move is still present. This works on SSA form machine code. 586///Note that a <= b should be a simple vreg-to-vreg move instruction. 587///TBD : I didn't find a function that can do replaceOperand, so I remove 588///all operands and add all of them again, replacing the one while adding. 589///============================================================================= 590unsigned VectorElementize::copyProp(MachineFunction &F) { 591 unsigned numReplacements = 0; 592 593 for (MachineFunction::reverse_iterator BI=F.rbegin(), BE=F.rend(); BI!=BE; 594 ++BI) { 595 MachineBasicBlock *BB = &*BI; 596 597 for (MachineBasicBlock::iterator II=BB->begin(), IE=BB->end(); II!=IE; 598 ++II) { 599 MachineInstr *Instr = &*II; 600 601 // Don't do copy propagation on PHI as it will cause unnecessary 602 // live range overlap. 603 if ((Instr->getOpcode() == TargetOpcode::PHI) || 604 (Instr->getOpcode() == TargetOpcode::DBG_VALUE)) 605 continue; 606 607 bool needsReplacement = false; 608 609 for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) { 610 MachineOperand oper = Instr->getOperand(i); 611 if (!oper.isReg()) continue; 612 if (oper.isDef()) continue; 613 if (!RegInfo->isVirtualRegister(oper.getReg())) continue; 614 615 MachineInstr *defInstr = MRI->getVRegDef(oper.getReg()); 616 617 if (!defInstr) continue; 618 619 if (!isSimpleMove(defInstr)) continue; 620 621 MachineOperand defSrc = defInstr->getOperand(1); 622 if (!defSrc.isReg()) continue; 623 if (!RegInfo->isVirtualRegister(defSrc.getReg())) continue; 624 625 needsReplacement = true; 626 627 } 628 if (!needsReplacement) continue; 629 630 numReplacements++; 631 632 std::vector<MachineOperand> operands; 633 634 for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) { 635 MachineOperand oper = Instr->getOperand(i); 636 bool flag = false; 637 do { 638 if (!(oper.isReg())) 639 break; 640 if (oper.isDef()) 641 break; 642 if (!(RegInfo->isVirtualRegister(oper.getReg()))) 643 break; 644 MachineInstr *defInstr = MRI->getVRegDef(oper.getReg()); 645 if (!(isSimpleMove(defInstr))) 646 break; 647 MachineOperand defSrc = defInstr->getOperand(1); 648 if (!(defSrc.isReg())) 649 break; 650 if (!(RegInfo->isVirtualRegister(defSrc.getReg()))) 651 break; 652 operands.push_back(defSrc); 653 flag = true; 654 } while (0); 655 if (flag == false) 656 operands.push_back(oper); 657 } 658 659 for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) 660 Instr->RemoveOperand(0); 661 for (unsigned i=0, e=operands.size(); i!=e; ++i) 662 Instr->addOperand(operands[i]); 663 664 } 665 } 666 return numReplacements; 667} 668 669///============================================================================= 670///Look for simple vreg-to-vreg instructions whose use_empty() is true, add 671///them to deadMoves vector. Then remove all instructions in deadMoves. 672///============================================================================= 673unsigned VectorElementize::removeDeadMoves(MachineFunction &F) { 674 std::vector<MachineInstr *> deadMoves; 675 for (MachineFunction::reverse_iterator BI=F.rbegin(), BE=F.rend(); BI!=BE; 676 ++BI) { 677 MachineBasicBlock *BB = &*BI; 678 679 for (MachineBasicBlock::iterator II=BB->begin(), IE=BB->end(); II!=IE; 680 ++II) { 681 MachineInstr *Instr = &*II; 682 683 if (!isSimpleMove(Instr)) continue; 684 685 MachineOperand dest = Instr->getOperand(0); 686 assert(dest.isReg() && "dest of move not a register"); 687 assert(RegInfo->isVirtualRegister(dest.getReg()) && 688 "dest of move not a virtual register"); 689 690 if (MRI->use_empty(dest.getReg())) { 691 deadMoves.push_back(Instr); 692 } 693 } 694 } 695 696 for (unsigned i=0, e=deadMoves.size(); i!=e; ++i) 697 F.DeleteMachineInstr(deadMoves[i]->getParent()->remove(deadMoves[i])); 698 699 return deadMoves.size(); 700} 701 702///============================================================================= 703///Main function for this pass. 704///============================================================================= 705bool VectorElementize::runOnMachineFunction(MachineFunction &F) { 706 MRI = &F.getRegInfo(); 707 708 RegInfo = TM.getRegisterInfo(); 709 InstrInfo = TM.getInstrInfo(); 710 711 VectorToScalarMap.clear(); 712 713 elementize(F); 714 715 if (RemoveRedundantMoves) 716 while (1) { 717 if (copyProp(F) == 0) break; 718 removeDeadMoves(F); 719 } 720 721 return true; 722} 723 724FunctionPass *llvm::createVectorElementizePass(NVPTXTargetMachine &tm) { 725 return new VectorElementize(tm); 726} 727 728unsigned VectorElementize::getScalarVersion(unsigned opcode) { 729 if (opcode == NVPTX::PHI) 730 return opcode; 731 if (opcode == NVPTX::IMPLICIT_DEF) 732 return opcode; 733 switch(opcode) { 734 default: llvm_unreachable("Scalar version not set, fix NVPTXVector.td"); 735 case TargetOpcode::COPY: return TargetOpcode::COPY; 736 case NVPTX::AddCCCV2I32: return NVPTX::ADDCCCi32rr; 737 case NVPTX::AddCCCV4I32: return NVPTX::ADDCCCi32rr; 738 case NVPTX::AddCCV2I32: return NVPTX::ADDCCi32rr; 739 case NVPTX::AddCCV4I32: return NVPTX::ADDCCi32rr; 740 case NVPTX::Build_Vector2_f32: return NVPTX::FMOV32rr; 741 case NVPTX::Build_Vector2_f64: return NVPTX::FMOV64rr; 742 case NVPTX::Build_Vector2_i16: return NVPTX::IMOV16rr; 743 case NVPTX::Build_Vector2_i32: return NVPTX::IMOV32rr; 744 case NVPTX::Build_Vector2_i64: return NVPTX::IMOV64rr; 745 case NVPTX::Build_Vector2_i8: return NVPTX::IMOV8rr; 746 case NVPTX::Build_Vector4_f32: return NVPTX::FMOV32rr; 747 case NVPTX::Build_Vector4_i16: return NVPTX::IMOV16rr; 748 case NVPTX::Build_Vector4_i32: return NVPTX::IMOV32rr; 749 case NVPTX::Build_Vector4_i8: return NVPTX::IMOV8rr; 750 case NVPTX::CVTv2i16tov2i32: return NVPTX::Zint_extendext16to32; 751 case NVPTX::CVTv2i64tov2i32: return NVPTX::TRUNC_64to32; 752 case NVPTX::CVTv2i8tov2i32: return NVPTX::Zint_extendext8to32; 753 case NVPTX::CVTv4i16tov4i32: return NVPTX::Zint_extendext16to32; 754 case NVPTX::CVTv4i8tov4i32: return NVPTX::Zint_extendext8to32; 755 case NVPTX::F32MAD_ftzV2: return NVPTX::FMAD32_ftzrrr; 756 case NVPTX::F32MADV2: return NVPTX::FMAD32rrr; 757 case NVPTX::F32MAD_ftzV4: return NVPTX::FMAD32_ftzrrr; 758 case NVPTX::F32MADV4: return NVPTX::FMAD32rrr; 759 case NVPTX::F32FMA_ftzV2: return NVPTX::FMA32_ftzrrr; 760 case NVPTX::F32FMAV2: return NVPTX::FMA32rrr; 761 case NVPTX::F32FMA_ftzV4: return NVPTX::FMA32_ftzrrr; 762 case NVPTX::F32FMAV4: return NVPTX::FMA32rrr; 763 case NVPTX::F64FMAV2: return NVPTX::FMA64rrr; 764 case NVPTX::FVecEQV2F32: return NVPTX::FSetEQf32rr_toi32; 765 case NVPTX::FVecEQV2F64: return NVPTX::FSetEQf64rr_toi64; 766 case NVPTX::FVecEQV4F32: return NVPTX::FSetEQf32rr_toi32; 767 case NVPTX::FVecGEV2F32: return NVPTX::FSetGEf32rr_toi32; 768 case NVPTX::FVecGEV2F64: return NVPTX::FSetGEf64rr_toi64; 769 case NVPTX::FVecGEV4F32: return NVPTX::FSetGEf32rr_toi32; 770 case NVPTX::FVecGTV2F32: return NVPTX::FSetGTf32rr_toi32; 771 case NVPTX::FVecGTV2F64: return NVPTX::FSetGTf64rr_toi64; 772 case NVPTX::FVecGTV4F32: return NVPTX::FSetGTf32rr_toi32; 773 case NVPTX::FVecLEV2F32: return NVPTX::FSetLEf32rr_toi32; 774 case NVPTX::FVecLEV2F64: return NVPTX::FSetLEf64rr_toi64; 775 case NVPTX::FVecLEV4F32: return NVPTX::FSetLEf32rr_toi32; 776 case NVPTX::FVecLTV2F32: return NVPTX::FSetLTf32rr_toi32; 777 case NVPTX::FVecLTV2F64: return NVPTX::FSetLTf64rr_toi64; 778 case NVPTX::FVecLTV4F32: return NVPTX::FSetLTf32rr_toi32; 779 case NVPTX::FVecNANV2F32: return NVPTX::FSetNANf32rr_toi32; 780 case NVPTX::FVecNANV2F64: return NVPTX::FSetNANf64rr_toi64; 781 case NVPTX::FVecNANV4F32: return NVPTX::FSetNANf32rr_toi32; 782 case NVPTX::FVecNEV2F32: return NVPTX::FSetNEf32rr_toi32; 783 case NVPTX::FVecNEV2F64: return NVPTX::FSetNEf64rr_toi64; 784 case NVPTX::FVecNEV4F32: return NVPTX::FSetNEf32rr_toi32; 785 case NVPTX::FVecNUMV2F32: return NVPTX::FSetNUMf32rr_toi32; 786 case NVPTX::FVecNUMV2F64: return NVPTX::FSetNUMf64rr_toi64; 787 case NVPTX::FVecNUMV4F32: return NVPTX::FSetNUMf32rr_toi32; 788 case NVPTX::FVecUEQV2F32: return NVPTX::FSetUEQf32rr_toi32; 789 case NVPTX::FVecUEQV2F64: return NVPTX::FSetUEQf64rr_toi64; 790 case NVPTX::FVecUEQV4F32: return NVPTX::FSetUEQf32rr_toi32; 791 case NVPTX::FVecUGEV2F32: return NVPTX::FSetUGEf32rr_toi32; 792 case NVPTX::FVecUGEV2F64: return NVPTX::FSetUGEf64rr_toi64; 793 case NVPTX::FVecUGEV4F32: return NVPTX::FSetUGEf32rr_toi32; 794 case NVPTX::FVecUGTV2F32: return NVPTX::FSetUGTf32rr_toi32; 795 case NVPTX::FVecUGTV2F64: return NVPTX::FSetUGTf64rr_toi64; 796 case NVPTX::FVecUGTV4F32: return NVPTX::FSetUGTf32rr_toi32; 797 case NVPTX::FVecULEV2F32: return NVPTX::FSetULEf32rr_toi32; 798 case NVPTX::FVecULEV2F64: return NVPTX::FSetULEf64rr_toi64; 799 case NVPTX::FVecULEV4F32: return NVPTX::FSetULEf32rr_toi32; 800 case NVPTX::FVecULTV2F32: return NVPTX::FSetULTf32rr_toi32; 801 case NVPTX::FVecULTV2F64: return NVPTX::FSetULTf64rr_toi64; 802 case NVPTX::FVecULTV4F32: return NVPTX::FSetULTf32rr_toi32; 803 case NVPTX::FVecUNEV2F32: return NVPTX::FSetUNEf32rr_toi32; 804 case NVPTX::FVecUNEV2F64: return NVPTX::FSetUNEf64rr_toi64; 805 case NVPTX::FVecUNEV4F32: return NVPTX::FSetUNEf32rr_toi32; 806 case NVPTX::I16MADV2: return NVPTX::MAD16rrr; 807 case NVPTX::I16MADV4: return NVPTX::MAD16rrr; 808 case NVPTX::I32MADV2: return NVPTX::MAD32rrr; 809 case NVPTX::I32MADV4: return NVPTX::MAD32rrr; 810 case NVPTX::I64MADV2: return NVPTX::MAD64rrr; 811 case NVPTX::I8MADV2: return NVPTX::MAD8rrr; 812 case NVPTX::I8MADV4: return NVPTX::MAD8rrr; 813 case NVPTX::ShiftLV2I16: return NVPTX::SHLi16rr; 814 case NVPTX::ShiftLV2I32: return NVPTX::SHLi32rr; 815 case NVPTX::ShiftLV2I64: return NVPTX::SHLi64rr; 816 case NVPTX::ShiftLV2I8: return NVPTX::SHLi8rr; 817 case NVPTX::ShiftLV4I16: return NVPTX::SHLi16rr; 818 case NVPTX::ShiftLV4I32: return NVPTX::SHLi32rr; 819 case NVPTX::ShiftLV4I8: return NVPTX::SHLi8rr; 820 case NVPTX::ShiftRAV2I16: return NVPTX::SRAi16rr; 821 case NVPTX::ShiftRAV2I32: return NVPTX::SRAi32rr; 822 case NVPTX::ShiftRAV2I64: return NVPTX::SRAi64rr; 823 case NVPTX::ShiftRAV2I8: return NVPTX::SRAi8rr; 824 case NVPTX::ShiftRAV4I16: return NVPTX::SRAi16rr; 825 case NVPTX::ShiftRAV4I32: return NVPTX::SRAi32rr; 826 case NVPTX::ShiftRAV4I8: return NVPTX::SRAi8rr; 827 case NVPTX::ShiftRLV2I16: return NVPTX::SRLi16rr; 828 case NVPTX::ShiftRLV2I32: return NVPTX::SRLi32rr; 829 case NVPTX::ShiftRLV2I64: return NVPTX::SRLi64rr; 830 case NVPTX::ShiftRLV2I8: return NVPTX::SRLi8rr; 831 case NVPTX::ShiftRLV4I16: return NVPTX::SRLi16rr; 832 case NVPTX::ShiftRLV4I32: return NVPTX::SRLi32rr; 833 case NVPTX::ShiftRLV4I8: return NVPTX::SRLi8rr; 834 case NVPTX::SubCCCV2I32: return NVPTX::SUBCCCi32rr; 835 case NVPTX::SubCCCV4I32: return NVPTX::SUBCCCi32rr; 836 case NVPTX::SubCCV2I32: return NVPTX::SUBCCi32rr; 837 case NVPTX::SubCCV4I32: return NVPTX::SUBCCi32rr; 838 case NVPTX::V2F32Div_prec_ftz: return NVPTX::FDIV32rr_prec_ftz; 839 case NVPTX::V2F32Div_prec: return NVPTX::FDIV32rr_prec; 840 case NVPTX::V2F32Div_ftz: return NVPTX::FDIV32rr_ftz; 841 case NVPTX::V2F32Div: return NVPTX::FDIV32rr; 842 case NVPTX::V2F32_Select: return NVPTX::SELECTf32rr; 843 case NVPTX::V2F64Div: return NVPTX::FDIV64rr; 844 case NVPTX::V2F64_Select: return NVPTX::SELECTf64rr; 845 case NVPTX::V2I16_Select: return NVPTX::SELECTi16rr; 846 case NVPTX::V2I32_Select: return NVPTX::SELECTi32rr; 847 case NVPTX::V2I64_Select: return NVPTX::SELECTi64rr; 848 case NVPTX::V2I8_Select: return NVPTX::SELECTi8rr; 849 case NVPTX::V2f32Extract: return NVPTX::FMOV32rr; 850 case NVPTX::V2f32Insert: return NVPTX::FMOV32rr; 851 case NVPTX::V2f32Mov: return NVPTX::FMOV32rr; 852 case NVPTX::V2f64Extract: return NVPTX::FMOV64rr; 853 case NVPTX::V2f64Insert: return NVPTX::FMOV64rr; 854 case NVPTX::V2f64Mov: return NVPTX::FMOV64rr; 855 case NVPTX::V2i16Extract: return NVPTX::IMOV16rr; 856 case NVPTX::V2i16Insert: return NVPTX::IMOV16rr; 857 case NVPTX::V2i16Mov: return NVPTX::IMOV16rr; 858 case NVPTX::V2i32Extract: return NVPTX::IMOV32rr; 859 case NVPTX::V2i32Insert: return NVPTX::IMOV32rr; 860 case NVPTX::V2i32Mov: return NVPTX::IMOV32rr; 861 case NVPTX::V2i64Extract: return NVPTX::IMOV64rr; 862 case NVPTX::V2i64Insert: return NVPTX::IMOV64rr; 863 case NVPTX::V2i64Mov: return NVPTX::IMOV64rr; 864 case NVPTX::V2i8Extract: return NVPTX::IMOV8rr; 865 case NVPTX::V2i8Insert: return NVPTX::IMOV8rr; 866 case NVPTX::V2i8Mov: return NVPTX::IMOV8rr; 867 case NVPTX::V4F32Div_prec_ftz: return NVPTX::FDIV32rr_prec_ftz; 868 case NVPTX::V4F32Div_prec: return NVPTX::FDIV32rr_prec; 869 case NVPTX::V4F32Div_ftz: return NVPTX::FDIV32rr_ftz; 870 case NVPTX::V4F32Div: return NVPTX::FDIV32rr; 871 case NVPTX::V4F32_Select: return NVPTX::SELECTf32rr; 872 case NVPTX::V4I16_Select: return NVPTX::SELECTi16rr; 873 case NVPTX::V4I32_Select: return NVPTX::SELECTi32rr; 874 case NVPTX::V4I8_Select: return NVPTX::SELECTi8rr; 875 case NVPTX::V4f32Extract: return NVPTX::FMOV32rr; 876 case NVPTX::V4f32Insert: return NVPTX::FMOV32rr; 877 case NVPTX::V4f32Mov: return NVPTX::FMOV32rr; 878 case NVPTX::V4i16Extract: return NVPTX::IMOV16rr; 879 case NVPTX::V4i16Insert: return NVPTX::IMOV16rr; 880 case NVPTX::V4i16Mov: return NVPTX::IMOV16rr; 881 case NVPTX::V4i32Extract: return NVPTX::IMOV32rr; 882 case NVPTX::V4i32Insert: return NVPTX::IMOV32rr; 883 case NVPTX::V4i32Mov: return NVPTX::IMOV32rr; 884 case NVPTX::V4i8Extract: return NVPTX::IMOV8rr; 885 case NVPTX::V4i8Insert: return NVPTX::IMOV8rr; 886 case NVPTX::V4i8Mov: return NVPTX::IMOV8rr; 887 case NVPTX::VAddV2I16: return NVPTX::ADDi16rr; 888 case NVPTX::VAddV2I32: return NVPTX::ADDi32rr; 889 case NVPTX::VAddV2I64: return NVPTX::ADDi64rr; 890 case NVPTX::VAddV2I8: return NVPTX::ADDi8rr; 891 case NVPTX::VAddV4I16: return NVPTX::ADDi16rr; 892 case NVPTX::VAddV4I32: return NVPTX::ADDi32rr; 893 case NVPTX::VAddV4I8: return NVPTX::ADDi8rr; 894 case NVPTX::VAddfV2F32: return NVPTX::FADDf32rr; 895 case NVPTX::VAddfV2F32_ftz: return NVPTX::FADDf32rr_ftz; 896 case NVPTX::VAddfV2F64: return NVPTX::FADDf64rr; 897 case NVPTX::VAddfV4F32: return NVPTX::FADDf32rr; 898 case NVPTX::VAddfV4F32_ftz: return NVPTX::FADDf32rr_ftz; 899 case NVPTX::VAndV2I16: return NVPTX::ANDb16rr; 900 case NVPTX::VAndV2I32: return NVPTX::ANDb32rr; 901 case NVPTX::VAndV2I64: return NVPTX::ANDb64rr; 902 case NVPTX::VAndV2I8: return NVPTX::ANDb8rr; 903 case NVPTX::VAndV4I16: return NVPTX::ANDb16rr; 904 case NVPTX::VAndV4I32: return NVPTX::ANDb32rr; 905 case NVPTX::VAndV4I8: return NVPTX::ANDb8rr; 906 case NVPTX::VMulfV2F32_ftz: return NVPTX::FMULf32rr_ftz; 907 case NVPTX::VMulfV2F32: return NVPTX::FMULf32rr; 908 case NVPTX::VMulfV2F64: return NVPTX::FMULf64rr; 909 case NVPTX::VMulfV4F32_ftz: return NVPTX::FMULf32rr_ftz; 910 case NVPTX::VMulfV4F32: return NVPTX::FMULf32rr; 911 case NVPTX::VMultHSV2I16: return NVPTX::MULTHSi16rr; 912 case NVPTX::VMultHSV2I32: return NVPTX::MULTHSi32rr; 913 case NVPTX::VMultHSV2I64: return NVPTX::MULTHSi64rr; 914 case NVPTX::VMultHSV2I8: return NVPTX::MULTHSi8rr; 915 case NVPTX::VMultHSV4I16: return NVPTX::MULTHSi16rr; 916 case NVPTX::VMultHSV4I32: return NVPTX::MULTHSi32rr; 917 case NVPTX::VMultHSV4I8: return NVPTX::MULTHSi8rr; 918 case NVPTX::VMultHUV2I16: return NVPTX::MULTHUi16rr; 919 case NVPTX::VMultHUV2I32: return NVPTX::MULTHUi32rr; 920 case NVPTX::VMultHUV2I64: return NVPTX::MULTHUi64rr; 921 case NVPTX::VMultHUV2I8: return NVPTX::MULTHUi8rr; 922 case NVPTX::VMultHUV4I16: return NVPTX::MULTHUi16rr; 923 case NVPTX::VMultHUV4I32: return NVPTX::MULTHUi32rr; 924 case NVPTX::VMultHUV4I8: return NVPTX::MULTHUi8rr; 925 case NVPTX::VMultV2I16: return NVPTX::MULTi16rr; 926 case NVPTX::VMultV2I32: return NVPTX::MULTi32rr; 927 case NVPTX::VMultV2I64: return NVPTX::MULTi64rr; 928 case NVPTX::VMultV2I8: return NVPTX::MULTi8rr; 929 case NVPTX::VMultV4I16: return NVPTX::MULTi16rr; 930 case NVPTX::VMultV4I32: return NVPTX::MULTi32rr; 931 case NVPTX::VMultV4I8: return NVPTX::MULTi8rr; 932 case NVPTX::VNegV2I16: return NVPTX::INEG16; 933 case NVPTX::VNegV2I32: return NVPTX::INEG32; 934 case NVPTX::VNegV2I64: return NVPTX::INEG64; 935 case NVPTX::VNegV2I8: return NVPTX::INEG8; 936 case NVPTX::VNegV4I16: return NVPTX::INEG16; 937 case NVPTX::VNegV4I32: return NVPTX::INEG32; 938 case NVPTX::VNegV4I8: return NVPTX::INEG8; 939 case NVPTX::VNegv2f32: return NVPTX::FNEGf32; 940 case NVPTX::VNegv2f32_ftz: return NVPTX::FNEGf32_ftz; 941 case NVPTX::VNegv2f64: return NVPTX::FNEGf64; 942 case NVPTX::VNegv4f32: return NVPTX::FNEGf32; 943 case NVPTX::VNegv4f32_ftz: return NVPTX::FNEGf32_ftz; 944 case NVPTX::VNotV2I16: return NVPTX::NOT16; 945 case NVPTX::VNotV2I32: return NVPTX::NOT32; 946 case NVPTX::VNotV2I64: return NVPTX::NOT64; 947 case NVPTX::VNotV2I8: return NVPTX::NOT8; 948 case NVPTX::VNotV4I16: return NVPTX::NOT16; 949 case NVPTX::VNotV4I32: return NVPTX::NOT32; 950 case NVPTX::VNotV4I8: return NVPTX::NOT8; 951 case NVPTX::VOrV2I16: return NVPTX::ORb16rr; 952 case NVPTX::VOrV2I32: return NVPTX::ORb32rr; 953 case NVPTX::VOrV2I64: return NVPTX::ORb64rr; 954 case NVPTX::VOrV2I8: return NVPTX::ORb8rr; 955 case NVPTX::VOrV4I16: return NVPTX::ORb16rr; 956 case NVPTX::VOrV4I32: return NVPTX::ORb32rr; 957 case NVPTX::VOrV4I8: return NVPTX::ORb8rr; 958 case NVPTX::VSDivV2I16: return NVPTX::SDIVi16rr; 959 case NVPTX::VSDivV2I32: return NVPTX::SDIVi32rr; 960 case NVPTX::VSDivV2I64: return NVPTX::SDIVi64rr; 961 case NVPTX::VSDivV2I8: return NVPTX::SDIVi8rr; 962 case NVPTX::VSDivV4I16: return NVPTX::SDIVi16rr; 963 case NVPTX::VSDivV4I32: return NVPTX::SDIVi32rr; 964 case NVPTX::VSDivV4I8: return NVPTX::SDIVi8rr; 965 case NVPTX::VSRemV2I16: return NVPTX::SREMi16rr; 966 case NVPTX::VSRemV2I32: return NVPTX::SREMi32rr; 967 case NVPTX::VSRemV2I64: return NVPTX::SREMi64rr; 968 case NVPTX::VSRemV2I8: return NVPTX::SREMi8rr; 969 case NVPTX::VSRemV4I16: return NVPTX::SREMi16rr; 970 case NVPTX::VSRemV4I32: return NVPTX::SREMi32rr; 971 case NVPTX::VSRemV4I8: return NVPTX::SREMi8rr; 972 case NVPTX::VSubV2I16: return NVPTX::SUBi16rr; 973 case NVPTX::VSubV2I32: return NVPTX::SUBi32rr; 974 case NVPTX::VSubV2I64: return NVPTX::SUBi64rr; 975 case NVPTX::VSubV2I8: return NVPTX::SUBi8rr; 976 case NVPTX::VSubV4I16: return NVPTX::SUBi16rr; 977 case NVPTX::VSubV4I32: return NVPTX::SUBi32rr; 978 case NVPTX::VSubV4I8: return NVPTX::SUBi8rr; 979 case NVPTX::VSubfV2F32_ftz: return NVPTX::FSUBf32rr_ftz; 980 case NVPTX::VSubfV2F32: return NVPTX::FSUBf32rr; 981 case NVPTX::VSubfV2F64: return NVPTX::FSUBf64rr; 982 case NVPTX::VSubfV4F32_ftz: return NVPTX::FSUBf32rr_ftz; 983 case NVPTX::VSubfV4F32: return NVPTX::FSUBf32rr; 984 case NVPTX::VUDivV2I16: return NVPTX::UDIVi16rr; 985 case NVPTX::VUDivV2I32: return NVPTX::UDIVi32rr; 986 case NVPTX::VUDivV2I64: return NVPTX::UDIVi64rr; 987 case NVPTX::VUDivV2I8: return NVPTX::UDIVi8rr; 988 case NVPTX::VUDivV4I16: return NVPTX::UDIVi16rr; 989 case NVPTX::VUDivV4I32: return NVPTX::UDIVi32rr; 990 case NVPTX::VUDivV4I8: return NVPTX::UDIVi8rr; 991 case NVPTX::VURemV2I16: return NVPTX::UREMi16rr; 992 case NVPTX::VURemV2I32: return NVPTX::UREMi32rr; 993 case NVPTX::VURemV2I64: return NVPTX::UREMi64rr; 994 case NVPTX::VURemV2I8: return NVPTX::UREMi8rr; 995 case NVPTX::VURemV4I16: return NVPTX::UREMi16rr; 996 case NVPTX::VURemV4I32: return NVPTX::UREMi32rr; 997 case NVPTX::VURemV4I8: return NVPTX::UREMi8rr; 998 case NVPTX::VXorV2I16: return NVPTX::XORb16rr; 999 case NVPTX::VXorV2I32: return NVPTX::XORb32rr; 1000 case NVPTX::VXorV2I64: return NVPTX::XORb64rr; 1001 case NVPTX::VXorV2I8: return NVPTX::XORb8rr; 1002 case NVPTX::VXorV4I16: return NVPTX::XORb16rr; 1003 case NVPTX::VXorV4I32: return NVPTX::XORb32rr; 1004 case NVPTX::VXorV4I8: return NVPTX::XORb8rr; 1005 case NVPTX::VecSEQV2I16: return NVPTX::ISetSEQi16rr_toi16; 1006 case NVPTX::VecSEQV2I32: return NVPTX::ISetSEQi32rr_toi32; 1007 case NVPTX::VecSEQV2I64: return NVPTX::ISetSEQi64rr_toi64; 1008 case NVPTX::VecSEQV2I8: return NVPTX::ISetSEQi8rr_toi8; 1009 case NVPTX::VecSEQV4I16: return NVPTX::ISetSEQi16rr_toi16; 1010 case NVPTX::VecSEQV4I32: return NVPTX::ISetSEQi32rr_toi32; 1011 case NVPTX::VecSEQV4I8: return NVPTX::ISetSEQi8rr_toi8; 1012 case NVPTX::VecSGEV2I16: return NVPTX::ISetSGEi16rr_toi16; 1013 case NVPTX::VecSGEV2I32: return NVPTX::ISetSGEi32rr_toi32; 1014 case NVPTX::VecSGEV2I64: return NVPTX::ISetSGEi64rr_toi64; 1015 case NVPTX::VecSGEV2I8: return NVPTX::ISetSGEi8rr_toi8; 1016 case NVPTX::VecSGEV4I16: return NVPTX::ISetSGEi16rr_toi16; 1017 case NVPTX::VecSGEV4I32: return NVPTX::ISetSGEi32rr_toi32; 1018 case NVPTX::VecSGEV4I8: return NVPTX::ISetSGEi8rr_toi8; 1019 case NVPTX::VecSGTV2I16: return NVPTX::ISetSGTi16rr_toi16; 1020 case NVPTX::VecSGTV2I32: return NVPTX::ISetSGTi32rr_toi32; 1021 case NVPTX::VecSGTV2I64: return NVPTX::ISetSGTi64rr_toi64; 1022 case NVPTX::VecSGTV2I8: return NVPTX::ISetSGTi8rr_toi8; 1023 case NVPTX::VecSGTV4I16: return NVPTX::ISetSGTi16rr_toi16; 1024 case NVPTX::VecSGTV4I32: return NVPTX::ISetSGTi32rr_toi32; 1025 case NVPTX::VecSGTV4I8: return NVPTX::ISetSGTi8rr_toi8; 1026 case NVPTX::VecSLEV2I16: return NVPTX::ISetSLEi16rr_toi16; 1027 case NVPTX::VecSLEV2I32: return NVPTX::ISetSLEi32rr_toi32; 1028 case NVPTX::VecSLEV2I64: return NVPTX::ISetSLEi64rr_toi64; 1029 case NVPTX::VecSLEV2I8: return NVPTX::ISetSLEi8rr_toi8; 1030 case NVPTX::VecSLEV4I16: return NVPTX::ISetSLEi16rr_toi16; 1031 case NVPTX::VecSLEV4I32: return NVPTX::ISetSLEi32rr_toi32; 1032 case NVPTX::VecSLEV4I8: return NVPTX::ISetSLEi8rr_toi8; 1033 case NVPTX::VecSLTV2I16: return NVPTX::ISetSLTi16rr_toi16; 1034 case NVPTX::VecSLTV2I32: return NVPTX::ISetSLTi32rr_toi32; 1035 case NVPTX::VecSLTV2I64: return NVPTX::ISetSLTi64rr_toi64; 1036 case NVPTX::VecSLTV2I8: return NVPTX::ISetSLTi8rr_toi8; 1037 case NVPTX::VecSLTV4I16: return NVPTX::ISetSLTi16rr_toi16; 1038 case NVPTX::VecSLTV4I32: return NVPTX::ISetSLTi32rr_toi32; 1039 case NVPTX::VecSLTV4I8: return NVPTX::ISetSLTi8rr_toi8; 1040 case NVPTX::VecSNEV2I16: return NVPTX::ISetSNEi16rr_toi16; 1041 case NVPTX::VecSNEV2I32: return NVPTX::ISetSNEi32rr_toi32; 1042 case NVPTX::VecSNEV2I64: return NVPTX::ISetSNEi64rr_toi64; 1043 case NVPTX::VecSNEV2I8: return NVPTX::ISetSNEi8rr_toi8; 1044 case NVPTX::VecSNEV4I16: return NVPTX::ISetSNEi16rr_toi16; 1045 case NVPTX::VecSNEV4I32: return NVPTX::ISetSNEi32rr_toi32; 1046 case NVPTX::VecSNEV4I8: return NVPTX::ISetSNEi8rr_toi8; 1047 case NVPTX::VecShuffle_v2f32: return NVPTX::FMOV32rr; 1048 case NVPTX::VecShuffle_v2f64: return NVPTX::FMOV64rr; 1049 case NVPTX::VecShuffle_v2i16: return NVPTX::IMOV16rr; 1050 case NVPTX::VecShuffle_v2i32: return NVPTX::IMOV32rr; 1051 case NVPTX::VecShuffle_v2i64: return NVPTX::IMOV64rr; 1052 case NVPTX::VecShuffle_v2i8: return NVPTX::IMOV8rr; 1053 case NVPTX::VecShuffle_v4f32: return NVPTX::FMOV32rr; 1054 case NVPTX::VecShuffle_v4i16: return NVPTX::IMOV16rr; 1055 case NVPTX::VecShuffle_v4i32: return NVPTX::IMOV32rr; 1056 case NVPTX::VecShuffle_v4i8: return NVPTX::IMOV8rr; 1057 case NVPTX::VecUEQV2I16: return NVPTX::ISetUEQi16rr_toi16; 1058 case NVPTX::VecUEQV2I32: return NVPTX::ISetUEQi32rr_toi32; 1059 case NVPTX::VecUEQV2I64: return NVPTX::ISetUEQi64rr_toi64; 1060 case NVPTX::VecUEQV2I8: return NVPTX::ISetUEQi8rr_toi8; 1061 case NVPTX::VecUEQV4I16: return NVPTX::ISetUEQi16rr_toi16; 1062 case NVPTX::VecUEQV4I32: return NVPTX::ISetUEQi32rr_toi32; 1063 case NVPTX::VecUEQV4I8: return NVPTX::ISetUEQi8rr_toi8; 1064 case NVPTX::VecUGEV2I16: return NVPTX::ISetUGEi16rr_toi16; 1065 case NVPTX::VecUGEV2I32: return NVPTX::ISetUGEi32rr_toi32; 1066 case NVPTX::VecUGEV2I64: return NVPTX::ISetUGEi64rr_toi64; 1067 case NVPTX::VecUGEV2I8: return NVPTX::ISetUGEi8rr_toi8; 1068 case NVPTX::VecUGEV4I16: return NVPTX::ISetUGEi16rr_toi16; 1069 case NVPTX::VecUGEV4I32: return NVPTX::ISetUGEi32rr_toi32; 1070 case NVPTX::VecUGEV4I8: return NVPTX::ISetUGEi8rr_toi8; 1071 case NVPTX::VecUGTV2I16: return NVPTX::ISetUGTi16rr_toi16; 1072 case NVPTX::VecUGTV2I32: return NVPTX::ISetUGTi32rr_toi32; 1073 case NVPTX::VecUGTV2I64: return NVPTX::ISetUGTi64rr_toi64; 1074 case NVPTX::VecUGTV2I8: return NVPTX::ISetUGTi8rr_toi8; 1075 case NVPTX::VecUGTV4I16: return NVPTX::ISetUGTi16rr_toi16; 1076 case NVPTX::VecUGTV4I32: return NVPTX::ISetUGTi32rr_toi32; 1077 case NVPTX::VecUGTV4I8: return NVPTX::ISetUGTi8rr_toi8; 1078 case NVPTX::VecULEV2I16: return NVPTX::ISetULEi16rr_toi16; 1079 case NVPTX::VecULEV2I32: return NVPTX::ISetULEi32rr_toi32; 1080 case NVPTX::VecULEV2I64: return NVPTX::ISetULEi64rr_toi64; 1081 case NVPTX::VecULEV2I8: return NVPTX::ISetULEi8rr_toi8; 1082 case NVPTX::VecULEV4I16: return NVPTX::ISetULEi16rr_toi16; 1083 case NVPTX::VecULEV4I32: return NVPTX::ISetULEi32rr_toi32; 1084 case NVPTX::VecULEV4I8: return NVPTX::ISetULEi8rr_toi8; 1085 case NVPTX::VecULTV2I16: return NVPTX::ISetULTi16rr_toi16; 1086 case NVPTX::VecULTV2I32: return NVPTX::ISetULTi32rr_toi32; 1087 case NVPTX::VecULTV2I64: return NVPTX::ISetULTi64rr_toi64; 1088 case NVPTX::VecULTV2I8: return NVPTX::ISetULTi8rr_toi8; 1089 case NVPTX::VecULTV4I16: return NVPTX::ISetULTi16rr_toi16; 1090 case NVPTX::VecULTV4I32: return NVPTX::ISetULTi32rr_toi32; 1091 case NVPTX::VecULTV4I8: return NVPTX::ISetULTi8rr_toi8; 1092 case NVPTX::VecUNEV2I16: return NVPTX::ISetUNEi16rr_toi16; 1093 case NVPTX::VecUNEV2I32: return NVPTX::ISetUNEi32rr_toi32; 1094 case NVPTX::VecUNEV2I64: return NVPTX::ISetUNEi64rr_toi64; 1095 case NVPTX::VecUNEV2I8: return NVPTX::ISetUNEi8rr_toi8; 1096 case NVPTX::VecUNEV4I16: return NVPTX::ISetUNEi16rr_toi16; 1097 case NVPTX::VecUNEV4I32: return NVPTX::ISetUNEi32rr_toi32; 1098 case NVPTX::VecUNEV4I8: return NVPTX::ISetUNEi8rr_toi8; 1099 case NVPTX::INT_PTX_LDU_G_v2i8_32: return NVPTX::INT_PTX_LDU_G_v2i8_ELE_32; 1100 case NVPTX::INT_PTX_LDU_G_v4i8_32: return NVPTX::INT_PTX_LDU_G_v4i8_ELE_32; 1101 case NVPTX::INT_PTX_LDU_G_v2i16_32: return NVPTX::INT_PTX_LDU_G_v2i16_ELE_32; 1102 case NVPTX::INT_PTX_LDU_G_v4i16_32: return NVPTX::INT_PTX_LDU_G_v4i16_ELE_32; 1103 case NVPTX::INT_PTX_LDU_G_v2i32_32: return NVPTX::INT_PTX_LDU_G_v2i32_ELE_32; 1104 case NVPTX::INT_PTX_LDU_G_v4i32_32: return NVPTX::INT_PTX_LDU_G_v4i32_ELE_32; 1105 case NVPTX::INT_PTX_LDU_G_v2f32_32: return NVPTX::INT_PTX_LDU_G_v2f32_ELE_32; 1106 case NVPTX::INT_PTX_LDU_G_v4f32_32: return NVPTX::INT_PTX_LDU_G_v4f32_ELE_32; 1107 case NVPTX::INT_PTX_LDU_G_v2i64_32: return NVPTX::INT_PTX_LDU_G_v2i64_ELE_32; 1108 case NVPTX::INT_PTX_LDU_G_v2f64_32: return NVPTX::INT_PTX_LDU_G_v2f64_ELE_32; 1109 case NVPTX::INT_PTX_LDU_G_v2i8_64: return NVPTX::INT_PTX_LDU_G_v2i8_ELE_64; 1110 case NVPTX::INT_PTX_LDU_G_v4i8_64: return NVPTX::INT_PTX_LDU_G_v4i8_ELE_64; 1111 case NVPTX::INT_PTX_LDU_G_v2i16_64: return NVPTX::INT_PTX_LDU_G_v2i16_ELE_64; 1112 case NVPTX::INT_PTX_LDU_G_v4i16_64: return NVPTX::INT_PTX_LDU_G_v4i16_ELE_64; 1113 case NVPTX::INT_PTX_LDU_G_v2i32_64: return NVPTX::INT_PTX_LDU_G_v2i32_ELE_64; 1114 case NVPTX::INT_PTX_LDU_G_v4i32_64: return NVPTX::INT_PTX_LDU_G_v4i32_ELE_64; 1115 case NVPTX::INT_PTX_LDU_G_v2f32_64: return NVPTX::INT_PTX_LDU_G_v2f32_ELE_64; 1116 case NVPTX::INT_PTX_LDU_G_v4f32_64: return NVPTX::INT_PTX_LDU_G_v4f32_ELE_64; 1117 case NVPTX::INT_PTX_LDU_G_v2i64_64: return NVPTX::INT_PTX_LDU_G_v2i64_ELE_64; 1118 case NVPTX::INT_PTX_LDU_G_v2f64_64: return NVPTX::INT_PTX_LDU_G_v2f64_ELE_64; 1119 1120 case NVPTX::LoadParamV4I32: return NVPTX::LoadParamScalar4I32; 1121 case NVPTX::LoadParamV4I16: return NVPTX::LoadParamScalar4I16; 1122 case NVPTX::LoadParamV4I8: return NVPTX::LoadParamScalar4I8; 1123 case NVPTX::LoadParamV2I64: return NVPTX::LoadParamScalar2I64; 1124 case NVPTX::LoadParamV2I32: return NVPTX::LoadParamScalar2I32; 1125 case NVPTX::LoadParamV2I16: return NVPTX::LoadParamScalar2I16; 1126 case NVPTX::LoadParamV2I8: return NVPTX::LoadParamScalar2I8; 1127 case NVPTX::LoadParamV4F32: return NVPTX::LoadParamScalar4F32; 1128 case NVPTX::LoadParamV2F32: return NVPTX::LoadParamScalar2F32; 1129 case NVPTX::LoadParamV2F64: return NVPTX::LoadParamScalar2F64; 1130 case NVPTX::StoreParamV4I32: return NVPTX::StoreParamScalar4I32; 1131 case NVPTX::StoreParamV4I16: return NVPTX::StoreParamScalar4I16; 1132 case NVPTX::StoreParamV4I8: return NVPTX::StoreParamScalar4I8; 1133 case NVPTX::StoreParamV2I64: return NVPTX::StoreParamScalar2I64; 1134 case NVPTX::StoreParamV2I32: return NVPTX::StoreParamScalar2I32; 1135 case NVPTX::StoreParamV2I16: return NVPTX::StoreParamScalar2I16; 1136 case NVPTX::StoreParamV2I8: return NVPTX::StoreParamScalar2I8; 1137 case NVPTX::StoreParamV4F32: return NVPTX::StoreParamScalar4F32; 1138 case NVPTX::StoreParamV2F32: return NVPTX::StoreParamScalar2F32; 1139 case NVPTX::StoreParamV2F64: return NVPTX::StoreParamScalar2F64; 1140 case NVPTX::StoreRetvalV4I32: return NVPTX::StoreRetvalScalar4I32; 1141 case NVPTX::StoreRetvalV4I16: return NVPTX::StoreRetvalScalar4I16; 1142 case NVPTX::StoreRetvalV4I8: return NVPTX::StoreRetvalScalar4I8; 1143 case NVPTX::StoreRetvalV2I64: return NVPTX::StoreRetvalScalar2I64; 1144 case NVPTX::StoreRetvalV2I32: return NVPTX::StoreRetvalScalar2I32; 1145 case NVPTX::StoreRetvalV2I16: return NVPTX::StoreRetvalScalar2I16; 1146 case NVPTX::StoreRetvalV2I8: return NVPTX::StoreRetvalScalar2I8; 1147 case NVPTX::StoreRetvalV4F32: return NVPTX::StoreRetvalScalar4F32; 1148 case NVPTX::StoreRetvalV2F32: return NVPTX::StoreRetvalScalar2F32; 1149 case NVPTX::StoreRetvalV2F64: return NVPTX::StoreRetvalScalar2F64; 1150 case NVPTX::VecI32toV4I8: return NVPTX::I32toV4I8; 1151 case NVPTX::VecI64toV4I16: return NVPTX::I64toV4I16; 1152 case NVPTX::VecI16toV2I8: return NVPTX::I16toV2I8; 1153 case NVPTX::VecI32toV2I16: return NVPTX::I32toV2I16; 1154 case NVPTX::VecI64toV2I32: return NVPTX::I64toV2I32; 1155 case NVPTX::VecF64toV2F32: return NVPTX::F64toV2F32; 1156 1157 case NVPTX::LD_v2i8_avar: return NVPTX::LDV_i8_v2_avar; 1158 case NVPTX::LD_v2i8_areg: return NVPTX::LDV_i8_v2_areg; 1159 case NVPTX::LD_v2i8_ari: return NVPTX::LDV_i8_v2_ari; 1160 case NVPTX::LD_v2i8_asi: return NVPTX::LDV_i8_v2_asi; 1161 case NVPTX::LD_v4i8_avar: return NVPTX::LDV_i8_v4_avar; 1162 case NVPTX::LD_v4i8_areg: return NVPTX::LDV_i8_v4_areg; 1163 case NVPTX::LD_v4i8_ari: return NVPTX::LDV_i8_v4_ari; 1164 case NVPTX::LD_v4i8_asi: return NVPTX::LDV_i8_v4_asi; 1165 1166 case NVPTX::LD_v2i16_avar: return NVPTX::LDV_i16_v2_avar; 1167 case NVPTX::LD_v2i16_areg: return NVPTX::LDV_i16_v2_areg; 1168 case NVPTX::LD_v2i16_ari: return NVPTX::LDV_i16_v2_ari; 1169 case NVPTX::LD_v2i16_asi: return NVPTX::LDV_i16_v2_asi; 1170 case NVPTX::LD_v4i16_avar: return NVPTX::LDV_i16_v4_avar; 1171 case NVPTX::LD_v4i16_areg: return NVPTX::LDV_i16_v4_areg; 1172 case NVPTX::LD_v4i16_ari: return NVPTX::LDV_i16_v4_ari; 1173 case NVPTX::LD_v4i16_asi: return NVPTX::LDV_i16_v4_asi; 1174 1175 case NVPTX::LD_v2i32_avar: return NVPTX::LDV_i32_v2_avar; 1176 case NVPTX::LD_v2i32_areg: return NVPTX::LDV_i32_v2_areg; 1177 case NVPTX::LD_v2i32_ari: return NVPTX::LDV_i32_v2_ari; 1178 case NVPTX::LD_v2i32_asi: return NVPTX::LDV_i32_v2_asi; 1179 case NVPTX::LD_v4i32_avar: return NVPTX::LDV_i32_v4_avar; 1180 case NVPTX::LD_v4i32_areg: return NVPTX::LDV_i32_v4_areg; 1181 case NVPTX::LD_v4i32_ari: return NVPTX::LDV_i32_v4_ari; 1182 case NVPTX::LD_v4i32_asi: return NVPTX::LDV_i32_v4_asi; 1183 1184 case NVPTX::LD_v2f32_avar: return NVPTX::LDV_f32_v2_avar; 1185 case NVPTX::LD_v2f32_areg: return NVPTX::LDV_f32_v2_areg; 1186 case NVPTX::LD_v2f32_ari: return NVPTX::LDV_f32_v2_ari; 1187 case NVPTX::LD_v2f32_asi: return NVPTX::LDV_f32_v2_asi; 1188 case NVPTX::LD_v4f32_avar: return NVPTX::LDV_f32_v4_avar; 1189 case NVPTX::LD_v4f32_areg: return NVPTX::LDV_f32_v4_areg; 1190 case NVPTX::LD_v4f32_ari: return NVPTX::LDV_f32_v4_ari; 1191 case NVPTX::LD_v4f32_asi: return NVPTX::LDV_f32_v4_asi; 1192 1193 case NVPTX::LD_v2i64_avar: return NVPTX::LDV_i64_v2_avar; 1194 case NVPTX::LD_v2i64_areg: return NVPTX::LDV_i64_v2_areg; 1195 case NVPTX::LD_v2i64_ari: return NVPTX::LDV_i64_v2_ari; 1196 case NVPTX::LD_v2i64_asi: return NVPTX::LDV_i64_v2_asi; 1197 case NVPTX::LD_v2f64_avar: return NVPTX::LDV_f64_v2_avar; 1198 case NVPTX::LD_v2f64_areg: return NVPTX::LDV_f64_v2_areg; 1199 case NVPTX::LD_v2f64_ari: return NVPTX::LDV_f64_v2_ari; 1200 case NVPTX::LD_v2f64_asi: return NVPTX::LDV_f64_v2_asi; 1201 1202 case NVPTX::ST_v2i8_avar: return NVPTX::STV_i8_v2_avar; 1203 case NVPTX::ST_v2i8_areg: return NVPTX::STV_i8_v2_areg; 1204 case NVPTX::ST_v2i8_ari: return NVPTX::STV_i8_v2_ari; 1205 case NVPTX::ST_v2i8_asi: return NVPTX::STV_i8_v2_asi; 1206 case NVPTX::ST_v4i8_avar: return NVPTX::STV_i8_v4_avar; 1207 case NVPTX::ST_v4i8_areg: return NVPTX::STV_i8_v4_areg; 1208 case NVPTX::ST_v4i8_ari: return NVPTX::STV_i8_v4_ari; 1209 case NVPTX::ST_v4i8_asi: return NVPTX::STV_i8_v4_asi; 1210 1211 case NVPTX::ST_v2i16_avar: return NVPTX::STV_i16_v2_avar; 1212 case NVPTX::ST_v2i16_areg: return NVPTX::STV_i16_v2_areg; 1213 case NVPTX::ST_v2i16_ari: return NVPTX::STV_i16_v2_ari; 1214 case NVPTX::ST_v2i16_asi: return NVPTX::STV_i16_v2_asi; 1215 case NVPTX::ST_v4i16_avar: return NVPTX::STV_i16_v4_avar; 1216 case NVPTX::ST_v4i16_areg: return NVPTX::STV_i16_v4_areg; 1217 case NVPTX::ST_v4i16_ari: return NVPTX::STV_i16_v4_ari; 1218 case NVPTX::ST_v4i16_asi: return NVPTX::STV_i16_v4_asi; 1219 1220 case NVPTX::ST_v2i32_avar: return NVPTX::STV_i32_v2_avar; 1221 case NVPTX::ST_v2i32_areg: return NVPTX::STV_i32_v2_areg; 1222 case NVPTX::ST_v2i32_ari: return NVPTX::STV_i32_v2_ari; 1223 case NVPTX::ST_v2i32_asi: return NVPTX::STV_i32_v2_asi; 1224 case NVPTX::ST_v4i32_avar: return NVPTX::STV_i32_v4_avar; 1225 case NVPTX::ST_v4i32_areg: return NVPTX::STV_i32_v4_areg; 1226 case NVPTX::ST_v4i32_ari: return NVPTX::STV_i32_v4_ari; 1227 case NVPTX::ST_v4i32_asi: return NVPTX::STV_i32_v4_asi; 1228 1229 case NVPTX::ST_v2f32_avar: return NVPTX::STV_f32_v2_avar; 1230 case NVPTX::ST_v2f32_areg: return NVPTX::STV_f32_v2_areg; 1231 case NVPTX::ST_v2f32_ari: return NVPTX::STV_f32_v2_ari; 1232 case NVPTX::ST_v2f32_asi: return NVPTX::STV_f32_v2_asi; 1233 case NVPTX::ST_v4f32_avar: return NVPTX::STV_f32_v4_avar; 1234 case NVPTX::ST_v4f32_areg: return NVPTX::STV_f32_v4_areg; 1235 case NVPTX::ST_v4f32_ari: return NVPTX::STV_f32_v4_ari; 1236 case NVPTX::ST_v4f32_asi: return NVPTX::STV_f32_v4_asi; 1237 1238 case NVPTX::ST_v2i64_avar: return NVPTX::STV_i64_v2_avar; 1239 case NVPTX::ST_v2i64_areg: return NVPTX::STV_i64_v2_areg; 1240 case NVPTX::ST_v2i64_ari: return NVPTX::STV_i64_v2_ari; 1241 case NVPTX::ST_v2i64_asi: return NVPTX::STV_i64_v2_asi; 1242 case NVPTX::ST_v2f64_avar: return NVPTX::STV_f64_v2_avar; 1243 case NVPTX::ST_v2f64_areg: return NVPTX::STV_f64_v2_areg; 1244 case NVPTX::ST_v2f64_ari: return NVPTX::STV_f64_v2_ari; 1245 case NVPTX::ST_v2f64_asi: return NVPTX::STV_f64_v2_asi; 1246 } 1247 return 0; 1248} 1249