//===-- VectorElementize.cpp - Convert vector ops to scalar element ops --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass converts operations on vector types to operations on their
// element types.
//
// For generic binary and unary vector instructions, the conversion is simple.
// Suppose we have
//        av = bv Vop cv
// where av, bv, and cv are vector virtual registers, and Vop is a vector op.
// This gets converted to the following:
//       a1 = b1 Sop c1
//       a2 = b2 Sop c2
//
// VectorToScalarMap maintains the vector vreg to scalar vreg mapping.
// For the above example, the map will look as follows:
// av => [a1, a2]
// bv => [b1, b2]
//
// In addition, getScalarVersion implements the following opcode->opcode map.
// Vop => Sop
// OtherVop => OtherSop
// ...
//
// For vector-specific instructions like vecbuild, vecshuffle, etc., the
// conversion is different. Look at comments near the functions with
// prefix createVec<...>.
//
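// For example, with <2 x i32> values the map sends VAddV2I32 to ADDi32rr, so
//        av = VAddV2I32 bv, cv
// becomes
//       a1 = ADDi32rr b1, c1
//       a2 = ADDi32rr b2, c2
//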
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/Passes.h"
#include "llvm/Constant.h"
#include "llvm/Instructions.h"
#include "llvm/Function.h"
#include "llvm/Pass.h"
#include "llvm/Type.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "NVPTX.h"
#include "NVPTXTargetMachine.h"

using namespace llvm;

namespace {

class LLVM_LIBRARY_VISIBILITY VectorElementize : public MachineFunctionPass {
  virtual bool runOnMachineFunction(MachineFunction &F);

  NVPTXTargetMachine &TM;
  MachineRegisterInfo *MRI;
  const NVPTXRegisterInfo *RegInfo;
  const NVPTXInstrInfo *InstrInfo;

  llvm::DenseMap<const TargetRegisterClass *, const TargetRegisterClass *>
  RegClassMap;
  llvm::DenseMap<unsigned, bool> SimpleMoveMap;

  llvm::DenseMap<unsigned, SmallVector<unsigned, 4> > VectorToScalarMap;

  bool isVectorInstr(MachineInstr *);

  SmallVector<unsigned, 4> getScalarRegisters(unsigned);
  unsigned getScalarVersion(unsigned);
  unsigned getScalarVersion(MachineInstr *);

  bool isVectorRegister(unsigned);
  const TargetRegisterClass *getScalarRegClass(const TargetRegisterClass *RC);
  unsigned numCopiesNeeded(MachineInstr *);

  void createLoadCopy(MachineFunction&, MachineInstr *,
                      std::vector<MachineInstr *>&);
  void createStoreCopy(MachineFunction&, MachineInstr *,
                       std::vector<MachineInstr *>&);

  void createVecDest(MachineFunction&, MachineInstr *,
                     std::vector<MachineInstr *>&);

  void createCopies(MachineFunction&, MachineInstr *,
                    std::vector<MachineInstr *>&);

  unsigned copyProp(MachineFunction&);
  unsigned removeDeadMoves(MachineFunction&);

  void elementize(MachineFunction&);

  bool isSimpleMove(MachineInstr *);

  void createVecShuffle(MachineFunction& F, MachineInstr *Instr,
                        std::vector<MachineInstr *>& copies);

  void createVecExtract(MachineFunction& F, MachineInstr *Instr,
                        std::vector<MachineInstr *>& copies);

  void createVecInsert(MachineFunction& F, MachineInstr *Instr,
                       std::vector<MachineInstr *>& copies);

  void createVecBuild(MachineFunction& F, MachineInstr *Instr,
                      std::vector<MachineInstr *>& copies);

public:

  static char ID; // Pass identification, replacement for typeid
  VectorElementize(NVPTXTargetMachine &tm)
  : MachineFunctionPass(ID), TM(tm) {}

  virtual const char *getPassName() const {
    return "Convert LLVM vector types to their element types";
  }
};

char VectorElementize::ID = 1;
}

static cl::opt<bool>
RemoveRedundantMoves("nvptx-remove-redundant-moves",
       cl::desc("NVPTX: Remove redundant moves introduced by vector lowering"),
                     cl::init(true));

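// Helpers that classify a machine instruction by the vector-instruction type
// field encoded in its TSFlags; a value of VecNOP means the instruction is not
// a vector instruction.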
#define VECINST(x) ((((x)->getDesc().TSFlags) & NVPTX::VecInstTypeMask) \
    >> NVPTX::VecInstTypeShift)
#define ISVECINST(x) (VECINST(x) != NVPTX::VecNOP)
#define ISVECLOAD(x)    (VECINST(x) == NVPTX::VecLoad)
#define ISVECSTORE(x)   (VECINST(x) == NVPTX::VecStore)
#define ISVECBUILD(x)   (VECINST(x) == NVPTX::VecBuild)
#define ISVECSHUFFLE(x) (VECINST(x) == NVPTX::VecShuffle)
#define ISVECEXTRACT(x) (VECINST(x) == NVPTX::VecExtract)
#define ISVECINSERT(x)  (VECINST(x) == NVPTX::VecInsert)
#define ISVECDEST(x)    (VECINST(x) == NVPTX::VecDest)

bool VectorElementize::isSimpleMove(MachineInstr *mi) {
  if (mi->isCopy())
    return true;
  unsigned TSFlags = (mi->getDesc().TSFlags & NVPTX::SimpleMoveMask)
        >> NVPTX::SimpleMoveShift;
  return (TSFlags == 1);
}

bool VectorElementize::isVectorInstr(MachineInstr *mi) {
  if ((mi->getOpcode() == NVPTX::PHI) ||
      (mi->getOpcode() == NVPTX::IMPLICIT_DEF) || mi->isCopy()) {
    MachineOperand dest = mi->getOperand(0);
    return isVectorRegister(dest.getReg());
  }
  return ISVECINST(mi);
}

unsigned VectorElementize::getScalarVersion(MachineInstr *mi) {
  return getScalarVersion(mi->getOpcode());
}

///=============================================================================
///Instr is assumed to be a vector instruction. For most vector instructions,
///the size of the destination vector register gives the number of scalar copies
///needed. For VecStore, the size of getOperand(0) gives the number of scalar
///copies needed. For VecExtract, the dest is a scalar, so getOperand(1) gives
///the number of scalar copies needed.
///=============================================================================
unsigned VectorElementize::numCopiesNeeded(MachineInstr *Instr) {
  unsigned numDefs=0;
  unsigned def=0;
  for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) {
    MachineOperand oper = Instr->getOperand(i);

    if (!oper.isReg()) continue;
    if (!oper.isDef()) continue;
    def = i;
    numDefs++;
  }
  assert((numDefs <= 1) && "Only 0 or 1 defs supported");

  if (numDefs == 1) {
    unsigned regnum = Instr->getOperand(def).getReg();
    if (ISVECEXTRACT(Instr))
      regnum = Instr->getOperand(1).getReg();
    return getNVPTXVectorSize(MRI->getRegClass(regnum));
  }
  else if (numDefs == 0) {
    assert(ISVECSTORE(Instr)
           && "Only 0 def instruction supported is vector store");

    unsigned regnum = Instr->getOperand(0).getReg();
    return getNVPTXVectorSize(MRI->getRegClass(regnum));
  }
  return 1;
}

const TargetRegisterClass *VectorElementize::
getScalarRegClass(const TargetRegisterClass *RC) {
  assert(isNVPTXVectorRegClass(RC) &&
         "Not a vector register class");
  return getNVPTXElemClass(RC);
}

bool VectorElementize::isVectorRegister(unsigned reg) {
  const TargetRegisterClass *RC=MRI->getRegClass(reg);
  return isNVPTXVectorRegClass(RC);
}

///=============================================================================
///For every vector register 'v' that is not already in the VectorToScalarMap,
///create n scalar registers of the corresponding element type, where n
///is 2 or 4 (getNVPTXVectorSize), and add them to the VectorToScalarMap.
///=============================================================================
SmallVector<unsigned, 4> VectorElementize::getScalarRegisters(unsigned regnum) {
  assert(isVectorRegister(regnum) && "Expecting a vector register here");
  // Create the scalar registers and put them in the map, if not already there.
  if (VectorToScalarMap.find(regnum) == VectorToScalarMap.end()) {
    const TargetRegisterClass *vecClass = MRI->getRegClass(regnum);
    const TargetRegisterClass *scalarClass = getScalarRegClass(vecClass);

    SmallVector<unsigned, 4> temp;

    for (unsigned i=0, e=getNVPTXVectorSize(vecClass); i!=e; ++i)
      temp.push_back(MRI->createVirtualRegister(scalarClass));

    VectorToScalarMap[regnum] = temp;
  }
  return VectorToScalarMap[regnum];
}

///=============================================================================
///For a vector load of the form
///va <= ldv2 [addr]
///the following multi-output instruction is created:
///[v1, v2] <= LD [addr]
///Look at NVPTXVector.td for the definitions of multi-output loads.
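///For example, a v2i32 load
///va <= LD_v2i32_avar [addr]
///becomes
///[v1, v2] <= LDV_i32_v2_avar [addr]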
///=============================================================================
void VectorElementize::createLoadCopy(MachineFunction& F, MachineInstr *Instr,
                                      std::vector<MachineInstr *>& copies) {
  copies.push_back(F.CloneMachineInstr(Instr));

  MachineInstr *copy=copies[0];
  copy->setDesc(InstrInfo->get(getScalarVersion(copy)));

  // Remove the dest, that should be a vector operand.
  MachineOperand dest = copy->getOperand(0);
  unsigned regnum = dest.getReg();

  SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum);
  copy->RemoveOperand(0);

  std::vector<MachineOperand> otherOperands;
  for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
    otherOperands.push_back(copy->getOperand(i));

  for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
    copy->RemoveOperand(0);

  for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i) {
    copy->addOperand(MachineOperand::CreateReg(scalarRegs[i], true));
  }

  for (unsigned i=0, e=otherOperands.size(); i!=e; ++i)
    copy->addOperand(otherOperands[i]);
}

///=============================================================================
///For a vector store of the form
///stv2 va, [addr]
///the following multi-input instruction is created:
///ST v1, v2, [addr]
///Look at NVPTXVector.td for the definitions of multi-input stores.
///=============================================================================
void VectorElementize::createStoreCopy(MachineFunction& F, MachineInstr *Instr,
                                       std::vector<MachineInstr *>& copies) {
  copies.push_back(F.CloneMachineInstr(Instr));

  MachineInstr *copy=copies[0];
  copy->setDesc(InstrInfo->get(getScalarVersion(copy)));

  MachineOperand src = copy->getOperand(0);
  unsigned regnum = src.getReg();

  SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum);
  copy->RemoveOperand(0);

  std::vector<MachineOperand> otherOperands;
  for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
    otherOperands.push_back(copy->getOperand(i));

  for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
    copy->RemoveOperand(0);

  for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i)
    copy->addOperand(MachineOperand::CreateReg(scalarRegs[i], false));

  for (unsigned i=0, e=otherOperands.size(); i!=e; ++i)
    copy->addOperand(otherOperands[i]);
}

///=============================================================================
///va <= shufflev2 vb, vc, <i1>, <i2>
///gets converted to 2 moves into a1 and a2. The sources of the moves depend on
///i1 and i2. i1, i2 can belong to the set {0, 1, 2, 3} for shufflev2. For
///shufflev4 the set is {0,..,7}. For example, if i1=3, i2=0, the move
///instructions will be
///a1 <= c2
///a2 <= b1
///=============================================================================
void VectorElementize::createVecShuffle(MachineFunction& F, MachineInstr *Instr,
                                        std::vector<MachineInstr *>& copies) {
  unsigned numcopies=numCopiesNeeded(Instr);

  unsigned destregnum = Instr->getOperand(0).getReg();
  unsigned src1regnum = Instr->getOperand(1).getReg();
  unsigned src2regnum = Instr->getOperand(2).getReg();

  SmallVector<unsigned, 4> dest = getScalarRegisters(destregnum);
  SmallVector<unsigned, 4> src1 = getScalarRegisters(src1regnum);
  SmallVector<unsigned, 4> src2 = getScalarRegisters(src2regnum);

  DebugLoc DL = Instr->getDebugLoc();

  for (unsigned i=0; i<numcopies; i++) {
    MachineInstr *copy = BuildMI(F, DL,
                              InstrInfo->get(getScalarVersion(Instr)), dest[i]);
    MachineOperand which=Instr->getOperand(3+i);
    assert(which.isImm() && "Shuffle operand not a constant");

    int src=which.getImm();
    int elem=src%numcopies;

    if (which.getImm() < numcopies)
      copy->addOperand(MachineOperand::CreateReg(src1[elem], false));
    else
      copy->addOperand(MachineOperand::CreateReg(src2[elem], false));
    copies.push_back(copy);
  }
}

///=============================================================================
///a <= extractv2 va, <i1>
///gets turned into a simple move to the scalar register a. The source depends
///on i1.
///=============================================================================
void VectorElementize::createVecExtract(MachineFunction& F, MachineInstr *Instr,
                                        std::vector<MachineInstr *>& copies) {
  unsigned srcregnum = Instr->getOperand(1).getReg();

  SmallVector<unsigned, 4> src = getScalarRegisters(srcregnum);

  MachineOperand which = Instr->getOperand(2);
  assert(which.isImm() && "Extract operand not a constant");

  DebugLoc DL = Instr->getDebugLoc();

  MachineInstr *copy = BuildMI(F, DL, InstrInfo->get(getScalarVersion(Instr)),
                               Instr->getOperand(0).getReg());
  copy->addOperand(MachineOperand::CreateReg(src[which.getImm()], false));

  copies.push_back(copy);
}

///=============================================================================
///va <= vecinsertv2 vb, c, <i1>
///This instruction copies all elements of vb to va, except the 'i1'th element.
///The scalar value c becomes the 'i1'th element of va.
///This gets translated to 2 (4 for vecinsertv4) moves.
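///For example, va <= vecinsertv2 vb, c, <1> becomes
///a1 <= b1
///a2 <= c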
///=============================================================================
void VectorElementize::createVecInsert(MachineFunction& F, MachineInstr *Instr,
                                       std::vector<MachineInstr *>& copies) {
  unsigned numcopies=numCopiesNeeded(Instr);

  unsigned destregnum = Instr->getOperand(0).getReg();
  unsigned srcregnum = Instr->getOperand(1).getReg();

  SmallVector<unsigned, 4> dest = getScalarRegisters(destregnum);
  SmallVector<unsigned, 4> src = getScalarRegisters(srcregnum);

  MachineOperand which=Instr->getOperand(3);
  assert(which.isImm() && "Insert operand not a constant");
  unsigned int elem=which.getImm();

  DebugLoc DL = Instr->getDebugLoc();

  for (unsigned i=0; i<numcopies; i++) {
    MachineInstr *copy = BuildMI(F, DL,
                              InstrInfo->get(getScalarVersion(Instr)), dest[i]);

    if (i != elem)
      copy->addOperand(MachineOperand::CreateReg(src[i], false));
    else
      copy->addOperand(Instr->getOperand(2));

    copies.push_back(copy);
  }
}

///=============================================================================
///va <= buildv2 b1, b2
///gets translated to
///a1 <= b1
///a2 <= b2
///=============================================================================
void VectorElementize::createVecBuild(MachineFunction& F, MachineInstr *Instr,
                                      std::vector<MachineInstr *>& copies) {
  unsigned numcopies=numCopiesNeeded(Instr);

  unsigned destregnum = Instr->getOperand(0).getReg();

  SmallVector<unsigned, 4> dest = getScalarRegisters(destregnum);

  DebugLoc DL = Instr->getDebugLoc();

  for (unsigned i=0; i<numcopies; i++) {
    MachineInstr *copy = BuildMI(F, DL,
                              InstrInfo->get(getScalarVersion(Instr)), dest[i]);

    copy->addOperand(Instr->getOperand(1+i));

    copies.push_back(copy);
  }
}

///=============================================================================
///For a tex instruction of the form
///va <= op [scalar operands]
///the following multi-output instruction is created:
///[v1, v2] <= op' [scalar operands]
///=============================================================================
void VectorElementize::createVecDest(MachineFunction& F, MachineInstr *Instr,
                                     std::vector<MachineInstr *>& copies) {
  copies.push_back(F.CloneMachineInstr(Instr));

  MachineInstr *copy=copies[0];
  copy->setDesc(InstrInfo->get(getScalarVersion(copy)));

  // Remove the dest, that should be a vector operand.
  MachineOperand dest = copy->getOperand(0);
  unsigned regnum = dest.getReg();

  SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum);
  copy->RemoveOperand(0);

  std::vector<MachineOperand> otherOperands;
  for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
    otherOperands.push_back(copy->getOperand(i));

  for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
    copy->RemoveOperand(0);

  for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i)
    copy->addOperand(MachineOperand::CreateReg(scalarRegs[i], true));

  for (unsigned i=0, e=otherOperands.size(); i!=e; ++i)
    copy->addOperand(otherOperands[i]);
}

///=============================================================================
///Look at the vector instruction type and dispatch to the createVec<...>
///function that creates the scalar copies.
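///For any other (generic) vector instruction, numCopiesNeeded clones of it are
///made; in the i-th clone, every vector register operand is replaced by the
///i-th scalar register from getScalarRegisters, and all other operands are
///kept unchanged.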
///=============================================================================
void VectorElementize::createCopies(MachineFunction& F, MachineInstr *Instr,
                                    std::vector<MachineInstr *>& copies) {
  if (ISVECLOAD(Instr)) {
    createLoadCopy(F, Instr, copies);
    return;
  }
  if (ISVECSTORE(Instr)) {
    createStoreCopy(F, Instr, copies);
    return;
  }
  if (ISVECSHUFFLE(Instr)) {
    createVecShuffle(F, Instr, copies);
    return;
  }
  if (ISVECEXTRACT(Instr)) {
    createVecExtract(F, Instr, copies);
    return;
  }
  if (ISVECINSERT(Instr)) {
    createVecInsert(F, Instr, copies);
    return;
  }
  if (ISVECDEST(Instr)) {
    createVecDest(F, Instr, copies);
    return;
  }
  if (ISVECBUILD(Instr)) {
    createVecBuild(F, Instr, copies);
    return;
  }

  unsigned numcopies=numCopiesNeeded(Instr);

  for (unsigned i=0; i<numcopies; ++i)
    copies.push_back(F.CloneMachineInstr(Instr));

  for (unsigned i=0; i<numcopies; ++i) {
    MachineInstr *copy = copies[i];

    std::vector<MachineOperand> allOperands;
    std::vector<bool> isDef;

    for (unsigned j=0, e=copy->getNumOperands(); j!=e; ++j) {
      MachineOperand oper = copy->getOperand(j);
      allOperands.push_back(oper);
      if (oper.isReg())
        isDef.push_back(oper.isDef());
      else
        isDef.push_back(false);
    }

    for (unsigned j=0, e=copy->getNumOperands(); j!=e; ++j)
      copy->RemoveOperand(0);

    copy->setDesc(InstrInfo->get(getScalarVersion(Instr)));

    for (unsigned j=0, e=allOperands.size(); j!=e; ++j) {
      MachineOperand oper=allOperands[j];
      if (oper.isReg()) {
        unsigned regnum = oper.getReg();
        if (isVectorRegister(regnum)) {
          SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum);
          copy->addOperand(MachineOperand::CreateReg(scalarRegs[i], isDef[j]));
        }
        else
          copy->addOperand(oper);
      }
      else
        copy->addOperand(oper);
    }
  }
}

///=============================================================================
///Scan through all basic blocks, looking for vector instructions.
///For each vector instruction I, insert the scalar copies before I, and
///add I into the toRemove vector. Finally remove all instructions in toRemove.
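///The original instructions are deleted only after the walk over a block is
///complete, so the instruction iterator is not invalidated during the scan.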
///=============================================================================
void VectorElementize::elementize(MachineFunction &F) {
  for (MachineFunction::reverse_iterator BI=F.rbegin(), BE=F.rend();
      BI!=BE; ++BI) {
    MachineBasicBlock *BB = &*BI;

    std::vector<MachineInstr *> copies;
    std::vector<MachineInstr *> toRemove;

    for (MachineBasicBlock::iterator II=BB->begin(), IE=BB->end();
        II!=IE; ++II) {
      MachineInstr *Instr = &*II;

      if (!isVectorInstr(Instr))
        continue;

      copies.clear();
      createCopies(F, Instr, copies);
      for (unsigned i=0, e=copies.size(); i!=e; ++i)
        BB->insert(II, copies[i]);

      assert((copies.size() > 0) && "Problem in createCopies");
      toRemove.push_back(Instr);
    }
    for (unsigned i=0, e=toRemove.size(); i!=e; ++i)
      F.DeleteMachineInstr(toRemove[i]->getParent()->remove(toRemove[i]));
  }
}

///=============================================================================
///a <= b
///...
///...
///x <= op(a, ...)
///gets converted to
///
///x <= op(b, ...)
///The original move is still present. This works on SSA form machine code.
///Note that a <= b should be a simple vreg-to-vreg move instruction.
///TBD: I didn't find a function that can do replaceOperand, so I remove
///all operands and add all of them again, replacing the relevant ones while
///adding.
///=============================================================================
unsigned VectorElementize::copyProp(MachineFunction &F) {
  unsigned numReplacements = 0;

  for (MachineFunction::reverse_iterator BI=F.rbegin(), BE=F.rend(); BI!=BE;
      ++BI) {
    MachineBasicBlock *BB = &*BI;

    for (MachineBasicBlock::iterator II=BB->begin(), IE=BB->end(); II!=IE;
        ++II) {
      MachineInstr *Instr = &*II;

      // Don't do copy propagation on PHI as it will cause unnecessary
      // live range overlap.
      if ((Instr->getOpcode() == TargetOpcode::PHI) ||
          (Instr->getOpcode() == TargetOpcode::DBG_VALUE))
        continue;

      // First pass: check whether any use is defined by a simple
      // vreg-to-vreg move that can be propagated.
      bool needsReplacement = false;

      for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) {
        MachineOperand oper = Instr->getOperand(i);
        if (!oper.isReg()) continue;
        if (oper.isDef()) continue;
        if (!RegInfo->isVirtualRegister(oper.getReg())) continue;

        MachineInstr *defInstr = MRI->getVRegDef(oper.getReg());

        if (!defInstr) continue;

        if (!isSimpleMove(defInstr)) continue;

        MachineOperand defSrc = defInstr->getOperand(1);
        if (!defSrc.isReg()) continue;
        if (!RegInfo->isVirtualRegister(defSrc.getReg())) continue;

        needsReplacement = true;
      }
      if (!needsReplacement) continue;

      numReplacements++;

      // Second pass: rebuild the operand list, substituting the source of the
      // defining move where possible.
      std::vector<MachineOperand> operands;

      for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) {
        MachineOperand oper = Instr->getOperand(i);
        bool flag = false;
        do {
          if (!(oper.isReg()))
            break;
          if (oper.isDef())
            break;
          if (!(RegInfo->isVirtualRegister(oper.getReg())))
            break;
          MachineInstr *defInstr = MRI->getVRegDef(oper.getReg());
          if (!defInstr)
            break;
          if (!(isSimpleMove(defInstr)))
            break;
          MachineOperand defSrc = defInstr->getOperand(1);
          if (!(defSrc.isReg()))
            break;
          if (!(RegInfo->isVirtualRegister(defSrc.getReg())))
            break;
          operands.push_back(defSrc);
          flag = true;
        } while (0);
        if (flag == false)
          operands.push_back(oper);
      }

      for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i)
        Instr->RemoveOperand(0);
      for (unsigned i=0, e=operands.size(); i!=e; ++i)
        Instr->addOperand(operands[i]);
    }
  }
  return numReplacements;
}

///=============================================================================
///Look for simple vreg-to-vreg moves whose destination register has no uses
///(use_empty()), add them to the deadMoves vector, and then remove all
///instructions in deadMoves.
///=============================================================================
unsigned VectorElementize::removeDeadMoves(MachineFunction &F) {
  std::vector<MachineInstr *> deadMoves;
  for (MachineFunction::reverse_iterator BI=F.rbegin(), BE=F.rend(); BI!=BE;
      ++BI) {
    MachineBasicBlock *BB = &*BI;

    for (MachineBasicBlock::iterator II=BB->begin(), IE=BB->end(); II!=IE;
        ++II) {
      MachineInstr *Instr = &*II;

      if (!isSimpleMove(Instr)) continue;

      MachineOperand dest = Instr->getOperand(0);
      assert(dest.isReg() && "dest of move not a register");
      assert(RegInfo->isVirtualRegister(dest.getReg()) &&
             "dest of move not a virtual register");

      if (MRI->use_empty(dest.getReg())) {
        deadMoves.push_back(Instr);
      }
    }
  }

  for (unsigned i=0, e=deadMoves.size(); i!=e; ++i)
    F.DeleteMachineInstr(deadMoves[i]->getParent()->remove(deadMoves[i]));

  return deadMoves.size();
}

///=============================================================================
///Main function for this pass.
///=============================================================================
bool VectorElementize::runOnMachineFunction(MachineFunction &F) {
  MRI = &F.getRegInfo();

  RegInfo = TM.getRegisterInfo();
  InstrInfo = TM.getInstrInfo();

  VectorToScalarMap.clear();

  elementize(F);

  // Iterate copy propagation and dead-move removal until no more copies
  // can be propagated.
  if (RemoveRedundantMoves)
    while (1) {
      if (copyProp(F) == 0) break;
      removeDeadMoves(F);
    }

  return true;
}

FunctionPass *llvm::createVectorElementizePass(NVPTXTargetMachine &tm) {
  return new VectorElementize(tm);
}

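///=============================================================================
///The opcode (Vop => Sop) map described in the file header. PHI, IMPLICIT_DEF
///and COPY map to themselves; any other opcode without an entry below is a bug
///in NVPTXVector.td (see the llvm_unreachable default).
///=============================================================================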
unsigned VectorElementize::getScalarVersion(unsigned opcode) {
  if (opcode == NVPTX::PHI)
    return opcode;
  if (opcode == NVPTX::IMPLICIT_DEF)
    return opcode;
  switch(opcode) {
  default: llvm_unreachable("Scalar version not set, fix NVPTXVector.td");
  case TargetOpcode::COPY: return TargetOpcode::COPY;
  case NVPTX::AddCCCV2I32: return NVPTX::ADDCCCi32rr;
  case NVPTX::AddCCCV4I32: return NVPTX::ADDCCCi32rr;
  case NVPTX::AddCCV2I32: return NVPTX::ADDCCi32rr;
  case NVPTX::AddCCV4I32: return NVPTX::ADDCCi32rr;
  case NVPTX::Build_Vector2_f32: return NVPTX::FMOV32rr;
  case NVPTX::Build_Vector2_f64: return NVPTX::FMOV64rr;
  case NVPTX::Build_Vector2_i16: return NVPTX::IMOV16rr;
  case NVPTX::Build_Vector2_i32: return NVPTX::IMOV32rr;
  case NVPTX::Build_Vector2_i64: return NVPTX::IMOV64rr;
  case NVPTX::Build_Vector2_i8: return NVPTX::IMOV8rr;
  case NVPTX::Build_Vector4_f32: return NVPTX::FMOV32rr;
  case NVPTX::Build_Vector4_i16: return NVPTX::IMOV16rr;
  case NVPTX::Build_Vector4_i32: return NVPTX::IMOV32rr;
  case NVPTX::Build_Vector4_i8: return NVPTX::IMOV8rr;
  case NVPTX::CVTv2i16tov2i32: return NVPTX::Zint_extendext16to32;
  case NVPTX::CVTv2i64tov2i32: return NVPTX::TRUNC_64to32;
  case NVPTX::CVTv2i8tov2i32: return NVPTX::Zint_extendext8to32;
  case NVPTX::CVTv4i16tov4i32: return NVPTX::Zint_extendext16to32;
  case NVPTX::CVTv4i8tov4i32: return NVPTX::Zint_extendext8to32;
  case NVPTX::F32MAD_ftzV2: return NVPTX::FMAD32_ftzrrr;
  case NVPTX::F32MADV2: return NVPTX::FMAD32rrr;
  case NVPTX::F32MAD_ftzV4: return NVPTX::FMAD32_ftzrrr;
  case NVPTX::F32MADV4: return NVPTX::FMAD32rrr;
  case NVPTX::F32FMA_ftzV2: return NVPTX::FMA32_ftzrrr;
  case NVPTX::F32FMAV2: return NVPTX::FMA32rrr;
  case NVPTX::F32FMA_ftzV4: return NVPTX::FMA32_ftzrrr;
  case NVPTX::F32FMAV4: return NVPTX::FMA32rrr;
  case NVPTX::F64FMAV2: return NVPTX::FMA64rrr;
  case NVPTX::FVecEQV2F32: return NVPTX::FSetEQf32rr_toi32;
  case NVPTX::FVecEQV2F64: return NVPTX::FSetEQf64rr_toi64;
  case NVPTX::FVecEQV4F32: return NVPTX::FSetEQf32rr_toi32;
  case NVPTX::FVecGEV2F32: return NVPTX::FSetGEf32rr_toi32;
  case NVPTX::FVecGEV2F64: return NVPTX::FSetGEf64rr_toi64;
  case NVPTX::FVecGEV4F32: return NVPTX::FSetGEf32rr_toi32;
  case NVPTX::FVecGTV2F32: return NVPTX::FSetGTf32rr_toi32;
  case NVPTX::FVecGTV2F64: return NVPTX::FSetGTf64rr_toi64;
  case NVPTX::FVecGTV4F32: return NVPTX::FSetGTf32rr_toi32;
  case NVPTX::FVecLEV2F32: return NVPTX::FSetLEf32rr_toi32;
  case NVPTX::FVecLEV2F64: return NVPTX::FSetLEf64rr_toi64;
  case NVPTX::FVecLEV4F32: return NVPTX::FSetLEf32rr_toi32;
  case NVPTX::FVecLTV2F32: return NVPTX::FSetLTf32rr_toi32;
  case NVPTX::FVecLTV2F64: return NVPTX::FSetLTf64rr_toi64;
  case NVPTX::FVecLTV4F32: return NVPTX::FSetLTf32rr_toi32;
  case NVPTX::FVecNANV2F32: return NVPTX::FSetNANf32rr_toi32;
  case NVPTX::FVecNANV2F64: return NVPTX::FSetNANf64rr_toi64;
  case NVPTX::FVecNANV4F32: return NVPTX::FSetNANf32rr_toi32;
  case NVPTX::FVecNEV2F32: return NVPTX::FSetNEf32rr_toi32;
  case NVPTX::FVecNEV2F64: return NVPTX::FSetNEf64rr_toi64;
  case NVPTX::FVecNEV4F32: return NVPTX::FSetNEf32rr_toi32;
  case NVPTX::FVecNUMV2F32: return NVPTX::FSetNUMf32rr_toi32;
  case NVPTX::FVecNUMV2F64: return NVPTX::FSetNUMf64rr_toi64;
  case NVPTX::FVecNUMV4F32: return NVPTX::FSetNUMf32rr_toi32;
  case NVPTX::FVecUEQV2F32: return NVPTX::FSetUEQf32rr_toi32;
  case NVPTX::FVecUEQV2F64: return NVPTX::FSetUEQf64rr_toi64;
  case NVPTX::FVecUEQV4F32: return NVPTX::FSetUEQf32rr_toi32;
  case NVPTX::FVecUGEV2F32: return NVPTX::FSetUGEf32rr_toi32;
  case NVPTX::FVecUGEV2F64: return NVPTX::FSetUGEf64rr_toi64;
  case NVPTX::FVecUGEV4F32: return NVPTX::FSetUGEf32rr_toi32;
  case NVPTX::FVecUGTV2F32: return NVPTX::FSetUGTf32rr_toi32;
  case NVPTX::FVecUGTV2F64: return NVPTX::FSetUGTf64rr_toi64;
  case NVPTX::FVecUGTV4F32: return NVPTX::FSetUGTf32rr_toi32;
  case NVPTX::FVecULEV2F32: return NVPTX::FSetULEf32rr_toi32;
  case NVPTX::FVecULEV2F64: return NVPTX::FSetULEf64rr_toi64;
  case NVPTX::FVecULEV4F32: return NVPTX::FSetULEf32rr_toi32;
  case NVPTX::FVecULTV2F32: return NVPTX::FSetULTf32rr_toi32;
  case NVPTX::FVecULTV2F64: return NVPTX::FSetULTf64rr_toi64;
  case NVPTX::FVecULTV4F32: return NVPTX::FSetULTf32rr_toi32;
  case NVPTX::FVecUNEV2F32: return NVPTX::FSetUNEf32rr_toi32;
  case NVPTX::FVecUNEV2F64: return NVPTX::FSetUNEf64rr_toi64;
  case NVPTX::FVecUNEV4F32: return NVPTX::FSetUNEf32rr_toi32;
  case NVPTX::I16MADV2: return NVPTX::MAD16rrr;
  case NVPTX::I16MADV4: return NVPTX::MAD16rrr;
  case NVPTX::I32MADV2: return NVPTX::MAD32rrr;
  case NVPTX::I32MADV4: return NVPTX::MAD32rrr;
  case NVPTX::I64MADV2: return NVPTX::MAD64rrr;
  case NVPTX::I8MADV2: return NVPTX::MAD8rrr;
  case NVPTX::I8MADV4: return NVPTX::MAD8rrr;
  case NVPTX::ShiftLV2I16: return NVPTX::SHLi16rr;
  case NVPTX::ShiftLV2I32: return NVPTX::SHLi32rr;
  case NVPTX::ShiftLV2I64: return NVPTX::SHLi64rr;
  case NVPTX::ShiftLV2I8: return NVPTX::SHLi8rr;
  case NVPTX::ShiftLV4I16: return NVPTX::SHLi16rr;
  case NVPTX::ShiftLV4I32: return NVPTX::SHLi32rr;
  case NVPTX::ShiftLV4I8: return NVPTX::SHLi8rr;
  case NVPTX::ShiftRAV2I16: return NVPTX::SRAi16rr;
  case NVPTX::ShiftRAV2I32: return NVPTX::SRAi32rr;
  case NVPTX::ShiftRAV2I64: return NVPTX::SRAi64rr;
  case NVPTX::ShiftRAV2I8: return NVPTX::SRAi8rr;
  case NVPTX::ShiftRAV4I16: return NVPTX::SRAi16rr;
  case NVPTX::ShiftRAV4I32: return NVPTX::SRAi32rr;
  case NVPTX::ShiftRAV4I8: return NVPTX::SRAi8rr;
  case NVPTX::ShiftRLV2I16: return NVPTX::SRLi16rr;
  case NVPTX::ShiftRLV2I32: return NVPTX::SRLi32rr;
  case NVPTX::ShiftRLV2I64: return NVPTX::SRLi64rr;
  case NVPTX::ShiftRLV2I8: return NVPTX::SRLi8rr;
  case NVPTX::ShiftRLV4I16: return NVPTX::SRLi16rr;
  case NVPTX::ShiftRLV4I32: return NVPTX::SRLi32rr;
  case NVPTX::ShiftRLV4I8: return NVPTX::SRLi8rr;
  case NVPTX::SubCCCV2I32: return NVPTX::SUBCCCi32rr;
  case NVPTX::SubCCCV4I32: return NVPTX::SUBCCCi32rr;
  case NVPTX::SubCCV2I32: return NVPTX::SUBCCi32rr;
  case NVPTX::SubCCV4I32: return NVPTX::SUBCCi32rr;
  case NVPTX::V2F32Div_prec_ftz: return NVPTX::FDIV32rr_prec_ftz;
  case NVPTX::V2F32Div_prec: return NVPTX::FDIV32rr_prec;
  case NVPTX::V2F32Div_ftz: return NVPTX::FDIV32rr_ftz;
  case NVPTX::V2F32Div: return NVPTX::FDIV32rr;
  case NVPTX::V2F32_Select: return NVPTX::SELECTf32rr;
  case NVPTX::V2F64Div: return NVPTX::FDIV64rr;
  case NVPTX::V2F64_Select: return NVPTX::SELECTf64rr;
  case NVPTX::V2I16_Select: return NVPTX::SELECTi16rr;
  case NVPTX::V2I32_Select: return NVPTX::SELECTi32rr;
  case NVPTX::V2I64_Select: return NVPTX::SELECTi64rr;
  case NVPTX::V2I8_Select: return NVPTX::SELECTi8rr;
  case NVPTX::V2f32Extract: return NVPTX::FMOV32rr;
  case NVPTX::V2f32Insert: return NVPTX::FMOV32rr;
  case NVPTX::V2f32Mov: return NVPTX::FMOV32rr;
  case NVPTX::V2f64Extract: return NVPTX::FMOV64rr;
  case NVPTX::V2f64Insert: return NVPTX::FMOV64rr;
  case NVPTX::V2f64Mov: return NVPTX::FMOV64rr;
  case NVPTX::V2i16Extract: return NVPTX::IMOV16rr;
  case NVPTX::V2i16Insert: return NVPTX::IMOV16rr;
  case NVPTX::V2i16Mov: return NVPTX::IMOV16rr;
  case NVPTX::V2i32Extract: return NVPTX::IMOV32rr;
  case NVPTX::V2i32Insert: return NVPTX::IMOV32rr;
  case NVPTX::V2i32Mov: return NVPTX::IMOV32rr;
  case NVPTX::V2i64Extract: return NVPTX::IMOV64rr;
  case NVPTX::V2i64Insert: return NVPTX::IMOV64rr;
  case NVPTX::V2i64Mov: return NVPTX::IMOV64rr;
  case NVPTX::V2i8Extract: return NVPTX::IMOV8rr;
  case NVPTX::V2i8Insert: return NVPTX::IMOV8rr;
  case NVPTX::V2i8Mov: return NVPTX::IMOV8rr;
  case NVPTX::V4F32Div_prec_ftz: return NVPTX::FDIV32rr_prec_ftz;
  case NVPTX::V4F32Div_prec: return NVPTX::FDIV32rr_prec;
  case NVPTX::V4F32Div_ftz: return NVPTX::FDIV32rr_ftz;
  case NVPTX::V4F32Div: return NVPTX::FDIV32rr;
  case NVPTX::V4F32_Select: return NVPTX::SELECTf32rr;
  case NVPTX::V4I16_Select: return NVPTX::SELECTi16rr;
  case NVPTX::V4I32_Select: return NVPTX::SELECTi32rr;
  case NVPTX::V4I8_Select: return NVPTX::SELECTi8rr;
  case NVPTX::V4f32Extract: return NVPTX::FMOV32rr;
  case NVPTX::V4f32Insert: return NVPTX::FMOV32rr;
  case NVPTX::V4f32Mov: return NVPTX::FMOV32rr;
  case NVPTX::V4i16Extract: return NVPTX::IMOV16rr;
  case NVPTX::V4i16Insert: return NVPTX::IMOV16rr;
  case NVPTX::V4i16Mov: return NVPTX::IMOV16rr;
  case NVPTX::V4i32Extract: return NVPTX::IMOV32rr;
  case NVPTX::V4i32Insert: return NVPTX::IMOV32rr;
  case NVPTX::V4i32Mov: return NVPTX::IMOV32rr;
  case NVPTX::V4i8Extract: return NVPTX::IMOV8rr;
  case NVPTX::V4i8Insert: return NVPTX::IMOV8rr;
  case NVPTX::V4i8Mov: return NVPTX::IMOV8rr;
  case NVPTX::VAddV2I16: return NVPTX::ADDi16rr;
  case NVPTX::VAddV2I32: return NVPTX::ADDi32rr;
  case NVPTX::VAddV2I64: return NVPTX::ADDi64rr;
  case NVPTX::VAddV2I8: return NVPTX::ADDi8rr;
  case NVPTX::VAddV4I16: return NVPTX::ADDi16rr;
  case NVPTX::VAddV4I32: return NVPTX::ADDi32rr;
  case NVPTX::VAddV4I8: return NVPTX::ADDi8rr;
  case NVPTX::VAddfV2F32: return NVPTX::FADDf32rr;
  case NVPTX::VAddfV2F32_ftz: return NVPTX::FADDf32rr_ftz;
  case NVPTX::VAddfV2F64: return NVPTX::FADDf64rr;
  case NVPTX::VAddfV4F32: return NVPTX::FADDf32rr;
  case NVPTX::VAddfV4F32_ftz: return NVPTX::FADDf32rr_ftz;
  case NVPTX::VAndV2I16: return NVPTX::ANDb16rr;
  case NVPTX::VAndV2I32: return NVPTX::ANDb32rr;
  case NVPTX::VAndV2I64: return NVPTX::ANDb64rr;
  case NVPTX::VAndV2I8: return NVPTX::ANDb8rr;
  case NVPTX::VAndV4I16: return NVPTX::ANDb16rr;
  case NVPTX::VAndV4I32: return NVPTX::ANDb32rr;
  case NVPTX::VAndV4I8: return NVPTX::ANDb8rr;
  case NVPTX::VMulfV2F32_ftz: return NVPTX::FMULf32rr_ftz;
  case NVPTX::VMulfV2F32: return NVPTX::FMULf32rr;
  case NVPTX::VMulfV2F64: return NVPTX::FMULf64rr;
  case NVPTX::VMulfV4F32_ftz: return NVPTX::FMULf32rr_ftz;
  case NVPTX::VMulfV4F32: return NVPTX::FMULf32rr;
  case NVPTX::VMultHSV2I16: return NVPTX::MULTHSi16rr;
  case NVPTX::VMultHSV2I32: return NVPTX::MULTHSi32rr;
  case NVPTX::VMultHSV2I64: return NVPTX::MULTHSi64rr;
  case NVPTX::VMultHSV2I8: return NVPTX::MULTHSi8rr;
  case NVPTX::VMultHSV4I16: return NVPTX::MULTHSi16rr;
  case NVPTX::VMultHSV4I32: return NVPTX::MULTHSi32rr;
  case NVPTX::VMultHSV4I8: return NVPTX::MULTHSi8rr;
  case NVPTX::VMultHUV2I16: return NVPTX::MULTHUi16rr;
  case NVPTX::VMultHUV2I32: return NVPTX::MULTHUi32rr;
  case NVPTX::VMultHUV2I64: return NVPTX::MULTHUi64rr;
  case NVPTX::VMultHUV2I8: return NVPTX::MULTHUi8rr;
  case NVPTX::VMultHUV4I16: return NVPTX::MULTHUi16rr;
  case NVPTX::VMultHUV4I32: return NVPTX::MULTHUi32rr;
  case NVPTX::VMultHUV4I8: return NVPTX::MULTHUi8rr;
  case NVPTX::VMultV2I16: return NVPTX::MULTi16rr;
  case NVPTX::VMultV2I32: return NVPTX::MULTi32rr;
  case NVPTX::VMultV2I64: return NVPTX::MULTi64rr;
  case NVPTX::VMultV2I8: return NVPTX::MULTi8rr;
  case NVPTX::VMultV4I16: return NVPTX::MULTi16rr;
  case NVPTX::VMultV4I32: return NVPTX::MULTi32rr;
  case NVPTX::VMultV4I8: return NVPTX::MULTi8rr;
  case NVPTX::VNegV2I16: return NVPTX::INEG16;
  case NVPTX::VNegV2I32: return NVPTX::INEG32;
  case NVPTX::VNegV2I64: return NVPTX::INEG64;
  case NVPTX::VNegV2I8: return NVPTX::INEG8;
  case NVPTX::VNegV4I16: return NVPTX::INEG16;
  case NVPTX::VNegV4I32: return NVPTX::INEG32;
  case NVPTX::VNegV4I8: return NVPTX::INEG8;
  case NVPTX::VNegv2f32: return NVPTX::FNEGf32;
  case NVPTX::VNegv2f32_ftz: return NVPTX::FNEGf32_ftz;
  case NVPTX::VNegv2f64: return NVPTX::FNEGf64;
  case NVPTX::VNegv4f32: return NVPTX::FNEGf32;
  case NVPTX::VNegv4f32_ftz: return NVPTX::FNEGf32_ftz;
  case NVPTX::VNotV2I16: return NVPTX::NOT16;
  case NVPTX::VNotV2I32: return NVPTX::NOT32;
  case NVPTX::VNotV2I64: return NVPTX::NOT64;
  case NVPTX::VNotV2I8: return NVPTX::NOT8;
  case NVPTX::VNotV4I16: return NVPTX::NOT16;
  case NVPTX::VNotV4I32: return NVPTX::NOT32;
  case NVPTX::VNotV4I8: return NVPTX::NOT8;
  case NVPTX::VOrV2I16: return NVPTX::ORb16rr;
  case NVPTX::VOrV2I32: return NVPTX::ORb32rr;
  case NVPTX::VOrV2I64: return NVPTX::ORb64rr;
  case NVPTX::VOrV2I8: return NVPTX::ORb8rr;
  case NVPTX::VOrV4I16: return NVPTX::ORb16rr;
  case NVPTX::VOrV4I32: return NVPTX::ORb32rr;
  case NVPTX::VOrV4I8: return NVPTX::ORb8rr;
  case NVPTX::VSDivV2I16: return NVPTX::SDIVi16rr;
  case NVPTX::VSDivV2I32: return NVPTX::SDIVi32rr;
  case NVPTX::VSDivV2I64: return NVPTX::SDIVi64rr;
  case NVPTX::VSDivV2I8: return NVPTX::SDIVi8rr;
  case NVPTX::VSDivV4I16: return NVPTX::SDIVi16rr;
  case NVPTX::VSDivV4I32: return NVPTX::SDIVi32rr;
  case NVPTX::VSDivV4I8: return NVPTX::SDIVi8rr;
  case NVPTX::VSRemV2I16: return NVPTX::SREMi16rr;
  case NVPTX::VSRemV2I32: return NVPTX::SREMi32rr;
  case NVPTX::VSRemV2I64: return NVPTX::SREMi64rr;
  case NVPTX::VSRemV2I8: return NVPTX::SREMi8rr;
  case NVPTX::VSRemV4I16: return NVPTX::SREMi16rr;
  case NVPTX::VSRemV4I32: return NVPTX::SREMi32rr;
  case NVPTX::VSRemV4I8: return NVPTX::SREMi8rr;
  case NVPTX::VSubV2I16: return NVPTX::SUBi16rr;
  case NVPTX::VSubV2I32: return NVPTX::SUBi32rr;
  case NVPTX::VSubV2I64: return NVPTX::SUBi64rr;
  case NVPTX::VSubV2I8: return NVPTX::SUBi8rr;
  case NVPTX::VSubV4I16: return NVPTX::SUBi16rr;
  case NVPTX::VSubV4I32: return NVPTX::SUBi32rr;
  case NVPTX::VSubV4I8: return NVPTX::SUBi8rr;
  case NVPTX::VSubfV2F32_ftz: return NVPTX::FSUBf32rr_ftz;
  case NVPTX::VSubfV2F32: return NVPTX::FSUBf32rr;
  case NVPTX::VSubfV2F64: return NVPTX::FSUBf64rr;
  case NVPTX::VSubfV4F32_ftz: return NVPTX::FSUBf32rr_ftz;
  case NVPTX::VSubfV4F32: return NVPTX::FSUBf32rr;
  case NVPTX::VUDivV2I16: return NVPTX::UDIVi16rr;
  case NVPTX::VUDivV2I32: return NVPTX::UDIVi32rr;
  case NVPTX::VUDivV2I64: return NVPTX::UDIVi64rr;
  case NVPTX::VUDivV2I8: return NVPTX::UDIVi8rr;
  case NVPTX::VUDivV4I16: return NVPTX::UDIVi16rr;
  case NVPTX::VUDivV4I32: return NVPTX::UDIVi32rr;
  case NVPTX::VUDivV4I8: return NVPTX::UDIVi8rr;
  case NVPTX::VURemV2I16: return NVPTX::UREMi16rr;
  case NVPTX::VURemV2I32: return NVPTX::UREMi32rr;
  case NVPTX::VURemV2I64: return NVPTX::UREMi64rr;
  case NVPTX::VURemV2I8: return NVPTX::UREMi8rr;
  case NVPTX::VURemV4I16: return NVPTX::UREMi16rr;
  case NVPTX::VURemV4I32: return NVPTX::UREMi32rr;
  case NVPTX::VURemV4I8: return NVPTX::UREMi8rr;
  case NVPTX::VXorV2I16: return NVPTX::XORb16rr;
  case NVPTX::VXorV2I32: return NVPTX::XORb32rr;
  case NVPTX::VXorV2I64: return NVPTX::XORb64rr;
  case NVPTX::VXorV2I8: return NVPTX::XORb8rr;
  case NVPTX::VXorV4I16: return NVPTX::XORb16rr;
  case NVPTX::VXorV4I32: return NVPTX::XORb32rr;
  case NVPTX::VXorV4I8: return NVPTX::XORb8rr;
  case NVPTX::VecSEQV2I16: return NVPTX::ISetSEQi16rr_toi16;
  case NVPTX::VecSEQV2I32: return NVPTX::ISetSEQi32rr_toi32;
  case NVPTX::VecSEQV2I64: return NVPTX::ISetSEQi64rr_toi64;
  case NVPTX::VecSEQV2I8: return NVPTX::ISetSEQi8rr_toi8;
  case NVPTX::VecSEQV4I16: return NVPTX::ISetSEQi16rr_toi16;
  case NVPTX::VecSEQV4I32: return NVPTX::ISetSEQi32rr_toi32;
  case NVPTX::VecSEQV4I8: return NVPTX::ISetSEQi8rr_toi8;
  case NVPTX::VecSGEV2I16: return NVPTX::ISetSGEi16rr_toi16;
  case NVPTX::VecSGEV2I32: return NVPTX::ISetSGEi32rr_toi32;
  case NVPTX::VecSGEV2I64: return NVPTX::ISetSGEi64rr_toi64;
  case NVPTX::VecSGEV2I8: return NVPTX::ISetSGEi8rr_toi8;
  case NVPTX::VecSGEV4I16: return NVPTX::ISetSGEi16rr_toi16;
  case NVPTX::VecSGEV4I32: return NVPTX::ISetSGEi32rr_toi32;
  case NVPTX::VecSGEV4I8: return NVPTX::ISetSGEi8rr_toi8;
  case NVPTX::VecSGTV2I16: return NVPTX::ISetSGTi16rr_toi16;
  case NVPTX::VecSGTV2I32: return NVPTX::ISetSGTi32rr_toi32;
  case NVPTX::VecSGTV2I64: return NVPTX::ISetSGTi64rr_toi64;
  case NVPTX::VecSGTV2I8: return NVPTX::ISetSGTi8rr_toi8;
  case NVPTX::VecSGTV4I16: return NVPTX::ISetSGTi16rr_toi16;
  case NVPTX::VecSGTV4I32: return NVPTX::ISetSGTi32rr_toi32;
  case NVPTX::VecSGTV4I8: return NVPTX::ISetSGTi8rr_toi8;
  case NVPTX::VecSLEV2I16: return NVPTX::ISetSLEi16rr_toi16;
  case NVPTX::VecSLEV2I32: return NVPTX::ISetSLEi32rr_toi32;
  case NVPTX::VecSLEV2I64: return NVPTX::ISetSLEi64rr_toi64;
  case NVPTX::VecSLEV2I8: return NVPTX::ISetSLEi8rr_toi8;
  case NVPTX::VecSLEV4I16: return NVPTX::ISetSLEi16rr_toi16;
  case NVPTX::VecSLEV4I32: return NVPTX::ISetSLEi32rr_toi32;
  case NVPTX::VecSLEV4I8: return NVPTX::ISetSLEi8rr_toi8;
  case NVPTX::VecSLTV2I16: return NVPTX::ISetSLTi16rr_toi16;
  case NVPTX::VecSLTV2I32: return NVPTX::ISetSLTi32rr_toi32;
  case NVPTX::VecSLTV2I64: return NVPTX::ISetSLTi64rr_toi64;
  case NVPTX::VecSLTV2I8: return NVPTX::ISetSLTi8rr_toi8;
  case NVPTX::VecSLTV4I16: return NVPTX::ISetSLTi16rr_toi16;
  case NVPTX::VecSLTV4I32: return NVPTX::ISetSLTi32rr_toi32;
  case NVPTX::VecSLTV4I8: return NVPTX::ISetSLTi8rr_toi8;
  case NVPTX::VecSNEV2I16: return NVPTX::ISetSNEi16rr_toi16;
  case NVPTX::VecSNEV2I32: return NVPTX::ISetSNEi32rr_toi32;
  case NVPTX::VecSNEV2I64: return NVPTX::ISetSNEi64rr_toi64;
  case NVPTX::VecSNEV2I8: return NVPTX::ISetSNEi8rr_toi8;
  case NVPTX::VecSNEV4I16: return NVPTX::ISetSNEi16rr_toi16;
  case NVPTX::VecSNEV4I32: return NVPTX::ISetSNEi32rr_toi32;
  case NVPTX::VecSNEV4I8: return NVPTX::ISetSNEi8rr_toi8;
  case NVPTX::VecShuffle_v2f32: return NVPTX::FMOV32rr;
  case NVPTX::VecShuffle_v2f64: return NVPTX::FMOV64rr;
  case NVPTX::VecShuffle_v2i16: return NVPTX::IMOV16rr;
  case NVPTX::VecShuffle_v2i32: return NVPTX::IMOV32rr;
  case NVPTX::VecShuffle_v2i64: return NVPTX::IMOV64rr;
  case NVPTX::VecShuffle_v2i8: return NVPTX::IMOV8rr;
  case NVPTX::VecShuffle_v4f32: return NVPTX::FMOV32rr;
  case NVPTX::VecShuffle_v4i16: return NVPTX::IMOV16rr;
  case NVPTX::VecShuffle_v4i32: return NVPTX::IMOV32rr;
  case NVPTX::VecShuffle_v4i8: return NVPTX::IMOV8rr;
  case NVPTX::VecUEQV2I16: return NVPTX::ISetUEQi16rr_toi16;
  case NVPTX::VecUEQV2I32: return NVPTX::ISetUEQi32rr_toi32;
  case NVPTX::VecUEQV2I64: return NVPTX::ISetUEQi64rr_toi64;
  case NVPTX::VecUEQV2I8: return NVPTX::ISetUEQi8rr_toi8;
  case NVPTX::VecUEQV4I16: return NVPTX::ISetUEQi16rr_toi16;
  case NVPTX::VecUEQV4I32: return NVPTX::ISetUEQi32rr_toi32;
  case NVPTX::VecUEQV4I8: return NVPTX::ISetUEQi8rr_toi8;
  case NVPTX::VecUGEV2I16: return NVPTX::ISetUGEi16rr_toi16;
  case NVPTX::VecUGEV2I32: return NVPTX::ISetUGEi32rr_toi32;
  case NVPTX::VecUGEV2I64: return NVPTX::ISetUGEi64rr_toi64;
  case NVPTX::VecUGEV2I8: return NVPTX::ISetUGEi8rr_toi8;
  case NVPTX::VecUGEV4I16: return NVPTX::ISetUGEi16rr_toi16;
  case NVPTX::VecUGEV4I32: return NVPTX::ISetUGEi32rr_toi32;
  case NVPTX::VecUGEV4I8: return NVPTX::ISetUGEi8rr_toi8;
  case NVPTX::VecUGTV2I16: return NVPTX::ISetUGTi16rr_toi16;
  case NVPTX::VecUGTV2I32: return NVPTX::ISetUGTi32rr_toi32;
  case NVPTX::VecUGTV2I64: return NVPTX::ISetUGTi64rr_toi64;
  case NVPTX::VecUGTV2I8: return NVPTX::ISetUGTi8rr_toi8;
  case NVPTX::VecUGTV4I16: return NVPTX::ISetUGTi16rr_toi16;
  case NVPTX::VecUGTV4I32: return NVPTX::ISetUGTi32rr_toi32;
  case NVPTX::VecUGTV4I8: return NVPTX::ISetUGTi8rr_toi8;
  case NVPTX::VecULEV2I16: return NVPTX::ISetULEi16rr_toi16;
  case NVPTX::VecULEV2I32: return NVPTX::ISetULEi32rr_toi32;
  case NVPTX::VecULEV2I64: return NVPTX::ISetULEi64rr_toi64;
  case NVPTX::VecULEV2I8: return NVPTX::ISetULEi8rr_toi8;
  case NVPTX::VecULEV4I16: return NVPTX::ISetULEi16rr_toi16;
  case NVPTX::VecULEV4I32: return NVPTX::ISetULEi32rr_toi32;
  case NVPTX::VecULEV4I8: return NVPTX::ISetULEi8rr_toi8;
  case NVPTX::VecULTV2I16: return NVPTX::ISetULTi16rr_toi16;
  case NVPTX::VecULTV2I32: return NVPTX::ISetULTi32rr_toi32;
  case NVPTX::VecULTV2I64: return NVPTX::ISetULTi64rr_toi64;
  case NVPTX::VecULTV2I8: return NVPTX::ISetULTi8rr_toi8;
  case NVPTX::VecULTV4I16: return NVPTX::ISetULTi16rr_toi16;
  case NVPTX::VecULTV4I32: return NVPTX::ISetULTi32rr_toi32;
  case NVPTX::VecULTV4I8: return NVPTX::ISetULTi8rr_toi8;
  case NVPTX::VecUNEV2I16: return NVPTX::ISetUNEi16rr_toi16;
  case NVPTX::VecUNEV2I32: return NVPTX::ISetUNEi32rr_toi32;
  case NVPTX::VecUNEV2I64: return NVPTX::ISetUNEi64rr_toi64;
  case NVPTX::VecUNEV2I8: return NVPTX::ISetUNEi8rr_toi8;
  case NVPTX::VecUNEV4I16: return NVPTX::ISetUNEi16rr_toi16;
  case NVPTX::VecUNEV4I32: return NVPTX::ISetUNEi32rr_toi32;
  case NVPTX::VecUNEV4I8: return NVPTX::ISetUNEi8rr_toi8;
  case NVPTX::INT_PTX_LDU_G_v2i8_32: return NVPTX::INT_PTX_LDU_G_v2i8_ELE_32;
  case NVPTX::INT_PTX_LDU_G_v4i8_32: return NVPTX::INT_PTX_LDU_G_v4i8_ELE_32;
  case NVPTX::INT_PTX_LDU_G_v2i16_32: return NVPTX::INT_PTX_LDU_G_v2i16_ELE_32;
  case NVPTX::INT_PTX_LDU_G_v4i16_32: return NVPTX::INT_PTX_LDU_G_v4i16_ELE_32;
  case NVPTX::INT_PTX_LDU_G_v2i32_32: return NVPTX::INT_PTX_LDU_G_v2i32_ELE_32;
  case NVPTX::INT_PTX_LDU_G_v4i32_32: return NVPTX::INT_PTX_LDU_G_v4i32_ELE_32;
  case NVPTX::INT_PTX_LDU_G_v2f32_32: return NVPTX::INT_PTX_LDU_G_v2f32_ELE_32;
  case NVPTX::INT_PTX_LDU_G_v4f32_32: return NVPTX::INT_PTX_LDU_G_v4f32_ELE_32;
  case NVPTX::INT_PTX_LDU_G_v2i64_32: return NVPTX::INT_PTX_LDU_G_v2i64_ELE_32;
  case NVPTX::INT_PTX_LDU_G_v2f64_32: return NVPTX::INT_PTX_LDU_G_v2f64_ELE_32;
  case NVPTX::INT_PTX_LDU_G_v2i8_64: return NVPTX::INT_PTX_LDU_G_v2i8_ELE_64;
  case NVPTX::INT_PTX_LDU_G_v4i8_64: return NVPTX::INT_PTX_LDU_G_v4i8_ELE_64;
  case NVPTX::INT_PTX_LDU_G_v2i16_64: return NVPTX::INT_PTX_LDU_G_v2i16_ELE_64;
  case NVPTX::INT_PTX_LDU_G_v4i16_64: return NVPTX::INT_PTX_LDU_G_v4i16_ELE_64;
  case NVPTX::INT_PTX_LDU_G_v2i32_64: return NVPTX::INT_PTX_LDU_G_v2i32_ELE_64;
  case NVPTX::INT_PTX_LDU_G_v4i32_64: return NVPTX::INT_PTX_LDU_G_v4i32_ELE_64;
  case NVPTX::INT_PTX_LDU_G_v2f32_64: return NVPTX::INT_PTX_LDU_G_v2f32_ELE_64;
  case NVPTX::INT_PTX_LDU_G_v4f32_64: return NVPTX::INT_PTX_LDU_G_v4f32_ELE_64;
  case NVPTX::INT_PTX_LDU_G_v2i64_64: return NVPTX::INT_PTX_LDU_G_v2i64_ELE_64;
  case NVPTX::INT_PTX_LDU_G_v2f64_64: return NVPTX::INT_PTX_LDU_G_v2f64_ELE_64;

  case NVPTX::LoadParamV4I32: return NVPTX::LoadParamScalar4I32;
  case NVPTX::LoadParamV4I16: return NVPTX::LoadParamScalar4I16;
  case NVPTX::LoadParamV4I8: return NVPTX::LoadParamScalar4I8;
  case NVPTX::LoadParamV2I64: return NVPTX::LoadParamScalar2I64;
  case NVPTX::LoadParamV2I32: return NVPTX::LoadParamScalar2I32;
  case NVPTX::LoadParamV2I16: return NVPTX::LoadParamScalar2I16;
  case NVPTX::LoadParamV2I8: return NVPTX::LoadParamScalar2I8;
  case NVPTX::LoadParamV4F32: return NVPTX::LoadParamScalar4F32;
  case NVPTX::LoadParamV2F32: return NVPTX::LoadParamScalar2F32;
  case NVPTX::LoadParamV2F64: return NVPTX::LoadParamScalar2F64;
  case NVPTX::StoreParamV4I32: return NVPTX::StoreParamScalar4I32;
  case NVPTX::StoreParamV4I16: return NVPTX::StoreParamScalar4I16;
  case NVPTX::StoreParamV4I8: return NVPTX::StoreParamScalar4I8;
  case NVPTX::StoreParamV2I64: return NVPTX::StoreParamScalar2I64;
  case NVPTX::StoreParamV2I32: return NVPTX::StoreParamScalar2I32;
  case NVPTX::StoreParamV2I16: return NVPTX::StoreParamScalar2I16;
  case NVPTX::StoreParamV2I8: return NVPTX::StoreParamScalar2I8;
  case NVPTX::StoreParamV4F32: return NVPTX::StoreParamScalar4F32;
  case NVPTX::StoreParamV2F32: return NVPTX::StoreParamScalar2F32;
  case NVPTX::StoreParamV2F64: return NVPTX::StoreParamScalar2F64;
  case NVPTX::StoreRetvalV4I32: return NVPTX::StoreRetvalScalar4I32;
  case NVPTX::StoreRetvalV4I16: return NVPTX::StoreRetvalScalar4I16;
  case NVPTX::StoreRetvalV4I8: return NVPTX::StoreRetvalScalar4I8;
  case NVPTX::StoreRetvalV2I64: return NVPTX::StoreRetvalScalar2I64;
  case NVPTX::StoreRetvalV2I32: return NVPTX::StoreRetvalScalar2I32;
  case NVPTX::StoreRetvalV2I16: return NVPTX::StoreRetvalScalar2I16;
  case NVPTX::StoreRetvalV2I8: return NVPTX::StoreRetvalScalar2I8;
  case NVPTX::StoreRetvalV4F32: return NVPTX::StoreRetvalScalar4F32;
  case NVPTX::StoreRetvalV2F32: return NVPTX::StoreRetvalScalar2F32;
  case NVPTX::StoreRetvalV2F64: return NVPTX::StoreRetvalScalar2F64;
  case NVPTX::VecI32toV4I8: return NVPTX::I32toV4I8;
  case NVPTX::VecI64toV4I16: return NVPTX::I64toV4I16;
  case NVPTX::VecI16toV2I8: return NVPTX::I16toV2I8;
  case NVPTX::VecI32toV2I16: return NVPTX::I32toV2I16;
  case NVPTX::VecI64toV2I32: return NVPTX::I64toV2I32;
  case NVPTX::VecF64toV2F32: return NVPTX::F64toV2F32;

  case NVPTX::LD_v2i8_avar: return NVPTX::LDV_i8_v2_avar;
  case NVPTX::LD_v2i8_areg: return NVPTX::LDV_i8_v2_areg;
  case NVPTX::LD_v2i8_ari:  return NVPTX::LDV_i8_v2_ari;
  case NVPTX::LD_v2i8_asi:  return NVPTX::LDV_i8_v2_asi;
  case NVPTX::LD_v4i8_avar: return NVPTX::LDV_i8_v4_avar;
  case NVPTX::LD_v4i8_areg: return NVPTX::LDV_i8_v4_areg;
  case NVPTX::LD_v4i8_ari:  return NVPTX::LDV_i8_v4_ari;
  case NVPTX::LD_v4i8_asi:  return NVPTX::LDV_i8_v4_asi;

  case NVPTX::LD_v2i16_avar: return NVPTX::LDV_i16_v2_avar;
  case NVPTX::LD_v2i16_areg: return NVPTX::LDV_i16_v2_areg;
  case NVPTX::LD_v2i16_ari:  return NVPTX::LDV_i16_v2_ari;
  case NVPTX::LD_v2i16_asi:  return NVPTX::LDV_i16_v2_asi;
  case NVPTX::LD_v4i16_avar: return NVPTX::LDV_i16_v4_avar;
  case NVPTX::LD_v4i16_areg: return NVPTX::LDV_i16_v4_areg;
  case NVPTX::LD_v4i16_ari:  return NVPTX::LDV_i16_v4_ari;
  case NVPTX::LD_v4i16_asi:  return NVPTX::LDV_i16_v4_asi;

  case NVPTX::LD_v2i32_avar: return NVPTX::LDV_i32_v2_avar;
  case NVPTX::LD_v2i32_areg: return NVPTX::LDV_i32_v2_areg;
  case NVPTX::LD_v2i32_ari:  return NVPTX::LDV_i32_v2_ari;
  case NVPTX::LD_v2i32_asi:  return NVPTX::LDV_i32_v2_asi;
  case NVPTX::LD_v4i32_avar: return NVPTX::LDV_i32_v4_avar;
  case NVPTX::LD_v4i32_areg: return NVPTX::LDV_i32_v4_areg;
  case NVPTX::LD_v4i32_ari:  return NVPTX::LDV_i32_v4_ari;
  case NVPTX::LD_v4i32_asi:  return NVPTX::LDV_i32_v4_asi;

  case NVPTX::LD_v2f32_avar: return NVPTX::LDV_f32_v2_avar;
  case NVPTX::LD_v2f32_areg: return NVPTX::LDV_f32_v2_areg;
  case NVPTX::LD_v2f32_ari:  return NVPTX::LDV_f32_v2_ari;
  case NVPTX::LD_v2f32_asi:  return NVPTX::LDV_f32_v2_asi;
  case NVPTX::LD_v4f32_avar: return NVPTX::LDV_f32_v4_avar;
  case NVPTX::LD_v4f32_areg: return NVPTX::LDV_f32_v4_areg;
  case NVPTX::LD_v4f32_ari:  return NVPTX::LDV_f32_v4_ari;
  case NVPTX::LD_v4f32_asi:  return NVPTX::LDV_f32_v4_asi;

  case NVPTX::LD_v2i64_avar: return NVPTX::LDV_i64_v2_avar;
  case NVPTX::LD_v2i64_areg: return NVPTX::LDV_i64_v2_areg;
  case NVPTX::LD_v2i64_ari:  return NVPTX::LDV_i64_v2_ari;
  case NVPTX::LD_v2i64_asi:  return NVPTX::LDV_i64_v2_asi;
  case NVPTX::LD_v2f64_avar: return NVPTX::LDV_f64_v2_avar;
  case NVPTX::LD_v2f64_areg: return NVPTX::LDV_f64_v2_areg;
  case NVPTX::LD_v2f64_ari:  return NVPTX::LDV_f64_v2_ari;
  case NVPTX::LD_v2f64_asi:  return NVPTX::LDV_f64_v2_asi;

  case NVPTX::ST_v2i8_avar: return NVPTX::STV_i8_v2_avar;
  case NVPTX::ST_v2i8_areg: return NVPTX::STV_i8_v2_areg;
  case NVPTX::ST_v2i8_ari:  return NVPTX::STV_i8_v2_ari;
  case NVPTX::ST_v2i8_asi:  return NVPTX::STV_i8_v2_asi;
  case NVPTX::ST_v4i8_avar: return NVPTX::STV_i8_v4_avar;
  case NVPTX::ST_v4i8_areg: return NVPTX::STV_i8_v4_areg;
  case NVPTX::ST_v4i8_ari:  return NVPTX::STV_i8_v4_ari;
  case NVPTX::ST_v4i8_asi:  return NVPTX::STV_i8_v4_asi;

  case NVPTX::ST_v2i16_avar: return NVPTX::STV_i16_v2_avar;
  case NVPTX::ST_v2i16_areg: return NVPTX::STV_i16_v2_areg;
  case NVPTX::ST_v2i16_ari:  return NVPTX::STV_i16_v2_ari;
  case NVPTX::ST_v2i16_asi:  return NVPTX::STV_i16_v2_asi;
  case NVPTX::ST_v4i16_avar: return NVPTX::STV_i16_v4_avar;
  case NVPTX::ST_v4i16_areg: return NVPTX::STV_i16_v4_areg;
  case NVPTX::ST_v4i16_ari:  return NVPTX::STV_i16_v4_ari;
  case NVPTX::ST_v4i16_asi:  return NVPTX::STV_i16_v4_asi;

  case NVPTX::ST_v2i32_avar: return NVPTX::STV_i32_v2_avar;
  case NVPTX::ST_v2i32_areg: return NVPTX::STV_i32_v2_areg;
  case NVPTX::ST_v2i32_ari:  return NVPTX::STV_i32_v2_ari;
  case NVPTX::ST_v2i32_asi:  return NVPTX::STV_i32_v2_asi;
  case NVPTX::ST_v4i32_avar: return NVPTX::STV_i32_v4_avar;
  case NVPTX::ST_v4i32_areg: return NVPTX::STV_i32_v4_areg;
  case NVPTX::ST_v4i32_ari:  return NVPTX::STV_i32_v4_ari;
  case NVPTX::ST_v4i32_asi:  return NVPTX::STV_i32_v4_asi;

  case NVPTX::ST_v2f32_avar: return NVPTX::STV_f32_v2_avar;
  case NVPTX::ST_v2f32_areg: return NVPTX::STV_f32_v2_areg;
  case NVPTX::ST_v2f32_ari:  return NVPTX::STV_f32_v2_ari;
  case NVPTX::ST_v2f32_asi:  return NVPTX::STV_f32_v2_asi;
  case NVPTX::ST_v4f32_avar: return NVPTX::STV_f32_v4_avar;
  case NVPTX::ST_v4f32_areg: return NVPTX::STV_f32_v4_areg;
  case NVPTX::ST_v4f32_ari:  return NVPTX::STV_f32_v4_ari;
  case NVPTX::ST_v4f32_asi:  return NVPTX::STV_f32_v4_asi;

  case NVPTX::ST_v2i64_avar: return NVPTX::STV_i64_v2_avar;
  case NVPTX::ST_v2i64_areg: return NVPTX::STV_i64_v2_areg;
  case NVPTX::ST_v2i64_ari:  return NVPTX::STV_i64_v2_ari;
  case NVPTX::ST_v2i64_asi:  return NVPTX::STV_i64_v2_asi;
  case NVPTX::ST_v2f64_avar: return NVPTX::STV_f64_v2_avar;
  case NVPTX::ST_v2f64_areg: return NVPTX::STV_f64_v2_areg;
  case NVPTX::ST_v2f64_ari:  return NVPTX::STV_f64_v2_ari;
  case NVPTX::ST_v2f64_asi:  return NVPTX::STV_f64_v2_asi;
  }
  return 0;
}