AMDGPUAsmParser.cpp revision 344779
1//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "AMDGPU.h"
11#include "AMDKernelCodeT.h"
12#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
13#include "MCTargetDesc/AMDGPUTargetStreamer.h"
14#include "SIDefines.h"
15#include "SIInstrInfo.h"
16#include "Utils/AMDGPUAsmUtils.h"
17#include "Utils/AMDGPUBaseInfo.h"
18#include "Utils/AMDKernelCodeTUtils.h"
19#include "llvm/ADT/APFloat.h"
20#include "llvm/ADT/APInt.h"
21#include "llvm/ADT/ArrayRef.h"
22#include "llvm/ADT/STLExtras.h"
23#include "llvm/ADT/SmallBitVector.h"
24#include "llvm/ADT/SmallString.h"
25#include "llvm/ADT/StringRef.h"
26#include "llvm/ADT/StringSwitch.h"
27#include "llvm/ADT/Twine.h"
28#include "llvm/BinaryFormat/ELF.h"
29#include "llvm/MC/MCAsmInfo.h"
30#include "llvm/MC/MCContext.h"
31#include "llvm/MC/MCExpr.h"
32#include "llvm/MC/MCInst.h"
33#include "llvm/MC/MCInstrDesc.h"
34#include "llvm/MC/MCInstrInfo.h"
35#include "llvm/MC/MCParser/MCAsmLexer.h"
36#include "llvm/MC/MCParser/MCAsmParser.h"
37#include "llvm/MC/MCParser/MCAsmParserExtension.h"
38#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39#include "llvm/MC/MCParser/MCTargetAsmParser.h"
40#include "llvm/MC/MCRegisterInfo.h"
41#include "llvm/MC/MCStreamer.h"
42#include "llvm/MC/MCSubtargetInfo.h"
43#include "llvm/MC/MCSymbol.h"
44#include "llvm/Support/AMDGPUMetadata.h"
45#include "llvm/Support/AMDHSAKernelDescriptor.h"
46#include "llvm/Support/Casting.h"
47#include "llvm/Support/Compiler.h"
48#include "llvm/Support/ErrorHandling.h"
49#include "llvm/Support/MachineValueType.h"
50#include "llvm/Support/MathExtras.h"
51#include "llvm/Support/SMLoc.h"
52#include "llvm/Support/TargetParser.h"
53#include "llvm/Support/TargetRegistry.h"
54#include "llvm/Support/raw_ostream.h"
55#include <algorithm>
56#include <cassert>
57#include <cstdint>
58#include <cstring>
59#include <iterator>
60#include <map>
61#include <memory>
62#include <string>
63
64using namespace llvm;
65using namespace llvm::AMDGPU;
66using namespace llvm::amdhsa;
67
68namespace {
69
70class AMDGPUAsmParser;
71
72enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };
73
74//===----------------------------------------------------------------------===//
75// Operand
76//===----------------------------------------------------------------------===//
77
78class AMDGPUOperand : public MCParsedAsmOperand {
79  enum KindTy {
80    Token,
81    Immediate,
82    Register,
83    Expression
84  } Kind;
85
86  SMLoc StartLoc, EndLoc;
87  const AMDGPUAsmParser *AsmParser;
88
89public:
90  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
91    : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
92
93  using Ptr = std::unique_ptr<AMDGPUOperand>;
94
95  struct Modifiers {
96    bool Abs = false;
97    bool Neg = false;
98    bool Sext = false;
99
100    bool hasFPModifiers() const { return Abs || Neg; }
101    bool hasIntModifiers() const { return Sext; }
102    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
103
104    int64_t getFPModifiersOperand() const {
105      int64_t Operand = 0;
106      Operand |= Abs ? SISrcMods::ABS : 0;
107      Operand |= Neg ? SISrcMods::NEG : 0;
108      return Operand;
109    }
110
111    int64_t getIntModifiersOperand() const {
112      int64_t Operand = 0;
113      Operand |= Sext ? SISrcMods::SEXT : 0;
114      return Operand;
115    }
116
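    // Builds the src_modifiers operand value from SISrcMods bits. FP (abs/neg)
    // and integer (sext) modifiers are mutually exclusive for a single operand,
    // as the assert below enforces.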
117    int64_t getModifiersOperand() const {
118      assert(!(hasFPModifiers() && hasIntModifiers())
119           && "fp and int modifiers should not be used simultaneously");
120      if (hasFPModifiers()) {
121        return getFPModifiersOperand();
122      } else if (hasIntModifiers()) {
123        return getIntModifiersOperand();
124      } else {
125        return 0;
126      }
127    }
128
129    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
130  };
131
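  // Classifies named/optional immediate operands (e.g. 'gds', 'offset:', 'clamp')
  // so parsed values can be matched to the correct operand slot during conversion.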
132  enum ImmTy {
133    ImmTyNone,
134    ImmTyGDS,
135    ImmTyLDS,
136    ImmTyOffen,
137    ImmTyIdxen,
138    ImmTyAddr64,
139    ImmTyOffset,
140    ImmTyInstOffset,
141    ImmTyOffset0,
142    ImmTyOffset1,
143    ImmTyGLC,
144    ImmTySLC,
145    ImmTyTFE,
146    ImmTyD16,
147    ImmTyClampSI,
148    ImmTyOModSI,
149    ImmTyDppCtrl,
150    ImmTyDppRowMask,
151    ImmTyDppBankMask,
152    ImmTyDppBoundCtrl,
153    ImmTySdwaDstSel,
154    ImmTySdwaSrc0Sel,
155    ImmTySdwaSrc1Sel,
156    ImmTySdwaDstUnused,
157    ImmTyDMask,
158    ImmTyUNorm,
159    ImmTyDA,
160    ImmTyR128A16,
161    ImmTyLWE,
162    ImmTyExpTgt,
163    ImmTyExpCompr,
164    ImmTyExpVM,
165    ImmTyFORMAT,
166    ImmTyHwreg,
167    ImmTyOff,
168    ImmTySendMsg,
169    ImmTyInterpSlot,
170    ImmTyInterpAttr,
171    ImmTyAttrChan,
172    ImmTyOpSel,
173    ImmTyOpSelHi,
174    ImmTyNegLo,
175    ImmTyNegHi,
176    ImmTySwizzle,
177    ImmTyHigh
178  };
179
180  struct TokOp {
181    const char *Data;
182    unsigned Length;
183  };
184
185  struct ImmOp {
186    int64_t Val;
187    ImmTy Type;
188    bool IsFPImm;
189    Modifiers Mods;
190  };
191
192  struct RegOp {
193    unsigned RegNo;
194    bool IsForcedVOP3;
195    Modifiers Mods;
196  };
197
198  union {
199    TokOp Tok;
200    ImmOp Imm;
201    RegOp Reg;
202    const MCExpr *Expr;
203  };
204
205  bool isToken() const override {
206    if (Kind == Token)
207      return true;
208
209    if (Kind != Expression || !Expr)
210      return false;
211
212    // When parsing operands, we can't always tell whether something was meant to
213    // be a token, like 'gds', or an expression that references a global variable.
214    // In this case, we assume the string is an expression, and if we need to
215    // interpret it as a token, then we treat the symbol name as the token.
216    return isa<MCSymbolRefExpr>(Expr);
217  }
218
219  bool isImm() const override {
220    return Kind == Immediate;
221  }
222
223  bool isInlinableImm(MVT type) const;
224  bool isLiteralImm(MVT type) const;
225
226  bool isRegKind() const {
227    return Kind == Register;
228  }
229
230  bool isReg() const override {
231    return isRegKind() && !hasModifiers();
232  }
233
234  bool isRegOrImmWithInputMods(MVT type) const {
235    return isRegKind() || isInlinableImm(type);
236  }
237
238  bool isRegOrImmWithInt16InputMods() const {
239    return isRegOrImmWithInputMods(MVT::i16);
240  }
241
242  bool isRegOrImmWithInt32InputMods() const {
243    return isRegOrImmWithInputMods(MVT::i32);
244  }
245
246  bool isRegOrImmWithInt64InputMods() const {
247    return isRegOrImmWithInputMods(MVT::i64);
248  }
249
250  bool isRegOrImmWithFP16InputMods() const {
251    return isRegOrImmWithInputMods(MVT::f16);
252  }
253
254  bool isRegOrImmWithFP32InputMods() const {
255    return isRegOrImmWithInputMods(MVT::f32);
256  }
257
258  bool isRegOrImmWithFP64InputMods() const {
259    return isRegOrImmWithInputMods(MVT::f64);
260  }
261
262  bool isVReg() const {
263    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
264           isRegClass(AMDGPU::VReg_64RegClassID) ||
265           isRegClass(AMDGPU::VReg_96RegClassID) ||
266           isRegClass(AMDGPU::VReg_128RegClassID) ||
267           isRegClass(AMDGPU::VReg_256RegClassID) ||
268           isRegClass(AMDGPU::VReg_512RegClassID);
269  }
270
271  bool isVReg32OrOff() const {
272    return isOff() || isRegClass(AMDGPU::VGPR_32RegClassID);
273  }
274
275  bool isSDWAOperand(MVT type) const;
276  bool isSDWAFP16Operand() const;
277  bool isSDWAFP32Operand() const;
278  bool isSDWAInt16Operand() const;
279  bool isSDWAInt32Operand() const;
280
281  bool isImmTy(ImmTy ImmT) const {
282    return isImm() && Imm.Type == ImmT;
283  }
284
285  bool isImmModifier() const {
286    return isImm() && Imm.Type != ImmTyNone;
287  }
288
289  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
290  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
291  bool isDMask() const { return isImmTy(ImmTyDMask); }
292  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
293  bool isDA() const { return isImmTy(ImmTyDA); }
294  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
295  bool isLWE() const { return isImmTy(ImmTyLWE); }
296  bool isOff() const { return isImmTy(ImmTyOff); }
297  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
298  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
299  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
300  bool isOffen() const { return isImmTy(ImmTyOffen); }
301  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
302  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
303  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
304  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<16>(getImm()); }
305  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
306
307  bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); }
308  bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); }
309  bool isGDS() const { return isImmTy(ImmTyGDS); }
310  bool isLDS() const { return isImmTy(ImmTyLDS); }
311  bool isGLC() const { return isImmTy(ImmTyGLC); }
312  bool isSLC() const { return isImmTy(ImmTySLC); }
313  bool isTFE() const { return isImmTy(ImmTyTFE); }
314  bool isD16() const { return isImmTy(ImmTyD16); }
315  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
316  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
317  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
318  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
319  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
320  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
321  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
322  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
323  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
324  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
325  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
326  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
327  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
328  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
329  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
330  bool isHigh() const { return isImmTy(ImmTyHigh); }
331
332  bool isMod() const {
333    return isClampSI() || isOModSI();
334  }
335
336  bool isRegOrImm() const {
337    return isReg() || isImm();
338  }
339
340  bool isRegClass(unsigned RCID) const;
341
342  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
343    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
344  }
345
346  bool isSCSrcB16() const {
347    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
348  }
349
350  bool isSCSrcV2B16() const {
351    return isSCSrcB16();
352  }
353
354  bool isSCSrcB32() const {
355    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
356  }
357
358  bool isSCSrcB64() const {
359    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
360  }
361
362  bool isSCSrcF16() const {
363    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
364  }
365
366  bool isSCSrcV2F16() const {
367    return isSCSrcF16();
368  }
369
370  bool isSCSrcF32() const {
371    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
372  }
373
374  bool isSCSrcF64() const {
375    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
376  }
377
378  bool isSSrcB32() const {
379    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
380  }
381
382  bool isSSrcB16() const {
383    return isSCSrcB16() || isLiteralImm(MVT::i16);
384  }
385
386  bool isSSrcV2B16() const {
387    llvm_unreachable("cannot happen");
388    return isSSrcB16();
389  }
390
391  bool isSSrcB64() const {
392    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
393    // See isVSrcB64().
394    return isSCSrcB64() || isLiteralImm(MVT::i64);
395  }
396
397  bool isSSrcF32() const {
398    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
399  }
400
401  bool isSSrcF64() const {
402    return isSCSrcB64() || isLiteralImm(MVT::f64);
403  }
404
405  bool isSSrcF16() const {
406    return isSCSrcB16() || isLiteralImm(MVT::f16);
407  }
408
409  bool isSSrcV2F16() const {
410    llvm_unreachable("cannot happen");
411    return isSSrcF16();
412  }
413
414  bool isVCSrcB32() const {
415    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
416  }
417
418  bool isVCSrcB64() const {
419    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
420  }
421
422  bool isVCSrcB16() const {
423    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
424  }
425
426  bool isVCSrcV2B16() const {
427    return isVCSrcB16();
428  }
429
430  bool isVCSrcF32() const {
431    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
432  }
433
434  bool isVCSrcF64() const {
435    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
436  }
437
438  bool isVCSrcF16() const {
439    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
440  }
441
442  bool isVCSrcV2F16() const {
443    return isVCSrcF16();
444  }
445
446  bool isVSrcB32() const {
447    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
448  }
449
450  bool isVSrcB64() const {
451    return isVCSrcF64() || isLiteralImm(MVT::i64);
452  }
453
454  bool isVSrcB16() const {
455    return isVCSrcF16() || isLiteralImm(MVT::i16);
456  }
457
458  bool isVSrcV2B16() const {
459    llvm_unreachable("cannot happen");
460    return isVSrcB16();
461  }
462
463  bool isVSrcF32() const {
464    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
465  }
466
467  bool isVSrcF64() const {
468    return isVCSrcF64() || isLiteralImm(MVT::f64);
469  }
470
471  bool isVSrcF16() const {
472    return isVCSrcF16() || isLiteralImm(MVT::f16);
473  }
474
475  bool isVSrcV2F16() const {
476    llvm_unreachable("cannot happen");
477    return isVSrcF16();
478  }
479
480  bool isKImmFP32() const {
481    return isLiteralImm(MVT::f32);
482  }
483
484  bool isKImmFP16() const {
485    return isLiteralImm(MVT::f16);
486  }
487
488  bool isMem() const override {
489    return false;
490  }
491
492  bool isExpr() const {
493    return Kind == Expression;
494  }
495
496  bool isSoppBrTarget() const {
497    return isExpr() || isImm();
498  }
499
500  bool isSWaitCnt() const;
501  bool isHwreg() const;
502  bool isSendMsg() const;
503  bool isSwizzle() const;
504  bool isSMRDOffset8() const;
505  bool isSMRDOffset20() const;
506  bool isSMRDLiteralOffset() const;
507  bool isDPPCtrl() const;
508  bool isGPRIdxMode() const;
509  bool isS16Imm() const;
510  bool isU16Imm() const;
511
512  StringRef getExpressionAsToken() const {
513    assert(isExpr());
514    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
515    return S->getSymbol().getName();
516  }
517
518  StringRef getToken() const {
519    assert(isToken());
520
521    if (Kind == Expression)
522      return getExpressionAsToken();
523
524    return StringRef(Tok.Data, Tok.Length);
525  }
526
527  int64_t getImm() const {
528    assert(isImm());
529    return Imm.Val;
530  }
531
532  ImmTy getImmTy() const {
533    assert(isImm());
534    return Imm.Type;
535  }
536
537  unsigned getReg() const override {
538    return Reg.RegNo;
539  }
540
541  SMLoc getStartLoc() const override {
542    return StartLoc;
543  }
544
545  SMLoc getEndLoc() const override {
546    return EndLoc;
547  }
548
549  SMRange getLocRange() const {
550    return SMRange(StartLoc, EndLoc);
551  }
552
553  Modifiers getModifiers() const {
554    assert(isRegKind() || isImmTy(ImmTyNone));
555    return isRegKind() ? Reg.Mods : Imm.Mods;
556  }
557
558  void setModifiers(Modifiers Mods) {
559    assert(isRegKind() || isImmTy(ImmTyNone));
560    if (isRegKind())
561      Reg.Mods = Mods;
562    else
563      Imm.Mods = Mods;
564  }
565
566  bool hasModifiers() const {
567    return getModifiers().hasModifiers();
568  }
569
570  bool hasFPModifiers() const {
571    return getModifiers().hasFPModifiers();
572  }
573
574  bool hasIntModifiers() const {
575    return getModifiers().hasIntModifiers();
576  }
577
578  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
579
580  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
581
582  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
583
584  template <unsigned Bitwidth>
585  void addKImmFPOperands(MCInst &Inst, unsigned N) const;
586
587  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
588    addKImmFPOperands<16>(Inst, N);
589  }
590
591  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
592    addKImmFPOperands<32>(Inst, N);
593  }
594
595  void addRegOperands(MCInst &Inst, unsigned N) const;
596
597  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
598    if (isRegKind())
599      addRegOperands(Inst, N);
600    else if (isExpr())
601      Inst.addOperand(MCOperand::createExpr(Expr));
602    else
603      addImmOperands(Inst, N);
604  }
605
606  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
607    Modifiers Mods = getModifiers();
608    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
609    if (isRegKind()) {
610      addRegOperands(Inst, N);
611    } else {
612      addImmOperands(Inst, N, false);
613    }
614  }
615
616  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
617    assert(!hasIntModifiers());
618    addRegOrImmWithInputModsOperands(Inst, N);
619  }
620
621  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
622    assert(!hasFPModifiers());
623    addRegOrImmWithInputModsOperands(Inst, N);
624  }
625
626  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
627    Modifiers Mods = getModifiers();
628    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
629    assert(isRegKind());
630    addRegOperands(Inst, N);
631  }
632
633  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
634    assert(!hasIntModifiers());
635    addRegWithInputModsOperands(Inst, N);
636  }
637
638  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
639    assert(!hasFPModifiers());
640    addRegWithInputModsOperands(Inst, N);
641  }
642
643  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
644    if (isImm())
645      addImmOperands(Inst, N);
646    else {
647      assert(isExpr());
648      Inst.addOperand(MCOperand::createExpr(Expr));
649    }
650  }
651
652  static void printImmTy(raw_ostream& OS, ImmTy Type) {
653    switch (Type) {
654    case ImmTyNone: OS << "None"; break;
655    case ImmTyGDS: OS << "GDS"; break;
656    case ImmTyLDS: OS << "LDS"; break;
657    case ImmTyOffen: OS << "Offen"; break;
658    case ImmTyIdxen: OS << "Idxen"; break;
659    case ImmTyAddr64: OS << "Addr64"; break;
660    case ImmTyOffset: OS << "Offset"; break;
661    case ImmTyInstOffset: OS << "InstOffset"; break;
662    case ImmTyOffset0: OS << "Offset0"; break;
663    case ImmTyOffset1: OS << "Offset1"; break;
664    case ImmTyGLC: OS << "GLC"; break;
665    case ImmTySLC: OS << "SLC"; break;
666    case ImmTyTFE: OS << "TFE"; break;
667    case ImmTyD16: OS << "D16"; break;
668    case ImmTyFORMAT: OS << "FORMAT"; break;
669    case ImmTyClampSI: OS << "ClampSI"; break;
670    case ImmTyOModSI: OS << "OModSI"; break;
671    case ImmTyDppCtrl: OS << "DppCtrl"; break;
672    case ImmTyDppRowMask: OS << "DppRowMask"; break;
673    case ImmTyDppBankMask: OS << "DppBankMask"; break;
674    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
675    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
676    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
677    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
678    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
679    case ImmTyDMask: OS << "DMask"; break;
680    case ImmTyUNorm: OS << "UNorm"; break;
681    case ImmTyDA: OS << "DA"; break;
682    case ImmTyR128A16: OS << "R128A16"; break;
683    case ImmTyLWE: OS << "LWE"; break;
684    case ImmTyOff: OS << "Off"; break;
685    case ImmTyExpTgt: OS << "ExpTgt"; break;
686    case ImmTyExpCompr: OS << "ExpCompr"; break;
687    case ImmTyExpVM: OS << "ExpVM"; break;
688    case ImmTyHwreg: OS << "Hwreg"; break;
689    case ImmTySendMsg: OS << "SendMsg"; break;
690    case ImmTyInterpSlot: OS << "InterpSlot"; break;
691    case ImmTyInterpAttr: OS << "InterpAttr"; break;
692    case ImmTyAttrChan: OS << "AttrChan"; break;
693    case ImmTyOpSel: OS << "OpSel"; break;
694    case ImmTyOpSelHi: OS << "OpSelHi"; break;
695    case ImmTyNegLo: OS << "NegLo"; break;
696    case ImmTyNegHi: OS << "NegHi"; break;
697    case ImmTySwizzle: OS << "Swizzle"; break;
698    case ImmTyHigh: OS << "High"; break;
699    }
700  }
701
702  void print(raw_ostream &OS) const override {
703    switch (Kind) {
704    case Register:
705      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
706      break;
707    case Immediate:
708      OS << '<' << getImm();
709      if (getImmTy() != ImmTyNone) {
710        OS << " type: "; printImmTy(OS, getImmTy());
711      }
712      OS << " mods: " << Imm.Mods << '>';
713      break;
714    case Token:
715      OS << '\'' << getToken() << '\'';
716      break;
717    case Expression:
718      OS << "<expr " << *Expr << '>';
719      break;
720    }
721  }
722
723  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
724                                      int64_t Val, SMLoc Loc,
725                                      ImmTy Type = ImmTyNone,
726                                      bool IsFPImm = false) {
727    auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
728    Op->Imm.Val = Val;
729    Op->Imm.IsFPImm = IsFPImm;
730    Op->Imm.Type = Type;
731    Op->Imm.Mods = Modifiers();
732    Op->StartLoc = Loc;
733    Op->EndLoc = Loc;
734    return Op;
735  }
736
737  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
738                                        StringRef Str, SMLoc Loc,
739                                        bool HasExplicitEncodingSize = true) {
740    auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
741    Res->Tok.Data = Str.data();
742    Res->Tok.Length = Str.size();
743    Res->StartLoc = Loc;
744    Res->EndLoc = Loc;
745    return Res;
746  }
747
748  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
749                                      unsigned RegNo, SMLoc S,
750                                      SMLoc E,
751                                      bool ForceVOP3) {
752    auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
753    Op->Reg.RegNo = RegNo;
754    Op->Reg.Mods = Modifiers();
755    Op->Reg.IsForcedVOP3 = ForceVOP3;
756    Op->StartLoc = S;
757    Op->EndLoc = E;
758    return Op;
759  }
760
761  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
762                                       const class MCExpr *Expr, SMLoc S) {
763    auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
764    Op->Expr = Expr;
765    Op->StartLoc = S;
766    Op->EndLoc = S;
767    return Op;
768  }
769};
770
771raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
772  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
773  return OS;
774}
775
776//===----------------------------------------------------------------------===//
777// AsmParser
778//===----------------------------------------------------------------------===//
779
780// Holds info related to the current kernel, e.g. count of SGPRs used.
781// Kernel scope begins at the .amdgpu_hsa_kernel directive and ends at the next
782// .amdgpu_hsa_kernel directive or at EOF.
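// Illustrative example: if the highest registers referenced inside a kernel are
// v7 and s[4:5], the symbols emitted below end up as .kernel.vgpr_count = 8 and
// .kernel.sgpr_count = 6 (the index of the first unused register of each kind).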
783class KernelScopeInfo {
784  int SgprIndexUnusedMin = -1;
785  int VgprIndexUnusedMin = -1;
786  MCContext *Ctx = nullptr;
787
788  void usesSgprAt(int i) {
789    if (i >= SgprIndexUnusedMin) {
790      SgprIndexUnusedMin = ++i;
791      if (Ctx) {
792        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
793        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
794      }
795    }
796  }
797
798  void usesVgprAt(int i) {
799    if (i >= VgprIndexUnusedMin) {
800      VgprIndexUnusedMin = ++i;
801      if (Ctx) {
802        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
803        Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
804      }
805    }
806  }
807
808public:
809  KernelScopeInfo() = default;
810
811  void initialize(MCContext &Context) {
812    Ctx = &Context;
813    usesSgprAt(SgprIndexUnusedMin = -1);
814    usesVgprAt(VgprIndexUnusedMin = -1);
815  }
816
817  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
818    switch (RegKind) {
819      case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
820      case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
821      default: break;
822    }
823  }
824};
825
826class AMDGPUAsmParser : public MCTargetAsmParser {
827  MCAsmParser &Parser;
828
829  // Number of extra operands parsed after the first optional operand.
830  // This may be necessary to skip hardcoded mandatory operands.
831  static const unsigned MAX_OPR_LOOKAHEAD = 8;
832
833  unsigned ForcedEncodingSize = 0;
834  bool ForcedDPP = false;
835  bool ForcedSDWA = false;
836  KernelScopeInfo KernelScope;
837
838  /// @name Auto-generated Match Functions
839  /// {
840
841#define GET_ASSEMBLER_HEADER
842#include "AMDGPUGenAsmMatcher.inc"
843
844  /// }
845
846private:
847  bool ParseAsAbsoluteExpression(uint32_t &Ret);
848  bool OutOfRangeError(SMRange Range);
849  /// Calculate VGPR/SGPR blocks required for given target, reserved
850  /// registers, and user-specified NextFreeXGPR values.
851  ///
852  /// \param Features [in] Target features, used for bug corrections.
853  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
854  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
855  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
856  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
857  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
858  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
859  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
860  /// \param VGPRBlocks [out] Result VGPR block count.
861  /// \param SGPRBlocks [out] Result SGPR block count.
862  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
863                          bool FlatScrUsed, bool XNACKUsed,
864                          unsigned NextFreeVGPR, SMRange VGPRRange,
865                          unsigned NextFreeSGPR, SMRange SGPRRange,
866                          unsigned &VGPRBlocks, unsigned &SGPRBlocks);
867  bool ParseDirectiveAMDGCNTarget();
868  bool ParseDirectiveAMDHSAKernel();
869  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
870  bool ParseDirectiveHSACodeObjectVersion();
871  bool ParseDirectiveHSACodeObjectISA();
872  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
873  bool ParseDirectiveAMDKernelCodeT();
874  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
875  bool ParseDirectiveAMDGPUHsaKernel();
876
877  bool ParseDirectiveISAVersion();
878  bool ParseDirectiveHSAMetadata();
879  bool ParseDirectivePALMetadata();
880
881  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
882                             RegisterKind RegKind, unsigned Reg1,
883                             unsigned RegNum);
884  bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
885                           unsigned& RegNum, unsigned& RegWidth,
886                           unsigned *DwordRegIndex);
887  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
888  void initializeGprCountSymbol(RegisterKind RegKind);
889  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
890                             unsigned RegWidth);
891  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
892                    bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
893  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
894                 bool IsGdsHardcoded);
895
896public:
897  enum AMDGPUMatchResultTy {
898    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
899  };
900
901  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
902
903  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
904               const MCInstrInfo &MII,
905               const MCTargetOptions &Options)
906      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
907    MCAsmParserExtension::Initialize(Parser);
908
909    if (getFeatureBits().none()) {
910      // Set default features.
911      copySTI().ToggleFeature("SOUTHERN_ISLANDS");
912    }
913
914    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
915
916    {
917      // TODO: make these pre-defined variables read-only.
918      // Currently there is no suitable machinery in the core llvm-mc for this.
919      // MCSymbol::isRedefinable is intended for another purpose, and
920      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
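      // Illustrative (assumed) usage from an assembly source: the symbols defined
      // below can be tested with expressions such as
      //   .if .option.machine_version_major >= 8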
921      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
922      MCContext &Ctx = getContext();
923      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
924        MCSymbol *Sym =
925            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
926        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
927      } else {
928        MCSymbol *Sym =
929            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
930        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
931        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
932        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
933        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
934        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
935      }
936      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
937        initializeGprCountSymbol(IS_VGPR);
938        initializeGprCountSymbol(IS_SGPR);
939      } else
940        KernelScope.initialize(getContext());
941    }
942  }
943
944  bool hasXNACK() const {
945    return AMDGPU::hasXNACK(getSTI());
946  }
947
948  bool hasMIMG_R128() const {
949    return AMDGPU::hasMIMG_R128(getSTI());
950  }
951
952  bool hasPackedD16() const {
953    return AMDGPU::hasPackedD16(getSTI());
954  }
955
956  bool isSI() const {
957    return AMDGPU::isSI(getSTI());
958  }
959
960  bool isCI() const {
961    return AMDGPU::isCI(getSTI());
962  }
963
964  bool isVI() const {
965    return AMDGPU::isVI(getSTI());
966  }
967
968  bool isGFX9() const {
969    return AMDGPU::isGFX9(getSTI());
970  }
971
972  bool hasInv2PiInlineImm() const {
973    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
974  }
975
976  bool hasFlatOffsets() const {
977    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
978  }
979
980  bool hasSGPR102_SGPR103() const {
981    return !isVI();
982  }
983
984  bool hasIntClamp() const {
985    return getFeatureBits()[AMDGPU::FeatureIntClamp];
986  }
987
988  AMDGPUTargetStreamer &getTargetStreamer() {
989    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
990    return static_cast<AMDGPUTargetStreamer &>(TS);
991  }
992
993  const MCRegisterInfo *getMRI() const {
994    // We need this const_cast because for some reason getContext() is not const
995    // in MCAsmParser.
996    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
997  }
998
999  const MCInstrInfo *getMII() const {
1000    return &MII;
1001  }
1002
1003  const FeatureBitset &getFeatureBits() const {
1004    return getSTI().getFeatureBits();
1005  }
1006
1007  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1008  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1009  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1010
1011  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1012  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1013  bool isForcedDPP() const { return ForcedDPP; }
1014  bool isForcedSDWA() const { return ForcedSDWA; }
1015  ArrayRef<unsigned> getMatchedVariants() const;
1016
1017  std::unique_ptr<AMDGPUOperand> parseRegister();
1018  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1019  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1020  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1021                                      unsigned Kind) override;
1022  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1023                               OperandVector &Operands, MCStreamer &Out,
1024                               uint64_t &ErrorInfo,
1025                               bool MatchingInlineAsm) override;
1026  bool ParseDirective(AsmToken DirectiveID) override;
1027  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
1028  StringRef parseMnemonicSuffix(StringRef Name);
1029  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1030                        SMLoc NameLoc, OperandVector &Operands) override;
1031  //bool ProcessInstruction(MCInst &Inst);
1032
1033  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1034
1035  OperandMatchResultTy
1036  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1037                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1038                     bool (*ConvertResult)(int64_t &) = nullptr);
1039
1040  OperandMatchResultTy parseOperandArrayWithPrefix(
1041    const char *Prefix,
1042    OperandVector &Operands,
1043    AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1044    bool (*ConvertResult)(int64_t&) = nullptr);
1045
1046  OperandMatchResultTy
1047  parseNamedBit(const char *Name, OperandVector &Operands,
1048                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1049  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1050                                             StringRef &Value);
1051
1052  bool parseAbsoluteExpr(int64_t &Val, bool AbsMod = false);
1053  OperandMatchResultTy parseImm(OperandVector &Operands, bool AbsMod = false);
1054  OperandMatchResultTy parseReg(OperandVector &Operands);
1055  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool AbsMod = false);
1056  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1057  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1058  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1059  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1060  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1061  OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);
1062
1063  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1064  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1065  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1066  void cvtExp(MCInst &Inst, const OperandVector &Operands);
1067
1068  bool parseCnt(int64_t &IntVal);
1069  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1070  OperandMatchResultTy parseHwreg(OperandVector &Operands);
1071
1072private:
1073  struct OperandInfoTy {
1074    int64_t Id;
1075    bool IsSymbolic = false;
1076
1077    OperandInfoTy(int64_t Id_) : Id(Id_) {}
1078  };
1079
1080  bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId);
1081  bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1082
1083  void errorExpTgt();
1084  OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1085
1086  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);
1087  bool validateConstantBusLimitations(const MCInst &Inst);
1088  bool validateEarlyClobberLimitations(const MCInst &Inst);
1089  bool validateIntClampSupported(const MCInst &Inst);
1090  bool validateMIMGAtomicDMask(const MCInst &Inst);
1091  bool validateMIMGGatherDMask(const MCInst &Inst);
1092  bool validateMIMGDataSize(const MCInst &Inst);
1093  bool validateMIMGD16(const MCInst &Inst);
1094  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1095  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1096  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1097
1098  bool trySkipId(const StringRef Id);
1099  bool trySkipToken(const AsmToken::TokenKind Kind);
1100  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1101  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1102  bool parseExpr(int64_t &Imm);
1103
1104public:
1105  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1106  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1107
1108  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1109  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1110  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1111  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1112  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1113
1114  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1115                            const unsigned MinVal,
1116                            const unsigned MaxVal,
1117                            const StringRef ErrMsg);
1118  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1119  bool parseSwizzleOffset(int64_t &Imm);
1120  bool parseSwizzleMacro(int64_t &Imm);
1121  bool parseSwizzleQuadPerm(int64_t &Imm);
1122  bool parseSwizzleBitmaskPerm(int64_t &Imm);
1123  bool parseSwizzleBroadcast(int64_t &Imm);
1124  bool parseSwizzleSwap(int64_t &Imm);
1125  bool parseSwizzleReverse(int64_t &Imm);
1126
1127  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1128  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1129  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1130  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1131  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1132
1133  AMDGPUOperand::Ptr defaultGLC() const;
1134  AMDGPUOperand::Ptr defaultSLC() const;
1135
1136  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1137  AMDGPUOperand::Ptr defaultSMRDOffset20() const;
1138  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1139  AMDGPUOperand::Ptr defaultOffsetU12() const;
1140  AMDGPUOperand::Ptr defaultOffsetS13() const;
1141
1142  OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1143
1144  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1145               OptionalImmIndexMap &OptionalIdx);
1146  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1147  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1148  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1149
1150  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1151
1152  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1153               bool IsAtomic = false);
1154  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1155
1156  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1157  AMDGPUOperand::Ptr defaultRowMask() const;
1158  AMDGPUOperand::Ptr defaultBankMask() const;
1159  AMDGPUOperand::Ptr defaultBoundCtrl() const;
1160  void cvtDPP(MCInst &Inst, const OperandVector &Operands);
1161
1162  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1163                                    AMDGPUOperand::ImmTy Type);
1164  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1165  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1166  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1167  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1168  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1169  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1170                uint64_t BasicInstType, bool skipVcc = false);
1171};
1172
1173struct OptionalOperand {
1174  const char *Name;
1175  AMDGPUOperand::ImmTy Type;
1176  bool IsBit;
1177  bool (*ConvertResult)(int64_t&);
1178};
1179
1180} // end anonymous namespace
1181
1182// May be called with an integer type of equivalent bitwidth.
1183static const fltSemantics *getFltSemantics(unsigned Size) {
1184  switch (Size) {
1185  case 4:
1186    return &APFloat::IEEEsingle();
1187  case 8:
1188    return &APFloat::IEEEdouble();
1189  case 2:
1190    return &APFloat::IEEEhalf();
1191  default:
1192    llvm_unreachable("unsupported fp type");
1193  }
1194}
1195
1196static const fltSemantics *getFltSemantics(MVT VT) {
1197  return getFltSemantics(VT.getSizeInBits() / 8);
1198}
1199
1200static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1201  switch (OperandType) {
1202  case AMDGPU::OPERAND_REG_IMM_INT32:
1203  case AMDGPU::OPERAND_REG_IMM_FP32:
1204  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1205  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1206    return &APFloat::IEEEsingle();
1207  case AMDGPU::OPERAND_REG_IMM_INT64:
1208  case AMDGPU::OPERAND_REG_IMM_FP64:
1209  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1210  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1211    return &APFloat::IEEEdouble();
1212  case AMDGPU::OPERAND_REG_IMM_INT16:
1213  case AMDGPU::OPERAND_REG_IMM_FP16:
1214  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1215  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1216  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1217  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1218    return &APFloat::IEEEhalf();
1219  default:
1220    llvm_unreachable("unsupported fp type");
1221  }
1222}
1223
1224//===----------------------------------------------------------------------===//
1225// Operand
1226//===----------------------------------------------------------------------===//
1227
1228static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1229  bool Lost;
1230
1231  // Convert the literal to the semantics of the requested type
1232  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1233                                               APFloat::rmNearestTiesToEven,
1234                                               &Lost);
1235  // We allow precision loss but not overflow or underflow
1236  if (Status != APFloat::opOK &&
1237      Lost &&
1238      ((Status & APFloat::opOverflow)  != 0 ||
1239       (Status & APFloat::opUnderflow) != 0)) {
1240    return false;
1241  }
1242
1243  return true;
1244}
1245
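// Illustrative examples: integers in [-16, 64] and common FP constants such as
// 1.0, -4.0 or 0.5 can be encoded as inline constants; other values have to be
// emitted as separate 32-bit literals (see isLiteralImm below).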
1246bool AMDGPUOperand::isInlinableImm(MVT type) const {
1247  if (!isImmTy(ImmTyNone)) {
1248    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1249    return false;
1250  }
1251  // TODO: We should avoid using host float here. It would be better to
1252  // check the float bit values which is what a few other places do.
1253  // We've had bot failures before due to weird NaN support on mips hosts.
1254
1255  APInt Literal(64, Imm.Val);
1256
1257  if (Imm.IsFPImm) { // We got fp literal token
1258    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1259      return AMDGPU::isInlinableLiteral64(Imm.Val,
1260                                          AsmParser->hasInv2PiInlineImm());
1261    }
1262
1263    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1264    if (!canLosslesslyConvertToFPType(FPLiteral, type))
1265      return false;
1266
1267    if (type.getScalarSizeInBits() == 16) {
1268      return AMDGPU::isInlinableLiteral16(
1269        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1270        AsmParser->hasInv2PiInlineImm());
1271    }
1272
1273    // Check if single precision literal is inlinable
1274    return AMDGPU::isInlinableLiteral32(
1275      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1276      AsmParser->hasInv2PiInlineImm());
1277  }
1278
1279  // We got int literal token.
1280  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1281    return AMDGPU::isInlinableLiteral64(Imm.Val,
1282                                        AsmParser->hasInv2PiInlineImm());
1283  }
1284
1285  if (type.getScalarSizeInBits() == 16) {
1286    return AMDGPU::isInlinableLiteral16(
1287      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1288      AsmParser->hasInv2PiInlineImm());
1289  }
1290
1291  return AMDGPU::isInlinableLiteral32(
1292    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1293    AsmParser->hasInv2PiInlineImm());
1294}
1295
1296bool AMDGPUOperand::isLiteralImm(MVT type) const {
1297  // Check that this immediate can be added as a literal
1298  if (!isImmTy(ImmTyNone)) {
1299    return false;
1300  }
1301
1302  if (!Imm.IsFPImm) {
1303    // We got int literal token.
1304
1305    if (type == MVT::f64 && hasFPModifiers()) {
1306      // FP modifiers cannot be applied to int literals while preserving the same
1307      // semantics for VOP1/2/C and VOP3 because of integer truncation. To avoid
1308      // ambiguity, disable these cases.
1309      return false;
1310    }
1311
1312    unsigned Size = type.getSizeInBits();
1313    if (Size == 64)
1314      Size = 32;
1315
1316    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1317    // types.
1318    return isUIntN(Size, Imm.Val) || isIntN(Size, Imm.Val);
1319  }
1320
1321  // We got fp literal token
1322  if (type == MVT::f64) { // Expected 64-bit fp operand
1323    // We would set the low 32 bits of the literal to zeroes, but we still accept such literals
1324    return true;
1325  }
1326
1327  if (type == MVT::i64) { // Expected 64-bit int operand
1328    // We don't allow fp literals in 64-bit integer instructions. It is
1329    // unclear how we should encode them.
1330    return false;
1331  }
1332
1333  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1334  return canLosslesslyConvertToFPType(FPLiteral, type);
1335}
1336
1337bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1338  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1339}
1340
1341bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1342  if (AsmParser->isVI())
1343    return isVReg();
1344  else if (AsmParser->isGFX9())
1345    return isRegKind() || isInlinableImm(type);
1346  else
1347    return false;
1348}
1349
1350bool AMDGPUOperand::isSDWAFP16Operand() const {
1351  return isSDWAOperand(MVT::f16);
1352}
1353
1354bool AMDGPUOperand::isSDWAFP32Operand() const {
1355  return isSDWAOperand(MVT::f32);
1356}
1357
1358bool AMDGPUOperand::isSDWAInt16Operand() const {
1359  return isSDWAOperand(MVT::i16);
1360}
1361
1362bool AMDGPUOperand::isSDWAInt32Operand() const {
1363  return isSDWAOperand(MVT::i32);
1364}
1365
1366uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1367{
1368  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1369  assert(Size == 2 || Size == 4 || Size == 8);
1370
1371  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1372
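  // abs clears the sign bit first; neg then flips it, so abs followed by neg
  // yields -|x|.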
1373  if (Imm.Mods.Abs) {
1374    Val &= ~FpSignMask;
1375  }
1376  if (Imm.Mods.Neg) {
1377    Val ^= FpSignMask;
1378  }
1379
1380  return Val;
1381}
1382
1383void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1384  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1385                             Inst.getNumOperands())) {
1386    addLiteralImmOperand(Inst, Imm.Val,
1387                         ApplyModifiers &&
1388                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1389  } else {
1390    assert(!isImmTy(ImmTyNone) || !hasModifiers());
1391    Inst.addOperand(MCOperand::createImm(Imm.Val));
1392  }
1393}
1394
1395void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1396  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1397  auto OpNum = Inst.getNumOperands();
1398  // Check that this operand accepts literals
1399  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1400
1401  if (ApplyModifiers) {
1402    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1403    const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1404    Val = applyInputFPModifiers(Val, Size);
1405  }
1406
1407  APInt Literal(64, Val);
1408  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1409
1410  if (Imm.IsFPImm) { // We got fp literal token
1411    switch (OpTy) {
1412    case AMDGPU::OPERAND_REG_IMM_INT64:
1413    case AMDGPU::OPERAND_REG_IMM_FP64:
1414    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1415    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1416      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1417                                       AsmParser->hasInv2PiInlineImm())) {
1418        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1419        return;
1420      }
1421
1422      // Non-inlineable
1423      if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1424        // For fp operands we check if low 32 bits are zeros
1425        if (Literal.getLoBits(32) != 0) {
1426          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1427          "Can't encode literal as exact 64-bit floating-point operand. "
1428          "Low 32-bits will be set to zero");
1429        }
1430
1431        Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1432        return;
1433      }
1434
1435      // We don't allow fp literals in 64-bit integer instructions. It is
1436      // unclear how we should encode them. This case should be checked earlier
1437      // in predicate methods (isLiteralImm())
1438      llvm_unreachable("fp literal in 64-bit integer instruction.");
1439
1440    case AMDGPU::OPERAND_REG_IMM_INT32:
1441    case AMDGPU::OPERAND_REG_IMM_FP32:
1442    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1443    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1444    case AMDGPU::OPERAND_REG_IMM_INT16:
1445    case AMDGPU::OPERAND_REG_IMM_FP16:
1446    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1447    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1448    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1449    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
1450      bool lost;
1451      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1452      // Convert the literal to the operand's floating-point semantics
1453      FPLiteral.convert(*getOpFltSemantics(OpTy),
1454                        APFloat::rmNearestTiesToEven, &lost);
1455      // We allow precision loss but not overflow or underflow. This should be
1456      // checked earlier in isLiteralImm()
1457
1458      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1459      if (OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
1460          OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
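        // Replicate the 16-bit value into both halves of the packed 32-bit operand.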
1461        ImmVal |= (ImmVal << 16);
1462      }
1463
1464      Inst.addOperand(MCOperand::createImm(ImmVal));
1465      return;
1466    }
1467    default:
1468      llvm_unreachable("invalid operand size");
1469    }
1470
1471    return;
1472  }
1473
1474  // We got an int literal token.
1475  // Only sign extend inline immediates.
1476  // FIXME: No errors on truncation
1477  switch (OpTy) {
1478  case AMDGPU::OPERAND_REG_IMM_INT32:
1479  case AMDGPU::OPERAND_REG_IMM_FP32:
1480  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1481  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1482    if (isInt<32>(Val) &&
1483        AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1484                                     AsmParser->hasInv2PiInlineImm())) {
1485      Inst.addOperand(MCOperand::createImm(Val));
1486      return;
1487    }
1488
1489    Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1490    return;
1491
1492  case AMDGPU::OPERAND_REG_IMM_INT64:
1493  case AMDGPU::OPERAND_REG_IMM_FP64:
1494  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1495  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1496    if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1497      Inst.addOperand(MCOperand::createImm(Val));
1498      return;
1499    }
1500
1501    Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1502    return;
1503
1504  case AMDGPU::OPERAND_REG_IMM_INT16:
1505  case AMDGPU::OPERAND_REG_IMM_FP16:
1506  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1507  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1508    if (isInt<16>(Val) &&
1509        AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1510                                     AsmParser->hasInv2PiInlineImm())) {
1511      Inst.addOperand(MCOperand::createImm(Val));
1512      return;
1513    }
1514
1515    Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1516    return;
1517
1518  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1519  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
1520    auto LiteralVal = static_cast<uint16_t>(Literal.getLoBits(16).getZExtValue());
1521    assert(AMDGPU::isInlinableLiteral16(LiteralVal,
1522                                        AsmParser->hasInv2PiInlineImm()));
1523
1524    uint32_t ImmVal = static_cast<uint32_t>(LiteralVal) << 16 |
1525                      static_cast<uint32_t>(LiteralVal);
1526    Inst.addOperand(MCOperand::createImm(ImmVal));
1527    return;
1528  }
1529  default:
1530    llvm_unreachable("invalid operand size");
1531  }
1532}
1533
1534template <unsigned Bitwidth>
1535void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1536  APInt Literal(64, Imm.Val);
1537
1538  if (!Imm.IsFPImm) {
1539    // We got int literal token.
1540    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1541    return;
1542  }
1543
1544  bool Lost;
1545  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1546  FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1547                    APFloat::rmNearestTiesToEven, &Lost);
1548  Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1549}
1550
1551void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1552  Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1553}
1554
1555//===----------------------------------------------------------------------===//
1556// AsmParser
1557//===----------------------------------------------------------------------===//
1558
1559static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1560  if (Is == IS_VGPR) {
1561    switch (RegWidth) {
1562      default: return -1;
1563      case 1: return AMDGPU::VGPR_32RegClassID;
1564      case 2: return AMDGPU::VReg_64RegClassID;
1565      case 3: return AMDGPU::VReg_96RegClassID;
1566      case 4: return AMDGPU::VReg_128RegClassID;
1567      case 8: return AMDGPU::VReg_256RegClassID;
1568      case 16: return AMDGPU::VReg_512RegClassID;
1569    }
1570  } else if (Is == IS_TTMP) {
1571    switch (RegWidth) {
1572      default: return -1;
1573      case 1: return AMDGPU::TTMP_32RegClassID;
1574      case 2: return AMDGPU::TTMP_64RegClassID;
1575      case 4: return AMDGPU::TTMP_128RegClassID;
1576      case 8: return AMDGPU::TTMP_256RegClassID;
1577      case 16: return AMDGPU::TTMP_512RegClassID;
1578    }
1579  } else if (Is == IS_SGPR) {
1580    switch (RegWidth) {
1581      default: return -1;
1582      case 1: return AMDGPU::SGPR_32RegClassID;
1583      case 2: return AMDGPU::SGPR_64RegClassID;
1584      case 4: return AMDGPU::SGPR_128RegClassID;
1585      case 8: return AMDGPU::SGPR_256RegClassID;
1586      case 16: return AMDGPU::SGPR_512RegClassID;
1587    }
1588  }
1589  return -1;
1590}
1591
1592static unsigned getSpecialRegForName(StringRef RegName) {
1593  return StringSwitch<unsigned>(RegName)
1594    .Case("exec", AMDGPU::EXEC)
1595    .Case("vcc", AMDGPU::VCC)
1596    .Case("flat_scratch", AMDGPU::FLAT_SCR)
1597    .Case("xnack_mask", AMDGPU::XNACK_MASK)
1598    .Case("m0", AMDGPU::M0)
1599    .Case("scc", AMDGPU::SCC)
1600    .Case("tba", AMDGPU::TBA)
1601    .Case("tma", AMDGPU::TMA)
1602    .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1603    .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1604    .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
1605    .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
1606    .Case("vcc_lo", AMDGPU::VCC_LO)
1607    .Case("vcc_hi", AMDGPU::VCC_HI)
1608    .Case("exec_lo", AMDGPU::EXEC_LO)
1609    .Case("exec_hi", AMDGPU::EXEC_HI)
1610    .Case("tma_lo", AMDGPU::TMA_LO)
1611    .Case("tma_hi", AMDGPU::TMA_HI)
1612    .Case("tba_lo", AMDGPU::TBA_LO)
1613    .Case("tba_hi", AMDGPU::TBA_HI)
1614    .Default(0);
1615}
1616
1617bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1618                                    SMLoc &EndLoc) {
1619  auto R = parseRegister();
1620  if (!R) return true;
1621  assert(R->isReg());
1622  RegNo = R->getReg();
1623  StartLoc = R->getStartLoc();
1624  EndLoc = R->getEndLoc();
1625  return false;
1626}
1627
1628bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1629                                            RegisterKind RegKind, unsigned Reg1,
1630                                            unsigned RegNum) {
1631  switch (RegKind) {
1632  case IS_SPECIAL:
1633    if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1634      Reg = AMDGPU::EXEC;
1635      RegWidth = 2;
1636      return true;
1637    }
1638    if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
1639      Reg = AMDGPU::FLAT_SCR;
1640      RegWidth = 2;
1641      return true;
1642    }
1643    if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
1644      Reg = AMDGPU::XNACK_MASK;
1645      RegWidth = 2;
1646      return true;
1647    }
1648    if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
1649      Reg = AMDGPU::VCC;
1650      RegWidth = 2;
1651      return true;
1652    }
1653    if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
1654      Reg = AMDGPU::TBA;
1655      RegWidth = 2;
1656      return true;
1657    }
1658    if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
1659      Reg = AMDGPU::TMA;
1660      RegWidth = 2;
1661      return true;
1662    }
1663    return false;
1664  case IS_VGPR:
1665  case IS_SGPR:
1666  case IS_TTMP:
1667    if (Reg1 != Reg + RegWidth) {
1668      return false;
1669    }
1670    RegWidth++;
1671    return true;
1672  default:
1673    llvm_unreachable("unexpected register kind");
1674  }
1675}
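// For example, listing exec_lo followed by exec_hi merges into the 64-bit
// EXEC register (RegWidth becomes 2), while for VGPR/SGPR/TTMP kinds the next
// register must be consecutive (Reg + RegWidth) and simply grows the range.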
1676
1677bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1678                                          unsigned &RegNum, unsigned &RegWidth,
1679                                          unsigned *DwordRegIndex) {
1680  if (DwordRegIndex) { *DwordRegIndex = 0; }
1681  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
1682  if (getLexer().is(AsmToken::Identifier)) {
1683    StringRef RegName = Parser.getTok().getString();
1684    if ((Reg = getSpecialRegForName(RegName))) {
1685      Parser.Lex();
1686      RegKind = IS_SPECIAL;
1687    } else {
1688      unsigned RegNumIndex = 0;
1689      if (RegName[0] == 'v') {
1690        RegNumIndex = 1;
1691        RegKind = IS_VGPR;
1692      } else if (RegName[0] == 's') {
1693        RegNumIndex = 1;
1694        RegKind = IS_SGPR;
1695      } else if (RegName.startswith("ttmp")) {
1696        RegNumIndex = strlen("ttmp");
1697        RegKind = IS_TTMP;
1698      } else {
1699        return false;
1700      }
1701      if (RegName.size() > RegNumIndex) {
1702        // Single 32-bit register: vXX.
1703        if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
1704          return false;
1705        Parser.Lex();
1706        RegWidth = 1;
1707      } else {
1708        // Range of registers: v[XX:YY]. ":YY" is optional.
1709        Parser.Lex();
1710        int64_t RegLo, RegHi;
1711        if (getLexer().isNot(AsmToken::LBrac))
1712          return false;
1713        Parser.Lex();
1714
1715        if (getParser().parseAbsoluteExpression(RegLo))
1716          return false;
1717
1718        const bool isRBrace = getLexer().is(AsmToken::RBrac);
1719        if (!isRBrace && getLexer().isNot(AsmToken::Colon))
1720          return false;
1721        Parser.Lex();
1722
1723        if (isRBrace) {
1724          RegHi = RegLo;
1725        } else {
1726          if (getParser().parseAbsoluteExpression(RegHi))
1727            return false;
1728
1729          if (getLexer().isNot(AsmToken::RBrac))
1730            return false;
1731          Parser.Lex();
1732        }
1733        RegNum = (unsigned) RegLo;
1734        RegWidth = (RegHi - RegLo) + 1;
1735      }
1736    }
1737  } else if (getLexer().is(AsmToken::LBrac)) {
1738    // List of consecutive registers: [s0,s1,s2,s3]
1739    Parser.Lex();
1740    if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
1741      return false;
1742    if (RegWidth != 1)
1743      return false;
1744    RegisterKind RegKind1;
1745    unsigned Reg1, RegNum1, RegWidth1;
1746    do {
1747      if (getLexer().is(AsmToken::Comma)) {
1748        Parser.Lex();
1749      } else if (getLexer().is(AsmToken::RBrac)) {
1750        Parser.Lex();
1751        break;
1752      } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
1753        if (RegWidth1 != 1) {
1754          return false;
1755        }
1756        if (RegKind1 != RegKind) {
1757          return false;
1758        }
1759        if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
1760          return false;
1761        }
1762      } else {
1763        return false;
1764      }
1765    } while (true);
1766  } else {
1767    return false;
1768  }
1769  switch (RegKind) {
1770  case IS_SPECIAL:
1771    RegNum = 0;
1772    RegWidth = 1;
1773    break;
1774  case IS_VGPR:
1775  case IS_SGPR:
1776  case IS_TTMP:
1777  {
1778    unsigned Size = 1;
1779    if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
1780      // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
1781      Size = std::min(RegWidth, 4u);
1782    }
1783    if (RegNum % Size != 0)
1784      return false;
1785    if (DwordRegIndex) { *DwordRegIndex = RegNum; }
1786    RegNum = RegNum / Size;
1787    int RCID = getRegClass(RegKind, RegWidth);
1788    if (RCID == -1)
1789      return false;
1790    const MCRegisterClass RC = TRI->getRegClass(RCID);
1791    if (RegNum >= RC.getNumRegs())
1792      return false;
1793    Reg = RC.getRegister(RegNum);
1794    break;
1795  }
1796
1797  default:
1798    llvm_unreachable("unexpected register kind");
1799  }
1800
1801  if (!subtargetHasRegister(*TRI, Reg))
1802    return false;
1803  return true;
1804}
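// Register syntaxes accepted by the parser above include, for example:
//   v0                 - single 32-bit VGPR
//   s[2:3]             - 64-bit SGPR range
//   ttmp[4:7]          - 128-bit TTMP range
//   [s0, s1, s2, s3]   - list of consecutive 32-bit registers
//   vcc, exec, m0      - special registers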
1805
1806Optional<StringRef>
1807AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
1808  switch (RegKind) {
1809  case IS_VGPR:
1810    return StringRef(".amdgcn.next_free_vgpr");
1811  case IS_SGPR:
1812    return StringRef(".amdgcn.next_free_sgpr");
1813  default:
1814    return None;
1815  }
1816}
1817
1818void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
1819  auto SymbolName = getGprCountSymbolName(RegKind);
1820  assert(SymbolName && "initializing invalid register kind");
1821  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
1822  Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
1823}
1824
1825bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
1826                                            unsigned DwordRegIndex,
1827                                            unsigned RegWidth) {
1828  // Symbols are only defined for GCN targets
1829  if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
1830    return true;
1831
1832  auto SymbolName = getGprCountSymbolName(RegKind);
1833  if (!SymbolName)
1834    return true;
1835  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
1836
1837  int64_t NewMax = DwordRegIndex + RegWidth - 1;
1838  int64_t OldCount;
1839
1840  if (!Sym->isVariable())
1841    return !Error(getParser().getTok().getLoc(),
1842                  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
1843  if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
1844    return !Error(
1845        getParser().getTok().getLoc(),
1846        ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
1847
1848  if (OldCount <= NewMax)
1849    Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
1850
1851  return true;
1852}
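// For example, a use of v[4:5] yields DwordRegIndex = 4 and RegWidth = 2, so
// .amdgcn.next_free_vgpr is raised to at least NewMax + 1 = 6.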
1853
1854std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
1855  const auto &Tok = Parser.getTok();
1856  SMLoc StartLoc = Tok.getLoc();
1857  SMLoc EndLoc = Tok.getEndLoc();
1858  RegisterKind RegKind;
1859  unsigned Reg, RegNum, RegWidth, DwordRegIndex;
1860
1861  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
1862    return nullptr;
1863  }
1864  if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1865    if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth))
1866      return nullptr;
1867  } else
1868    KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
1869  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false);
1870}
1871
1872bool
1873AMDGPUAsmParser::parseAbsoluteExpr(int64_t &Val, bool AbsMod) {
1874  if (AbsMod && getLexer().peekTok().is(AsmToken::Pipe) &&
1875      (getLexer().getKind() == AsmToken::Integer ||
1876       getLexer().getKind() == AsmToken::Real)) {
1877    // This is a workaround for handling operands like these:
1878    //     |1.0|
1879    //     |-1|
1880    // This syntax is not compatible with syntax of standard
1881    // MC expressions (due to the trailing '|').
1882
1883    SMLoc EndLoc;
1884    const MCExpr *Expr;
1885
1886    if (getParser().parsePrimaryExpr(Expr, EndLoc)) {
1887      return true;
1888    }
1889
1890    return !Expr->evaluateAsAbsolute(Val);
1891  }
1892
1893  return getParser().parseAbsoluteExpression(Val);
1894}
1895
1896OperandMatchResultTy
1897AMDGPUAsmParser::parseImm(OperandVector &Operands, bool AbsMod) {
1898  // TODO: add syntactic sugar for 1/(2*PI)
1899  bool Minus = false;
1900  if (getLexer().getKind() == AsmToken::Minus) {
1901    const AsmToken NextToken = getLexer().peekTok();
1902    if (!NextToken.is(AsmToken::Integer) &&
1903        !NextToken.is(AsmToken::Real)) {
1904        return MatchOperand_NoMatch;
1905    }
1906    Minus = true;
1907    Parser.Lex();
1908  }
1909
1910  SMLoc S = Parser.getTok().getLoc();
1911  switch(getLexer().getKind()) {
1912  case AsmToken::Integer: {
1913    int64_t IntVal;
1914    if (parseAbsoluteExpr(IntVal, AbsMod))
1915      return MatchOperand_ParseFail;
1916    if (Minus)
1917      IntVal *= -1;
1918    Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
1919    return MatchOperand_Success;
1920  }
1921  case AsmToken::Real: {
1922    int64_t IntVal;
1923    if (parseAbsoluteExpr(IntVal, AbsMod))
1924      return MatchOperand_ParseFail;
1925
1926    APFloat F(BitsToDouble(IntVal));
1927    if (Minus)
1928      F.changeSign();
1929    Operands.push_back(
1930        AMDGPUOperand::CreateImm(this, F.bitcastToAPInt().getZExtValue(), S,
1931                                 AMDGPUOperand::ImmTyNone, true));
1932    return MatchOperand_Success;
1933  }
1934  default:
1935    return MatchOperand_NoMatch;
1936  }
1937}
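// For example, '-5' yields the integer immediate -5, while '-1.0' is parsed
// as a double whose sign is flipped before its bit pattern is stored as the
// immediate value.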
1938
1939OperandMatchResultTy
1940AMDGPUAsmParser::parseReg(OperandVector &Operands) {
1941  if (auto R = parseRegister()) {
1942    assert(R->isReg());
1943    R->Reg.IsForcedVOP3 = isForcedVOP3();
1944    Operands.push_back(std::move(R));
1945    return MatchOperand_Success;
1946  }
1947  return MatchOperand_NoMatch;
1948}
1949
1950OperandMatchResultTy
1951AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool AbsMod) {
1952  auto res = parseImm(Operands, AbsMod);
1953  if (res != MatchOperand_NoMatch) {
1954    return res;
1955  }
1956
1957  return parseReg(Operands);
1958}
1959
1960OperandMatchResultTy
1961AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
1962                                              bool AllowImm) {
1963  bool Negate = false, Negate2 = false, Abs = false, Abs2 = false;
1964
1965  if (getLexer().getKind()== AsmToken::Minus) {
1966    const AsmToken NextToken = getLexer().peekTok();
1967
1968    // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
1969    if (NextToken.is(AsmToken::Minus)) {
1970      Error(Parser.getTok().getLoc(), "invalid syntax, expected 'neg' modifier");
1971      return MatchOperand_ParseFail;
1972    }
1973
1974    // '-' followed by an integer literal N should be interpreted as integer
1975    // negation rather than a floating-point NEG modifier applied to N.
1976    // Besides being counter-intuitive, such use of the floating-point NEG modifier
1977    // results in different meanings for integer literals used with VOP1/2/C
1978    // and VOP3, for example:
1979    //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
1980    //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
1981    // Negative fp literals should be handled likewise for uniformity.
1982    if (!NextToken.is(AsmToken::Integer) && !NextToken.is(AsmToken::Real)) {
1983      Parser.Lex();
1984      Negate = true;
1985    }
1986  }
1987
1988  if (getLexer().getKind() == AsmToken::Identifier &&
1989      Parser.getTok().getString() == "neg") {
1990    if (Negate) {
1991      Error(Parser.getTok().getLoc(), "expected register or immediate");
1992      return MatchOperand_ParseFail;
1993    }
1994    Parser.Lex();
1995    Negate2 = true;
1996    if (getLexer().isNot(AsmToken::LParen)) {
1997      Error(Parser.getTok().getLoc(), "expected left paren after neg");
1998      return MatchOperand_ParseFail;
1999    }
2000    Parser.Lex();
2001  }
2002
2003  if (getLexer().getKind() == AsmToken::Identifier &&
2004      Parser.getTok().getString() == "abs") {
2005    Parser.Lex();
2006    Abs2 = true;
2007    if (getLexer().isNot(AsmToken::LParen)) {
2008      Error(Parser.getTok().getLoc(), "expected left paren after abs");
2009      return MatchOperand_ParseFail;
2010    }
2011    Parser.Lex();
2012  }
2013
2014  if (getLexer().getKind() == AsmToken::Pipe) {
2015    if (Abs2) {
2016      Error(Parser.getTok().getLoc(), "expected register or immediate");
2017      return MatchOperand_ParseFail;
2018    }
2019    Parser.Lex();
2020    Abs = true;
2021  }
2022
2023  OperandMatchResultTy Res;
2024  if (AllowImm) {
2025    Res = parseRegOrImm(Operands, Abs);
2026  } else {
2027    Res = parseReg(Operands);
2028  }
2029  if (Res != MatchOperand_Success) {
2030    return Res;
2031  }
2032
2033  AMDGPUOperand::Modifiers Mods;
2034  if (Abs) {
2035    if (getLexer().getKind() != AsmToken::Pipe) {
2036      Error(Parser.getTok().getLoc(), "expected vertical bar");
2037      return MatchOperand_ParseFail;
2038    }
2039    Parser.Lex();
2040    Mods.Abs = true;
2041  }
2042  if (Abs2) {
2043    if (getLexer().isNot(AsmToken::RParen)) {
2044      Error(Parser.getTok().getLoc(), "expected closing parentheses");
2045      return MatchOperand_ParseFail;
2046    }
2047    Parser.Lex();
2048    Mods.Abs = true;
2049  }
2050
2051  if (Negate) {
2052    Mods.Neg = true;
2053  } else if (Negate2) {
2054    if (getLexer().isNot(AsmToken::RParen)) {
2055      Error(Parser.getTok().getLoc(), "expected closing parentheses");
2056      return MatchOperand_ParseFail;
2057    }
2058    Parser.Lex();
2059    Mods.Neg = true;
2060  }
2061
2062  if (Mods.hasFPModifiers()) {
2063    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2064    Op.setModifiers(Mods);
2065  }
2066  return MatchOperand_Success;
2067}
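// Operand forms accepted by the FP-modifier parsing above include, for
// example:
//   -v0       - neg written as a leading minus
//   neg(v0)   - explicit neg modifier
//   abs(v1)   - explicit abs modifier
//   |v1|      - abs written with vertical bars
//   -|v2|     - neg and abs combined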
2068
2069OperandMatchResultTy
2070AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2071                                               bool AllowImm) {
2072  bool Sext = false;
2073
2074  if (getLexer().getKind() == AsmToken::Identifier &&
2075      Parser.getTok().getString() == "sext") {
2076    Parser.Lex();
2077    Sext = true;
2078    if (getLexer().isNot(AsmToken::LParen)) {
2079      Error(Parser.getTok().getLoc(), "expected left paren after sext");
2080      return MatchOperand_ParseFail;
2081    }
2082    Parser.Lex();
2083  }
2084
2085  OperandMatchResultTy Res;
2086  if (AllowImm) {
2087    Res = parseRegOrImm(Operands);
2088  } else {
2089    Res = parseReg(Operands);
2090  }
2091  if (Res != MatchOperand_Success) {
2092    return Res;
2093  }
2094
2095  AMDGPUOperand::Modifiers Mods;
2096  if (Sext) {
2097    if (getLexer().isNot(AsmToken::RParen)) {
2098      Error(Parser.getTok().getLoc(), "expected closing parentheses");
2099      return MatchOperand_ParseFail;
2100    }
2101    Parser.Lex();
2102    Mods.Sext = true;
2103  }
2104
2105  if (Mods.hasIntModifiers()) {
2106    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2107    Op.setModifiers(Mods);
2108  }
2109
2110  return MatchOperand_Success;
2111}
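// For example, sext(v0) applies the integer sign-extension modifier to the
// parsed register operand.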
2112
2113OperandMatchResultTy
2114AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2115  return parseRegOrImmWithFPInputMods(Operands, false);
2116}
2117
2118OperandMatchResultTy
2119AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2120  return parseRegOrImmWithIntInputMods(Operands, false);
2121}
2122
2123OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2124  std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2125  if (Reg) {
2126    Operands.push_back(std::move(Reg));
2127    return MatchOperand_Success;
2128  }
2129
2130  const AsmToken &Tok = Parser.getTok();
2131  if (Tok.getString() == "off") {
2132    Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Tok.getLoc(),
2133                                                AMDGPUOperand::ImmTyOff, false));
2134    Parser.Lex();
2135    return MatchOperand_Success;
2136  }
2137
2138  return MatchOperand_NoMatch;
2139}
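// For example, both 'v0' and the keyword 'off' are accepted here; 'off' is
// turned into an immediate 0 operand of type ImmTyOff.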
2140
2141unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2142  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2143
2144  if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2145      (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2146      (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2147      (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2148    return Match_InvalidOperand;
2149
2150  if ((TSFlags & SIInstrFlags::VOP3) &&
2151      (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2152      getForcedEncodingSize() != 64)
2153    return Match_PreferE32;
2154
2155  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2156      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2157    // v_mac_f32/16 allow only dst_sel == DWORD;
2158    auto OpNum =
2159        AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2160    const auto &Op = Inst.getOperand(OpNum);
2161    if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2162      return Match_InvalidOperand;
2163    }
2164  }
2165
2166  if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) {
2167    // FIXME: Produces error without correct column reported.
2168    auto OpNum =
2169        AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset);
2170    const auto &Op = Inst.getOperand(OpNum);
2171    if (Op.getImm() != 0)
2172      return Match_InvalidOperand;
2173  }
2174
2175  return Match_Success;
2176}
2177
2178// Which asm variants we should check.
2179ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2180  if (getForcedEncodingSize() == 32) {
2181    static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2182    return makeArrayRef(Variants);
2183  }
2184
2185  if (isForcedVOP3()) {
2186    static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2187    return makeArrayRef(Variants);
2188  }
2189
2190  if (isForcedSDWA()) {
2191    static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2192                                        AMDGPUAsmVariants::SDWA9};
2193    return makeArrayRef(Variants);
2194  }
2195
2196  if (isForcedDPP()) {
2197    static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2198    return makeArrayRef(Variants);
2199  }
2200
2201  static const unsigned Variants[] = {
2202    AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2203    AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2204  };
2205
2206  return makeArrayRef(Variants);
2207}
2208
2209unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2210  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2211  const unsigned Num = Desc.getNumImplicitUses();
2212  for (unsigned i = 0; i < Num; ++i) {
2213    unsigned Reg = Desc.ImplicitUses[i];
2214    switch (Reg) {
2215    case AMDGPU::FLAT_SCR:
2216    case AMDGPU::VCC:
2217    case AMDGPU::M0:
2218      return Reg;
2219    default:
2220      break;
2221    }
2222  }
2223  return AMDGPU::NoRegister;
2224}
2225
2226// NB: This code is correct only when used to check constant
2227// bus limitations because GFX7 supports no f16 inline constants.
2228// Note that there are no cases when a GFX7 opcode violates
2229// constant bus limitations due to the use of an f16 constant.
2230bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2231                                       unsigned OpIdx) const {
2232  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2233
2234  if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2235    return false;
2236  }
2237
2238  const MCOperand &MO = Inst.getOperand(OpIdx);
2239
2240  int64_t Val = MO.getImm();
2241  auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2242
2243  switch (OpSize) { // expected operand size
2244  case 8:
2245    return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2246  case 4:
2247    return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2248  case 2: {
2249    const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2250    if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2251        OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
2252      return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2253    } else {
2254      return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2255    }
2256  }
2257  default:
2258    llvm_unreachable("invalid operand size");
2259  }
2260}
2261
2262bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2263  const MCOperand &MO = Inst.getOperand(OpIdx);
2264  if (MO.isImm()) {
2265    return !isInlineConstant(Inst, OpIdx);
2266  }
2267  return !MO.isReg() ||
2268         isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
2269}
2270
2271bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2272  const unsigned Opcode = Inst.getOpcode();
2273  const MCInstrDesc &Desc = MII.get(Opcode);
2274  unsigned ConstantBusUseCount = 0;
2275
2276  if (Desc.TSFlags &
2277      (SIInstrFlags::VOPC |
2278       SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2279       SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2280       SIInstrFlags::SDWA)) {
2281    // Check special imm operands (used by madmk, etc)
2282    if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2283      ++ConstantBusUseCount;
2284    }
2285
2286    unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2287    if (SGPRUsed != AMDGPU::NoRegister) {
2288      ++ConstantBusUseCount;
2289    }
2290
2291    const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2292    const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2293    const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2294
2295    const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2296
2297    for (int OpIdx : OpIndices) {
2298      if (OpIdx == -1) break;
2299
2300      const MCOperand &MO = Inst.getOperand(OpIdx);
2301      if (usesConstantBus(Inst, OpIdx)) {
2302        if (MO.isReg()) {
2303          const unsigned Reg = mc2PseudoReg(MO.getReg());
2304          // Pairs of registers with a partial intersection like these
2305          //   s0, s[0:1]
2306          //   flat_scratch_lo, flat_scratch
2307          //   flat_scratch_lo, flat_scratch_hi
2308          // are theoretically valid but they are disabled anyway.
2309          // Note that this code mimics SIInstrInfo::verifyInstruction
2310          if (Reg != SGPRUsed) {
2311            ++ConstantBusUseCount;
2312          }
2313          SGPRUsed = Reg;
2314        } else { // Expression or a literal
2315          ++ConstantBusUseCount;
2316        }
2317      }
2318    }
2319  }
2320
2321  return ConstantBusUseCount <= 1;
2322}
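// For example, an instruction whose source operands read two different SGPRs,
// or an SGPR together with a literal constant, uses the constant bus twice
// and is rejected by the check above.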
2323
2324bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2325  const unsigned Opcode = Inst.getOpcode();
2326  const MCInstrDesc &Desc = MII.get(Opcode);
2327
2328  const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2329  if (DstIdx == -1 ||
2330      Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2331    return true;
2332  }
2333
2334  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2335
2336  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2337  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2338  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2339
2340  assert(DstIdx != -1);
2341  const MCOperand &Dst = Inst.getOperand(DstIdx);
2342  assert(Dst.isReg());
2343  const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2344
2345  const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2346
2347  for (int SrcIdx : SrcIndices) {
2348    if (SrcIdx == -1) break;
2349    const MCOperand &Src = Inst.getOperand(SrcIdx);
2350    if (Src.isReg()) {
2351      const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2352      if (isRegIntersect(DstReg, SrcReg, TRI)) {
2353        return false;
2354      }
2355    }
2356  }
2357
2358  return true;
2359}
2360
2361bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2362
2363  const unsigned Opc = Inst.getOpcode();
2364  const MCInstrDesc &Desc = MII.get(Opc);
2365
2366  if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2367    int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2368    assert(ClampIdx != -1);
2369    return Inst.getOperand(ClampIdx).getImm() == 0;
2370  }
2371
2372  return true;
2373}
2374
2375bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
2376
2377  const unsigned Opc = Inst.getOpcode();
2378  const MCInstrDesc &Desc = MII.get(Opc);
2379
2380  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2381    return true;
2382
2383  int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
2384  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2385  int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
2386
2387  assert(VDataIdx != -1);
2388  assert(DMaskIdx != -1);
2389  assert(TFEIdx != -1);
2390
2391  unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
2392  unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
2393  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2394  if (DMask == 0)
2395    DMask = 1;
2396
2397  unsigned DataSize =
2398    (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
2399  if (hasPackedD16()) {
2400    int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2401    if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
2402      DataSize = (DataSize + 1) / 2;
2403  }
2404
2405  return (VDataSize / 4) == DataSize + TFESize;
2406}
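// Worked example: dmask = 0x7 enables 3 components, so with tfe = 1 the vdata
// operand must be 4 dwords wide; a d16 form on targets with packed d16
// support halves the data size (rounding up) before this comparison.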
2407
2408bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
2409
2410  const unsigned Opc = Inst.getOpcode();
2411  const MCInstrDesc &Desc = MII.get(Opc);
2412
2413  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2414    return true;
2415  if (!Desc.mayLoad() || !Desc.mayStore())
2416    return true; // Not atomic
2417
2418  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2419  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2420
2421  // This is an incomplete check because image_atomic_cmpswap
2422  // may only use 0x3 and 0xf while other atomic operations
2423  // may use 0x1 and 0x3. However, these limitations are
2424  // verified when we check that dmask matches dst size.
2425  return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
2426}
2427
2428bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
2429
2430  const unsigned Opc = Inst.getOpcode();
2431  const MCInstrDesc &Desc = MII.get(Opc);
2432
2433  if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
2434    return true;
2435
2436  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2437  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2438
2439  // GATHER4 instructions use dmask in a different fashion compared to
2440  // other MIMG instructions. The only useful DMASK values are
2441  // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
2442  // (red,red,red,red) etc.) The ISA document doesn't mention
2443  // this.
2444  return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
2445}
2446
2447bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
2448
2449  const unsigned Opc = Inst.getOpcode();
2450  const MCInstrDesc &Desc = MII.get(Opc);
2451
2452  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2453    return true;
2454
2455  int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2456  if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
2457    if (isCI() || isSI())
2458      return false;
2459  }
2460
2461  return true;
2462}
2463
2464bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
2465                                          const SMLoc &IDLoc) {
2466  if (!validateConstantBusLimitations(Inst)) {
2467    Error(IDLoc,
2468      "invalid operand (violates constant bus restrictions)");
2469    return false;
2470  }
2471  if (!validateEarlyClobberLimitations(Inst)) {
2472    Error(IDLoc,
2473      "destination must be different than all sources");
2474    return false;
2475  }
2476  if (!validateIntClampSupported(Inst)) {
2477    Error(IDLoc,
2478      "integer clamping is not supported on this GPU");
2479    return false;
2480  }
2481  // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
2482  if (!validateMIMGD16(Inst)) {
2483    Error(IDLoc,
2484      "d16 modifier is not supported on this GPU");
2485    return false;
2486  }
2487  if (!validateMIMGDataSize(Inst)) {
2488    Error(IDLoc,
2489      "image data size does not match dmask and tfe");
2490    return false;
2491  }
2492  if (!validateMIMGAtomicDMask(Inst)) {
2493    Error(IDLoc,
2494      "invalid atomic image dmask");
2495    return false;
2496  }
2497  if (!validateMIMGGatherDMask(Inst)) {
2498    Error(IDLoc,
2499      "invalid image_gather dmask: only one bit must be set");
2500    return false;
2501  }
2502
2503  return true;
2504}
2505
2506static std::string AMDGPUMnemonicSpellCheck(StringRef S, uint64_t FBS,
2507                                            unsigned VariantID = 0);
2508
2509bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2510                                              OperandVector &Operands,
2511                                              MCStreamer &Out,
2512                                              uint64_t &ErrorInfo,
2513                                              bool MatchingInlineAsm) {
2514  MCInst Inst;
2515  unsigned Result = Match_Success;
2516  for (auto Variant : getMatchedVariants()) {
2517    uint64_t EI;
2518    auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
2519                                  Variant);
2520    // We order match statuses from least to most specific and use the most
2521    // specific status as the result:
2522    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
2523    if ((R == Match_Success) ||
2524        (R == Match_PreferE32) ||
2525        (R == Match_MissingFeature && Result != Match_PreferE32) ||
2526        (R == Match_InvalidOperand && Result != Match_MissingFeature
2527                                   && Result != Match_PreferE32) ||
2528        (R == Match_MnemonicFail   && Result != Match_InvalidOperand
2529                                   && Result != Match_MissingFeature
2530                                   && Result != Match_PreferE32)) {
2531      Result = R;
2532      ErrorInfo = EI;
2533    }
2534    if (R == Match_Success)
2535      break;
2536  }
2537
2538  switch (Result) {
2539  default: break;
2540  case Match_Success:
2541    if (!validateInstruction(Inst, IDLoc)) {
2542      return true;
2543    }
2544    Inst.setLoc(IDLoc);
2545    Out.EmitInstruction(Inst, getSTI());
2546    return false;
2547
2548  case Match_MissingFeature:
2549    return Error(IDLoc, "instruction not supported on this GPU");
2550
2551  case Match_MnemonicFail: {
2552    uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
2553    std::string Suggestion = AMDGPUMnemonicSpellCheck(
2554        ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
2555    return Error(IDLoc, "invalid instruction" + Suggestion,
2556                 ((AMDGPUOperand &)*Operands[0]).getLocRange());
2557  }
2558
2559  case Match_InvalidOperand: {
2560    SMLoc ErrorLoc = IDLoc;
2561    if (ErrorInfo != ~0ULL) {
2562      if (ErrorInfo >= Operands.size()) {
2563        return Error(IDLoc, "too few operands for instruction");
2564      }
2565      ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
2566      if (ErrorLoc == SMLoc())
2567        ErrorLoc = IDLoc;
2568    }
2569    return Error(ErrorLoc, "invalid operand for instruction");
2570  }
2571
2572  case Match_PreferE32:
2573    return Error(IDLoc, "internal error: instruction without _e64 suffix "
2574                        "should be encoded as e32");
2575  }
2576  llvm_unreachable("Implement any new match types added!");
2577}
2578
2579bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
2580  int64_t Tmp = -1;
2581  if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
2582    return true;
2583  }
2584  if (getParser().parseAbsoluteExpression(Tmp)) {
2585    return true;
2586  }
2587  Ret = static_cast<uint32_t>(Tmp);
2588  return false;
2589}
2590
2591bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
2592                                               uint32_t &Minor) {
2593  if (ParseAsAbsoluteExpression(Major))
2594    return TokError("invalid major version");
2595
2596  if (getLexer().isNot(AsmToken::Comma))
2597    return TokError("minor version number required, comma expected");
2598  Lex();
2599
2600  if (ParseAsAbsoluteExpression(Minor))
2601    return TokError("invalid minor version");
2602
2603  return false;
2604}
2605
2606bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
2607  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
2608    return TokError("directive only supported for amdgcn architecture");
2609
2610  std::string Target;
2611
2612  SMLoc TargetStart = getTok().getLoc();
2613  if (getParser().parseEscapedString(Target))
2614    return true;
2615  SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
2616
2617  std::string ExpectedTarget;
2618  raw_string_ostream ExpectedTargetOS(ExpectedTarget);
2619  IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
2620
2621  if (Target != ExpectedTargetOS.str())
2622    return getParser().Error(TargetRange.Start, "target must match options",
2623                             TargetRange);
2624
2625  getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
2626  return false;
2627}
2628
2629bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
2630  return getParser().Error(Range.Start, "value out of range", Range);
2631}
2632
2633bool AMDGPUAsmParser::calculateGPRBlocks(
2634    const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
2635    bool XNACKUsed, unsigned NextFreeVGPR, SMRange VGPRRange,
2636    unsigned NextFreeSGPR, SMRange SGPRRange, unsigned &VGPRBlocks,
2637    unsigned &SGPRBlocks) {
2638  // TODO(scott.linder): These calculations are duplicated from
2639  // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
2640  IsaVersion Version = getIsaVersion(getSTI().getCPU());
2641
2642  unsigned NumVGPRs = NextFreeVGPR;
2643  unsigned NumSGPRs = NextFreeSGPR;
2644  unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(&getSTI());
2645
2646  if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
2647      NumSGPRs > MaxAddressableNumSGPRs)
2648    return OutOfRangeError(SGPRRange);
2649
2650  NumSGPRs +=
2651      IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
2652
2653  if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
2654      NumSGPRs > MaxAddressableNumSGPRs)
2655    return OutOfRangeError(SGPRRange);
2656
2657  if (Features.test(FeatureSGPRInitBug))
2658    NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
2659
2660  VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs);
2661  SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
2662
2663  return false;
2664}
2665
2666bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
2667  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
2668    return TokError("directive only supported for amdgcn architecture");
2669
2670  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
2671    return TokError("directive only supported for amdhsa OS");
2672
2673  StringRef KernelName;
2674  if (getParser().parseIdentifier(KernelName))
2675    return true;
2676
2677  kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor();
2678
2679  StringSet<> Seen;
2680
2681  IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
2682
2683  SMRange VGPRRange;
2684  uint64_t NextFreeVGPR = 0;
2685  SMRange SGPRRange;
2686  uint64_t NextFreeSGPR = 0;
2687  unsigned UserSGPRCount = 0;
2688  bool ReserveVCC = true;
2689  bool ReserveFlatScr = true;
2690  bool ReserveXNACK = hasXNACK();
2691
2692  while (true) {
2693    while (getLexer().is(AsmToken::EndOfStatement))
2694      Lex();
2695
2696    if (getLexer().isNot(AsmToken::Identifier))
2697      return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
2698
2699    StringRef ID = getTok().getIdentifier();
2700    SMRange IDRange = getTok().getLocRange();
2701    Lex();
2702
2703    if (ID == ".end_amdhsa_kernel")
2704      break;
2705
2706    if (Seen.find(ID) != Seen.end())
2707      return TokError(".amdhsa_ directives cannot be repeated");
2708    Seen.insert(ID);
2709
2710    SMLoc ValStart = getTok().getLoc();
2711    int64_t IVal;
2712    if (getParser().parseAbsoluteExpression(IVal))
2713      return true;
2714    SMLoc ValEnd = getTok().getLoc();
2715    SMRange ValRange = SMRange(ValStart, ValEnd);
2716
2717    if (IVal < 0)
2718      return OutOfRangeError(ValRange);
2719
2720    uint64_t Val = IVal;
2721
2722#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
2723  if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
2724    return OutOfRangeError(RANGE);                                             \
2725  AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
2726
2727    if (ID == ".amdhsa_group_segment_fixed_size") {
2728      if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
2729        return OutOfRangeError(ValRange);
2730      KD.group_segment_fixed_size = Val;
2731    } else if (ID == ".amdhsa_private_segment_fixed_size") {
2732      if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
2733        return OutOfRangeError(ValRange);
2734      KD.private_segment_fixed_size = Val;
2735    } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
2736      PARSE_BITS_ENTRY(KD.kernel_code_properties,
2737                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
2738                       Val, ValRange);
2739      UserSGPRCount++;
2740    } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
2741      PARSE_BITS_ENTRY(KD.kernel_code_properties,
2742                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
2743                       ValRange);
2744      UserSGPRCount++;
2745    } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
2746      PARSE_BITS_ENTRY(KD.kernel_code_properties,
2747                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
2748                       ValRange);
2749      UserSGPRCount++;
2750    } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
2751      PARSE_BITS_ENTRY(KD.kernel_code_properties,
2752                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
2753                       Val, ValRange);
2754      UserSGPRCount++;
2755    } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
2756      PARSE_BITS_ENTRY(KD.kernel_code_properties,
2757                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
2758                       ValRange);
2759      UserSGPRCount++;
2760    } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
2761      PARSE_BITS_ENTRY(KD.kernel_code_properties,
2762                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
2763                       ValRange);
2764      UserSGPRCount++;
2765    } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
2766      PARSE_BITS_ENTRY(KD.kernel_code_properties,
2767                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
2768                       Val, ValRange);
2769      UserSGPRCount++;
2770    } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
2771      PARSE_BITS_ENTRY(
2772          KD.compute_pgm_rsrc2,
2773          COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
2774          ValRange);
2775    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
2776      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2777                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
2778                       ValRange);
2779    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
2780      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2781                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
2782                       ValRange);
2783    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
2784      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2785                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
2786                       ValRange);
2787    } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
2788      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2789                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
2790                       ValRange);
2791    } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
2792      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2793                       COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
2794                       ValRange);
2795    } else if (ID == ".amdhsa_next_free_vgpr") {
2796      VGPRRange = ValRange;
2797      NextFreeVGPR = Val;
2798    } else if (ID == ".amdhsa_next_free_sgpr") {
2799      SGPRRange = ValRange;
2800      NextFreeSGPR = Val;
2801    } else if (ID == ".amdhsa_reserve_vcc") {
2802      if (!isUInt<1>(Val))
2803        return OutOfRangeError(ValRange);
2804      ReserveVCC = Val;
2805    } else if (ID == ".amdhsa_reserve_flat_scratch") {
2806      if (IVersion.Major < 7)
2807        return getParser().Error(IDRange.Start, "directive requires gfx7+",
2808                                 IDRange);
2809      if (!isUInt<1>(Val))
2810        return OutOfRangeError(ValRange);
2811      ReserveFlatScr = Val;
2812    } else if (ID == ".amdhsa_reserve_xnack_mask") {
2813      if (IVersion.Major < 8)
2814        return getParser().Error(IDRange.Start, "directive requires gfx8+",
2815                                 IDRange);
2816      if (!isUInt<1>(Val))
2817        return OutOfRangeError(ValRange);
2818      ReserveXNACK = Val;
2819    } else if (ID == ".amdhsa_float_round_mode_32") {
2820      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
2821                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
2822    } else if (ID == ".amdhsa_float_round_mode_16_64") {
2823      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
2824                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
2825    } else if (ID == ".amdhsa_float_denorm_mode_32") {
2826      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
2827                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
2828    } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
2829      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
2830                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
2831                       ValRange);
2832    } else if (ID == ".amdhsa_dx10_clamp") {
2833      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
2834                       COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
2835    } else if (ID == ".amdhsa_ieee_mode") {
2836      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
2837                       Val, ValRange);
2838    } else if (ID == ".amdhsa_fp16_overflow") {
2839      if (IVersion.Major < 9)
2840        return getParser().Error(IDRange.Start, "directive requires gfx9+",
2841                                 IDRange);
2842      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
2843                       ValRange);
2844    } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
2845      PARSE_BITS_ENTRY(
2846          KD.compute_pgm_rsrc2,
2847          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
2848          ValRange);
2849    } else if (ID == ".amdhsa_exception_fp_denorm_src") {
2850      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2851                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
2852                       Val, ValRange);
2853    } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
2854      PARSE_BITS_ENTRY(
2855          KD.compute_pgm_rsrc2,
2856          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
2857          ValRange);
2858    } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
2859      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2860                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
2861                       Val, ValRange);
2862    } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
2863      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2864                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
2865                       Val, ValRange);
2866    } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
2867      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2868                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
2869                       Val, ValRange);
2870    } else if (ID == ".amdhsa_exception_int_div_zero") {
2871      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2872                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
2873                       Val, ValRange);
2874    } else {
2875      return getParser().Error(IDRange.Start,
2876                               "unknown .amdhsa_kernel directive", IDRange);
2877    }
2878
2879#undef PARSE_BITS_ENTRY
2880  }
2881
2882  if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
2883    return TokError(".amdhsa_next_free_vgpr directive is required");
2884
2885  if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
2886    return TokError(".amdhsa_next_free_sgpr directive is required");
2887
2888  unsigned VGPRBlocks;
2889  unsigned SGPRBlocks;
2890  if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
2891                         ReserveXNACK, NextFreeVGPR, VGPRRange, NextFreeSGPR,
2892                         SGPRRange, VGPRBlocks, SGPRBlocks))
2893    return true;
2894
2895  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
2896          VGPRBlocks))
2897    return OutOfRangeError(VGPRRange);
2898  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
2899                  COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
2900
2901  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
2902          SGPRBlocks))
2903    return OutOfRangeError(SGPRRange);
2904  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
2905                  COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
2906                  SGPRBlocks);
2907
2908  if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
2909    return TokError("too many user SGPRs enabled");
2910  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
2911                  UserSGPRCount);
2912
2913  getTargetStreamer().EmitAmdhsaKernelDescriptor(
2914      getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
2915      ReserveFlatScr, ReserveXNACK);
2916  return false;
2917}
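// A minimal, illustrative use of the directive handled above (only the
// next_free_vgpr/sgpr directives are mandatory; all other fields keep their
// defaults):
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel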
2918
2919bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
2920  uint32_t Major;
2921  uint32_t Minor;
2922
2923  if (ParseDirectiveMajorMinor(Major, Minor))
2924    return true;
2925
2926  getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
2927  return false;
2928}
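// Example usage (illustrative version numbers):
//   .hsa_code_object_version 2,1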
2929
2930bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
2931  uint32_t Major;
2932  uint32_t Minor;
2933  uint32_t Stepping;
2934  StringRef VendorName;
2935  StringRef ArchName;
2936
2937  // If this directive has no arguments, then use the ISA version for the
2938  // targeted GPU.
2939  if (getLexer().is(AsmToken::EndOfStatement)) {
2940    AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
2941    getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
2942                                                      ISA.Stepping,
2943                                                      "AMD", "AMDGPU");
2944    return false;
2945  }
2946
2947  if (ParseDirectiveMajorMinor(Major, Minor))
2948    return true;
2949
2950  if (getLexer().isNot(AsmToken::Comma))
2951    return TokError("stepping version number required, comma expected");
2952  Lex();
2953
2954  if (ParseAsAbsoluteExpression(Stepping))
2955    return TokError("invalid stepping version");
2956
2957  if (getLexer().isNot(AsmToken::Comma))
2958    return TokError("vendor name required, comma expected");
2959  Lex();
2960
2961  if (getLexer().isNot(AsmToken::String))
2962    return TokError("invalid vendor name");
2963
2964  VendorName = getLexer().getTok().getStringContents();
2965  Lex();
2966
2967  if (getLexer().isNot(AsmToken::Comma))
2968    return TokError("arch name required, comma expected");
2969  Lex();
2970
2971  if (getLexer().isNot(AsmToken::String))
2972    return TokError("invalid arch name");
2973
2974  ArchName = getLexer().getTok().getStringContents();
2975  Lex();
2976
2977  getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
2978                                                    VendorName, ArchName);
2979  return false;
2980}
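// Example usage (illustrative values):
//   .hsa_code_object_isa 8,0,3,"AMD","AMDGPU"
// With no arguments, the ISA version of the targeted GPU is emitted instead.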
2981
2982bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
2983                                               amd_kernel_code_t &Header) {
2984  // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
2985  // assembly for backwards compatibility.
2986  if (ID == "max_scratch_backing_memory_byte_size") {
2987    Parser.eatToEndOfStatement();
2988    return false;
2989  }
2990
2991  SmallString<40> ErrStr;
2992  raw_svector_ostream Err(ErrStr);
2993  if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
2994    return TokError(Err.str());
2995  }
2996  Lex();
2997  return false;
2998}
2999
3000bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
3001  amd_kernel_code_t Header;
3002  AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
3003
3004  while (true) {
3005    // Lex EndOfStatement.  This is in a while loop, because lexing a comment
3006    // will set the current token to EndOfStatement.
3007    while(getLexer().is(AsmToken::EndOfStatement))
3008      Lex();
3009
3010    if (getLexer().isNot(AsmToken::Identifier))
3011      return TokError("expected value identifier or .end_amd_kernel_code_t");
3012
3013    StringRef ID = getLexer().getTok().getIdentifier();
3014    Lex();
3015
3016    if (ID == ".end_amd_kernel_code_t")
3017      break;
3018
3019    if (ParseAMDKernelCodeTValue(ID, Header))
3020      return true;
3021  }
3022
3023  getTargetStreamer().EmitAMDKernelCodeT(Header);
3024
3025  return false;
3026}
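// Illustrative skeleton of the directive handled above; field names and
// values are examples only, and unlisted fields keep the defaults from
// initDefaultAMDKernelCodeT:
//   .amd_kernel_code_t
//     wavefront_sgpr_count = 16
//     workitem_vgpr_count = 8
//   .end_amd_kernel_code_t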
3027
3028bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
3029  if (getLexer().isNot(AsmToken::Identifier))
3030    return TokError("expected symbol name");
3031
3032  StringRef KernelName = Parser.getTok().getString();
3033
3034  getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
3035                                           ELF::STT_AMDGPU_HSA_KERNEL);
3036  Lex();
3037  if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
3038    KernelScope.initialize(getContext());
3039  return false;
3040}
3041
3042bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
3043  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
3044    return Error(getParser().getTok().getLoc(),
3045                 ".amd_amdgpu_isa directive is not available on non-amdgcn "
3046                 "architectures");
3047  }
3048
3049  auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
3050
3051  std::string ISAVersionStringFromSTI;
3052  raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
3053  IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
3054
3055  if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
3056    return Error(getParser().getTok().getLoc(),
3057                 ".amd_amdgpu_isa directive does not match triple and/or mcpu "
3058                 "arguments specified through the command line");
3059  }
3060
3061  getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
3062  Lex();
3063
3064  return false;
3065}
3066
3067bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
3068  const char *AssemblerDirectiveBegin;
3069  const char *AssemblerDirectiveEnd;
3070  std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
3071      AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
3072          ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
3073                            HSAMD::V3::AssemblerDirectiveEnd)
3074          : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
3075                            HSAMD::AssemblerDirectiveEnd);
3076
3077  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
3078    return Error(getParser().getTok().getLoc(),
3079                 (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
3080                 "not available on non-amdhsa OSes")).str());
3081  }
3082
3083  std::string HSAMetadataString;
3084  raw_string_ostream YamlStream(HSAMetadataString);
3085
3086  getLexer().setSkipSpace(false);
3087
3088  bool FoundEnd = false;
3089  while (!getLexer().is(AsmToken::Eof)) {
3090    while (getLexer().is(AsmToken::Space)) {
3091      YamlStream << getLexer().getTok().getString();
3092      Lex();
3093    }
3094
3095    if (getLexer().is(AsmToken::Identifier)) {
3096      StringRef ID = getLexer().getTok().getIdentifier();
3097      if (ID == AssemblerDirectiveEnd) {
3098        Lex();
3099        FoundEnd = true;
3100        break;
3101      }
3102    }
3103
3104    YamlStream << Parser.parseStringToEndOfStatement()
3105               << getContext().getAsmInfo()->getSeparatorString();
3106
3107    Parser.eatToEndOfStatement();
3108  }
3109
3110  getLexer().setSkipSpace(true);
3111
3112  if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
3113    return TokError(Twine("expected directive ") +
3114                    Twine(HSAMD::AssemblerDirectiveEnd) + Twine(" not found"));
3115  }
3116
3117  YamlStream.flush();
3118
3119  if (IsaInfo::hasCodeObjectV3(&getSTI())) {
3120    if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
3121      return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
3122  } else {
3123    if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
3124      return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
3125  }
3126
3127  return false;
3128}
3129
3130bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
3131  if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
3132    return Error(getParser().getTok().getLoc(),
3133                 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
3134                 "not available on non-amdpal OSes")).str());
3135  }
3136
3137  PALMD::Metadata PALMetadata;
3138  for (;;) {
3139    uint32_t Value;
3140    if (ParseAsAbsoluteExpression(Value)) {
3141      return TokError(Twine("invalid value in ") +
3142                      Twine(PALMD::AssemblerDirective));
3143    }
3144    PALMetadata.push_back(Value);
3145    if (getLexer().isNot(AsmToken::Comma))
3146      break;
3147    Lex();
3148  }
3149  getTargetStreamer().EmitPALMetadata(PALMetadata);
3150  return false;
3151}
3152
3153bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
3154  StringRef IDVal = DirectiveID.getString();
3155
3156  if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
3157    if (IDVal == ".amdgcn_target")
3158      return ParseDirectiveAMDGCNTarget();
3159
3160    if (IDVal == ".amdhsa_kernel")
3161      return ParseDirectiveAMDHSAKernel();
3162
3163    // TODO: Restructure/combine with PAL metadata directive.
3164    if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
3165      return ParseDirectiveHSAMetadata();
3166  } else {
3167    if (IDVal == ".hsa_code_object_version")
3168      return ParseDirectiveHSACodeObjectVersion();
3169
3170    if (IDVal == ".hsa_code_object_isa")
3171      return ParseDirectiveHSACodeObjectISA();
3172
3173    if (IDVal == ".amd_kernel_code_t")
3174      return ParseDirectiveAMDKernelCodeT();
3175
3176    if (IDVal == ".amdgpu_hsa_kernel")
3177      return ParseDirectiveAMDGPUHsaKernel();
3178
3179    if (IDVal == ".amd_amdgpu_isa")
3180      return ParseDirectiveISAVersion();
3181
3182    if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
3183      return ParseDirectiveHSAMetadata();
3184  }
3185
3186  if (IDVal == PALMD::AssemblerDirective)
3187    return ParseDirectivePALMetadata();
3188
3189  return true;
3190}
3191
3192bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
3193                                           unsigned RegNo) const {
3194
3195  for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
3196       R.isValid(); ++R) {
3197    if (*R == RegNo)
3198      return isGFX9();
3199  }
3200
3201  switch (RegNo) {
3202  case AMDGPU::TBA:
3203  case AMDGPU::TBA_LO:
3204  case AMDGPU::TBA_HI:
3205  case AMDGPU::TMA:
3206  case AMDGPU::TMA_LO:
3207  case AMDGPU::TMA_HI:
3208    return !isGFX9();
3209  case AMDGPU::XNACK_MASK:
3210  case AMDGPU::XNACK_MASK_LO:
3211  case AMDGPU::XNACK_MASK_HI:
3212    return !isCI() && !isSI() && hasXNACK();
3213  default:
3214    break;
3215  }
3216
3217  if (isCI())
3218    return true;
3219
3220  if (isSI()) {
3221    // No flat_scr
3222    switch (RegNo) {
3223    case AMDGPU::FLAT_SCR:
3224    case AMDGPU::FLAT_SCR_LO:
3225    case AMDGPU::FLAT_SCR_HI:
3226      return false;
3227    default:
3228      return true;
3229    }
3230  }
3231
3232  // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
3233  // SI/CI have.
3234  for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
3235       R.isValid(); ++R) {
3236    if (*R == RegNo)
3237      return false;
3238  }
3239
3240  return true;
3241}
3242
3243OperandMatchResultTy
3244AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
3245  // Try to parse with a custom parser
3246  OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
3247
3248  // If we successfully parsed the operand or if there was an error parsing,
3249  // we are done.
3250  //
3251  // If we are parsing after we reach EndOfStatement then this means we
3252  // are appending default values to the Operands list. This is only done
3253  // by custom parsers, so we shouldn't continue on to the generic parsing.
3254  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
3255      getLexer().is(AsmToken::EndOfStatement))
3256    return ResTy;
3257
3258  ResTy = parseRegOrImm(Operands);
3259
3260  if (ResTy == MatchOperand_Success)
3261    return ResTy;
3262
3263  const auto &Tok = Parser.getTok();
3264  SMLoc S = Tok.getLoc();
3265
3266  const MCExpr *Expr = nullptr;
3267  if (!Parser.parseExpression(Expr)) {
3268    Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3269    return MatchOperand_Success;
3270  }
3271
3272  // Possibly this is an instruction flag like 'gds'.
3273  if (Tok.getKind() == AsmToken::Identifier) {
3274    Operands.push_back(AMDGPUOperand::CreateToken(this, Tok.getString(), S));
3275    Parser.Lex();
3276    return MatchOperand_Success;
3277  }
3278
3279  return MatchOperand_NoMatch;
3280}
3281
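// A mnemonic may carry a suffix that forces a particular encoding, e.g.
// (illustrative): "v_add_f32_e64" forces the 64-bit VOP3 encoding,
// "v_add_f32_e32" the 32-bit encoding, and the "_dpp"/"_sdwa" suffixes force
// the DPP and SDWA encodings. The suffix is stripped and the base name returned.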
3282StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
3283  // Clear any forced encodings from the previous instruction.
3284  setForcedEncodingSize(0);
3285  setForcedDPP(false);
3286  setForcedSDWA(false);
3287
3288  if (Name.endswith("_e64")) {
3289    setForcedEncodingSize(64);
3290    return Name.substr(0, Name.size() - 4);
3291  } else if (Name.endswith("_e32")) {
3292    setForcedEncodingSize(32);
3293    return Name.substr(0, Name.size() - 4);
3294  } else if (Name.endswith("_dpp")) {
3295    setForcedDPP(true);
3296    return Name.substr(0, Name.size() - 4);
3297  } else if (Name.endswith("_sdwa")) {
3298    setForcedSDWA(true);
3299    return Name.substr(0, Name.size() - 5);
3300  }
3301  return Name;
3302}
3303
3304bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
3305                                       StringRef Name,
3306                                       SMLoc NameLoc, OperandVector &Operands) {
3307  // Add the instruction mnemonic
3308  Name = parseMnemonicSuffix(Name);
3309  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
3310
3311  while (!getLexer().is(AsmToken::EndOfStatement)) {
3312    OperandMatchResultTy Res = parseOperand(Operands, Name);
3313
3314    // Eat the comma or space if there is one.
3315    if (getLexer().is(AsmToken::Comma))
3316      Parser.Lex();
3317
3318    switch (Res) {
3319      case MatchOperand_Success: break;
3320      case MatchOperand_ParseFail:
3321        Error(getLexer().getLoc(), "failed parsing operand.");
3322        while (!getLexer().is(AsmToken::EndOfStatement)) {
3323          Parser.Lex();
3324        }
3325        return true;
3326      case MatchOperand_NoMatch:
3327        Error(getLexer().getLoc(), "not a valid operand.");
3328        while (!getLexer().is(AsmToken::EndOfStatement)) {
3329          Parser.Lex();
3330        }
3331        return true;
3332    }
3333  }
3334
3335  return false;
3336}
3337
3338//===----------------------------------------------------------------------===//
3339// Utility functions
3340//===----------------------------------------------------------------------===//
3341
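// Parses an integer operand of the form "<Prefix>:<value>", e.g. "offset:4095"
// or "offset:-16" (illustrative values). Returns NoMatch if the identifier is
// not Prefix, and ParseFail if the colon or the integer is missing or malformed.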
3342OperandMatchResultTy
3343AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) {
3344  switch(getLexer().getKind()) {
3345    default: return MatchOperand_NoMatch;
3346    case AsmToken::Identifier: {
3347      StringRef Name = Parser.getTok().getString();
3348      if (!Name.equals(Prefix)) {
3349        return MatchOperand_NoMatch;
3350      }
3351
3352      Parser.Lex();
3353      if (getLexer().isNot(AsmToken::Colon))
3354        return MatchOperand_ParseFail;
3355
3356      Parser.Lex();
3357
3358      bool IsMinus = false;
3359      if (getLexer().getKind() == AsmToken::Minus) {
3360        Parser.Lex();
3361        IsMinus = true;
3362      }
3363
3364      if (getLexer().isNot(AsmToken::Integer))
3365        return MatchOperand_ParseFail;
3366
3367      if (getParser().parseAbsoluteExpression(Int))
3368        return MatchOperand_ParseFail;
3369
3370      if (IsMinus)
3371        Int = -Int;
3372      break;
3373    }
3374  }
3375  return MatchOperand_Success;
3376}
3377
3378OperandMatchResultTy
3379AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
3380                                    AMDGPUOperand::ImmTy ImmTy,
3381                                    bool (*ConvertResult)(int64_t&)) {
3382  SMLoc S = Parser.getTok().getLoc();
3383  int64_t Value = 0;
3384
3385  OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
3386  if (Res != MatchOperand_Success)
3387    return Res;
3388
3389  if (ConvertResult && !ConvertResult(Value)) {
3390    return MatchOperand_ParseFail;
3391  }
3392
3393  Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
3394  return MatchOperand_Success;
3395}
3396
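// Parses an array operand of the form "<Prefix>:[b0,b1,...]" with up to four
// 0/1 elements that are packed into a bitmask, e.g. "op_sel:[0,1]" on packed
// VOP3P instructions (illustrative).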
3397OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix(
3398  const char *Prefix,
3399  OperandVector &Operands,
3400  AMDGPUOperand::ImmTy ImmTy,
3401  bool (*ConvertResult)(int64_t&)) {
3402  StringRef Name = Parser.getTok().getString();
3403  if (!Name.equals(Prefix))
3404    return MatchOperand_NoMatch;
3405
3406  Parser.Lex();
3407  if (getLexer().isNot(AsmToken::Colon))
3408    return MatchOperand_ParseFail;
3409
3410  Parser.Lex();
3411  if (getLexer().isNot(AsmToken::LBrac))
3412    return MatchOperand_ParseFail;
3413  Parser.Lex();
3414
3415  unsigned Val = 0;
3416  SMLoc S = Parser.getTok().getLoc();
3417
3418  // FIXME: How to verify the number of elements matches the number of src
3419  // operands?
3420  for (int I = 0; I < 4; ++I) {
3421    if (I != 0) {
3422      if (getLexer().is(AsmToken::RBrac))
3423        break;
3424
3425      if (getLexer().isNot(AsmToken::Comma))
3426        return MatchOperand_ParseFail;
3427      Parser.Lex();
3428    }
3429
3430    if (getLexer().isNot(AsmToken::Integer))
3431      return MatchOperand_ParseFail;
3432
3433    int64_t Op;
3434    if (getParser().parseAbsoluteExpression(Op))
3435      return MatchOperand_ParseFail;
3436
3437    if (Op != 0 && Op != 1)
3438      return MatchOperand_ParseFail;
3439    Val |= (Op << I);
3440  }
3441
3442  Parser.Lex();
3443  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
3444  return MatchOperand_Success;
3445}
3446
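// Parses a single-bit named modifier such as "glc", "slc" or "tfe": the bare
// name sets the bit, a "no"-prefixed form (e.g. "noglc", illustrative) clears
// it, and an absent modifier keeps the default value of 0.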
3447OperandMatchResultTy
3448AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
3449                               AMDGPUOperand::ImmTy ImmTy) {
3450  int64_t Bit = 0;
3451  SMLoc S = Parser.getTok().getLoc();
3452
3453  // If we are at the end of the statement, this is a default argument, so
3454  // keep the default value.
3455  if (getLexer().isNot(AsmToken::EndOfStatement)) {
3456    switch(getLexer().getKind()) {
3457      case AsmToken::Identifier: {
3458        StringRef Tok = Parser.getTok().getString();
3459        if (Tok == Name) {
3460          if (Tok == "r128" && isGFX9())
3461            Error(S, "r128 modifier is not supported on this GPU");
3462          if (Tok == "a16" && !isGFX9())
3463            Error(S, "a16 modifier is not supported on this GPU");
3464          Bit = 1;
3465          Parser.Lex();
3466        } else if (Tok.startswith("no") && Tok.endswith(Name)) {
3467          Bit = 0;
3468          Parser.Lex();
3469        } else {
3470          return MatchOperand_NoMatch;
3471        }
3472        break;
3473      }
3474      default:
3475        return MatchOperand_NoMatch;
3476    }
3477  }
3478
3479  Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
3480  return MatchOperand_Success;
3481}
3482
3483static void addOptionalImmOperand(
3484  MCInst& Inst, const OperandVector& Operands,
3485  AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
3486  AMDGPUOperand::ImmTy ImmT,
3487  int64_t Default = 0) {
3488  auto i = OptionalIdx.find(ImmT);
3489  if (i != OptionalIdx.end()) {
3490    unsigned Idx = i->second;
3491    ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
3492  } else {
3493    Inst.addOperand(MCOperand::createImm(Default));
3494  }
3495}
3496
3497OperandMatchResultTy
3498AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
3499  if (getLexer().isNot(AsmToken::Identifier)) {
3500    return MatchOperand_NoMatch;
3501  }
3502  StringRef Tok = Parser.getTok().getString();
3503  if (Tok != Prefix) {
3504    return MatchOperand_NoMatch;
3505  }
3506
3507  Parser.Lex();
3508  if (getLexer().isNot(AsmToken::Colon)) {
3509    return MatchOperand_ParseFail;
3510  }
3511
3512  Parser.Lex();
3513  if (getLexer().isNot(AsmToken::Identifier)) {
3514    return MatchOperand_ParseFail;
3515  }
3516
3517  Value = Parser.getTok().getString();
3518  return MatchOperand_Success;
3519}
3520
3521// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
3522// values to live in a joint format operand in the MCInst encoding.
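// Both orders are accepted and each field is optional, e.g. "dfmt:1, nfmt:2"
// and "nfmt:2, dfmt:1" (illustrative values) produce the same joint immediate.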
3523OperandMatchResultTy
3524AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
3525  SMLoc S = Parser.getTok().getLoc();
3526  int64_t Dfmt = 0, Nfmt = 0;
3527  // dfmt and nfmt can appear in either order, and each is optional.
3528  bool GotDfmt = false, GotNfmt = false;
3529  while (!GotDfmt || !GotNfmt) {
3530    if (!GotDfmt) {
3531      auto Res = parseIntWithPrefix("dfmt", Dfmt);
3532      if (Res != MatchOperand_NoMatch) {
3533        if (Res != MatchOperand_Success)
3534          return Res;
3535        if (Dfmt >= 16) {
3536          Error(Parser.getTok().getLoc(), "out of range dfmt");
3537          return MatchOperand_ParseFail;
3538        }
3539        GotDfmt = true;
3540        Parser.Lex();
3541        continue;
3542      }
3543    }
3544    if (!GotNfmt) {
3545      auto Res = parseIntWithPrefix("nfmt", Nfmt);
3546      if (Res != MatchOperand_NoMatch) {
3547        if (Res != MatchOperand_Success)
3548          return Res;
3549        if (Nfmt >= 8) {
3550          Error(Parser.getTok().getLoc(), "out of range nfmt");
3551          return MatchOperand_ParseFail;
3552        }
3553        GotNfmt = true;
3554        Parser.Lex();
3555        continue;
3556      }
3557    }
3558    break;
3559  }
3560  if (!GotDfmt && !GotNfmt)
3561    return MatchOperand_NoMatch;
3562  auto Format = Dfmt | Nfmt << 4;
3563  Operands.push_back(
3564      AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
3565  return MatchOperand_Success;
3566}
3567
3568//===----------------------------------------------------------------------===//
3569// ds
3570//===----------------------------------------------------------------------===//
3571
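// Converter for DS instructions that take split offsets, e.g. (illustrative):
//   ds_write2_b32 v1, v2, v3 offset0:4 offset1:8
// Registers are added first, then offset0/offset1/gds from the optional-operand
// map, and finally the implicit m0 operand is appended.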
3572void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
3573                                    const OperandVector &Operands) {
3574  OptionalImmIndexMap OptionalIdx;
3575
3576  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3577    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3578
3579    // Add the register arguments
3580    if (Op.isReg()) {
3581      Op.addRegOperands(Inst, 1);
3582      continue;
3583    }
3584
3585    // Handle optional arguments
3586    OptionalIdx[Op.getImmTy()] = i;
3587  }
3588
3589  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
3590  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
3591  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
3592
3593  Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
3594}
3595
3596void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
3597                                bool IsGdsHardcoded) {
3598  OptionalImmIndexMap OptionalIdx;
3599
3600  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3601    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3602
3603    // Add the register arguments
3604    if (Op.isReg()) {
3605      Op.addRegOperands(Inst, 1);
3606      continue;
3607    }
3608
3609    if (Op.isToken() && Op.getToken() == "gds") {
3610      IsGdsHardcoded = true;
3611      continue;
3612    }
3613
3614    // Handle optional arguments
3615    OptionalIdx[Op.getImmTy()] = i;
3616  }
3617
3618  AMDGPUOperand::ImmTy OffsetType =
3619    (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_si ||
3620     Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
3621                                                      AMDGPUOperand::ImmTyOffset;
3622
3623  addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
3624
3625  if (!IsGdsHardcoded) {
3626    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
3627  }
3628  Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
3629}
3630
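// Converter for the export instruction, e.g. (illustrative):
//   exp mrt0 v0, v1, off, off done vm
// "off" sources become NoRegister and are excluded from the 'en' mask; with
// 'compr' each enabled source covers two components (two mask bits).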
3631void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
3632  OptionalImmIndexMap OptionalIdx;
3633
3634  unsigned OperandIdx[4];
3635  unsigned EnMask = 0;
3636  int SrcIdx = 0;
3637
3638  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3639    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3640
3641    // Add the register arguments
3642    if (Op.isReg()) {
3643      assert(SrcIdx < 4);
3644      OperandIdx[SrcIdx] = Inst.size();
3645      Op.addRegOperands(Inst, 1);
3646      ++SrcIdx;
3647      continue;
3648    }
3649
3650    if (Op.isOff()) {
3651      assert(SrcIdx < 4);
3652      OperandIdx[SrcIdx] = Inst.size();
3653      Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
3654      ++SrcIdx;
3655      continue;
3656    }
3657
3658    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
3659      Op.addImmOperands(Inst, 1);
3660      continue;
3661    }
3662
3663    if (Op.isToken() && Op.getToken() == "done")
3664      continue;
3665
3666    // Handle optional arguments
3667    OptionalIdx[Op.getImmTy()] = i;
3668  }
3669
3670  assert(SrcIdx == 4);
3671
3672  bool Compr = false;
3673  if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
3674    Compr = true;
3675    Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
3676    Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
3677    Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
3678  }
3679
3680  for (auto i = 0; i < SrcIdx; ++i) {
3681    if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
3682      EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
3683    }
3684  }
3685
3686  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
3687  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
3688
3689  Inst.addOperand(MCOperand::createImm(EnMask));
3690}
3691
3692//===----------------------------------------------------------------------===//
3693// s_waitcnt
3694//===----------------------------------------------------------------------===//
3695
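// The s_waitcnt operand is either a raw immediate or a list of named counters,
// e.g. (illustrative):
//   s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
// A "_sat" suffix (e.g. vmcnt_sat) clamps an out-of-range value instead of
// reporting an error.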
3696static bool
3697encodeCnt(
3698  const AMDGPU::IsaVersion ISA,
3699  int64_t &IntVal,
3700  int64_t CntVal,
3701  bool Saturate,
3702  unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
3703  unsigned (*decode)(const IsaVersion &Version, unsigned))
3704{
3705  bool Failed = false;
3706
3707  IntVal = encode(ISA, IntVal, CntVal);
3708  if (CntVal != decode(ISA, IntVal)) {
3709    if (Saturate) {
3710      IntVal = encode(ISA, IntVal, -1);
3711    } else {
3712      Failed = true;
3713    }
3714  }
3715  return Failed;
3716}
3717
3718bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
3719  StringRef CntName = Parser.getTok().getString();
3720  int64_t CntVal;
3721
3722  Parser.Lex();
3723  if (getLexer().isNot(AsmToken::LParen))
3724    return true;
3725
3726  Parser.Lex();
3727  if (getLexer().isNot(AsmToken::Integer))
3728    return true;
3729
3730  SMLoc ValLoc = Parser.getTok().getLoc();
3731  if (getParser().parseAbsoluteExpression(CntVal))
3732    return true;
3733
3734  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3735
3736  bool Failed = true;
3737  bool Sat = CntName.endswith("_sat");
3738
3739  if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
3740    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
3741  } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
3742    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
3743  } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
3744    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
3745  }
3746
3747  if (Failed) {
3748    Error(ValLoc, "too large value for " + CntName);
3749    return true;
3750  }
3751
3752  if (getLexer().isNot(AsmToken::RParen)) {
3753    return true;
3754  }
3755
3756  Parser.Lex();
3757  if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) {
3758    const AsmToken NextToken = getLexer().peekTok();
3759    if (NextToken.is(AsmToken::Identifier)) {
3760      Parser.Lex();
3761    }
3762  }
3763
3764  return false;
3765}
3766
3767OperandMatchResultTy
3768AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
3769  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3770  int64_t Waitcnt = getWaitcntBitMask(ISA);
3771  SMLoc S = Parser.getTok().getLoc();
3772
3773  switch(getLexer().getKind()) {
3774    default: return MatchOperand_ParseFail;
3775    case AsmToken::Integer:
3776      // The operand can be an integer value.
3777      if (getParser().parseAbsoluteExpression(Waitcnt))
3778        return MatchOperand_ParseFail;
3779      break;
3780
3781    case AsmToken::Identifier:
3782      do {
3783        if (parseCnt(Waitcnt))
3784          return MatchOperand_ParseFail;
3785      } while(getLexer().isNot(AsmToken::EndOfStatement));
3786      break;
3787  }
3788  Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
3789  return MatchOperand_Success;
3790}
3791
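// Parses the hwreg construct "hwreg(<name|id>[, <offset>, <width>])", e.g.
// (illustrative): s_getreg_b32 s2, hwreg(HW_REG_GPR_ALLOC, 0, 32)
// Symbolic names come from the IdSymbolic table; offset and width are optional.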
3792bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset,
3793                                          int64_t &Width) {
3794  using namespace llvm::AMDGPU::Hwreg;
3795
3796  if (Parser.getTok().getString() != "hwreg")
3797    return true;
3798  Parser.Lex();
3799
3800  if (getLexer().isNot(AsmToken::LParen))
3801    return true;
3802  Parser.Lex();
3803
3804  if (getLexer().is(AsmToken::Identifier)) {
3805    HwReg.IsSymbolic = true;
3806    HwReg.Id = ID_UNKNOWN_;
3807    const StringRef tok = Parser.getTok().getString();
3808    int Last = ID_SYMBOLIC_LAST_;
3809    if (isSI() || isCI() || isVI())
3810      Last = ID_SYMBOLIC_FIRST_GFX9_;
3811    for (int i = ID_SYMBOLIC_FIRST_; i < Last; ++i) {
3812      if (tok == IdSymbolic[i]) {
3813        HwReg.Id = i;
3814        break;
3815      }
3816    }
3817    Parser.Lex();
3818  } else {
3819    HwReg.IsSymbolic = false;
3820    if (getLexer().isNot(AsmToken::Integer))
3821      return true;
3822    if (getParser().parseAbsoluteExpression(HwReg.Id))
3823      return true;
3824  }
3825
3826  if (getLexer().is(AsmToken::RParen)) {
3827    Parser.Lex();
3828    return false;
3829  }
3830
3831  // optional params
3832  if (getLexer().isNot(AsmToken::Comma))
3833    return true;
3834  Parser.Lex();
3835
3836  if (getLexer().isNot(AsmToken::Integer))
3837    return true;
3838  if (getParser().parseAbsoluteExpression(Offset))
3839    return true;
3840
3841  if (getLexer().isNot(AsmToken::Comma))
3842    return true;
3843  Parser.Lex();
3844
3845  if (getLexer().isNot(AsmToken::Integer))
3846    return true;
3847  if (getParser().parseAbsoluteExpression(Width))
3848    return true;
3849
3850  if (getLexer().isNot(AsmToken::RParen))
3851    return true;
3852  Parser.Lex();
3853
3854  return false;
3855}
3856
3857OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
3858  using namespace llvm::AMDGPU::Hwreg;
3859
3860  int64_t Imm16Val = 0;
3861  SMLoc S = Parser.getTok().getLoc();
3862
3863  switch(getLexer().getKind()) {
3864    default: return MatchOperand_NoMatch;
3865    case AsmToken::Integer:
3866      // The operand can be an integer value.
3867      if (getParser().parseAbsoluteExpression(Imm16Val))
3868        return MatchOperand_NoMatch;
3869      if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
3870        Error(S, "invalid immediate: only 16-bit values are legal");
3871        // Do not return error code, but create an imm operand anyway and proceed
3872        // Do not return an error code, but create an imm operand anyway and proceed
3873        // to the next operand, if any. That avoids unnecessary error messages.
3874      break;
3875
3876    case AsmToken::Identifier: {
3877        OperandInfoTy HwReg(ID_UNKNOWN_);
3878        int64_t Offset = OFFSET_DEFAULT_;
3879        int64_t Width = WIDTH_M1_DEFAULT_ + 1;
3880        if (parseHwregConstruct(HwReg, Offset, Width))
3881          return MatchOperand_ParseFail;
3882        if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) {
3883          if (HwReg.IsSymbolic)
3884            Error(S, "invalid symbolic name of hardware register");
3885          else
3886            Error(S, "invalid code of hardware register: only 6-bit values are legal");
3887        }
3888        if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset))
3889          Error(S, "invalid bit offset: only 5-bit values are legal");
3890        if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1))
3891          Error(S, "invalid bitfield width: only values from 1 to 32 are legal");
3892        Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_);
3893      }
3894      break;
3895  }
3896  Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg));
3897  return MatchOperand_Success;
3898}
3899
3900bool AMDGPUOperand::isSWaitCnt() const {
3901  return isImm();
3902}
3903
3904bool AMDGPUOperand::isHwreg() const {
3905  return isImmTy(ImmTyHwreg);
3906}
3907
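// Parses the sendmsg construct "sendmsg(<msg>[, <op>[, <stream>]])", e.g.
// (illustrative): s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
//                 s_sendmsg sendmsg(MSG_INTERRUPT)
// The operation and stream id are only meaningful for GS/GS_DONE/SYSMSG.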
3908bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) {
3909  using namespace llvm::AMDGPU::SendMsg;
3910
3911  if (Parser.getTok().getString() != "sendmsg")
3912    return true;
3913  Parser.Lex();
3914
3915  if (getLexer().isNot(AsmToken::LParen))
3916    return true;
3917  Parser.Lex();
3918
3919  if (getLexer().is(AsmToken::Identifier)) {
3920    Msg.IsSymbolic = true;
3921    Msg.Id = ID_UNKNOWN_;
3922    const std::string tok = Parser.getTok().getString();
3923    for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
3924      switch(i) {
3925        default: continue; // Omit gaps.
3926        case ID_INTERRUPT: case ID_GS: case ID_GS_DONE:  case ID_SYSMSG: break;
3927      }
3928      if (tok == IdSymbolic[i]) {
3929        Msg.Id = i;
3930        break;
3931      }
3932    }
3933    Parser.Lex();
3934  } else {
3935    Msg.IsSymbolic = false;
3936    if (getLexer().isNot(AsmToken::Integer))
3937      return true;
3938    if (getParser().parseAbsoluteExpression(Msg.Id))
3939      return true;
3940    if (getLexer().is(AsmToken::Integer))
3941      if (getParser().parseAbsoluteExpression(Msg.Id))
3942        Msg.Id = ID_UNKNOWN_;
3943  }
3944  if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest.
3945    return false;
3946
3947  if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) {
3948    if (getLexer().isNot(AsmToken::RParen))
3949      return true;
3950    Parser.Lex();
3951    return false;
3952  }
3953
3954  if (getLexer().isNot(AsmToken::Comma))
3955    return true;
3956  Parser.Lex();
3957
3958  assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG);
3959  Operation.Id = ID_UNKNOWN_;
3960  if (getLexer().is(AsmToken::Identifier)) {
3961    Operation.IsSymbolic = true;
3962    const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
3963    const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
3964    const int L = (Msg.Id == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
3965    const StringRef Tok = Parser.getTok().getString();
3966    for (int i = F; i < L; ++i) {
3967      if (Tok == S[i]) {
3968        Operation.Id = i;
3969        break;
3970      }
3971    }
3972    Parser.Lex();
3973  } else {
3974    Operation.IsSymbolic = false;
3975    if (getLexer().isNot(AsmToken::Integer))
3976      return true;
3977    if (getParser().parseAbsoluteExpression(Operation.Id))
3978      return true;
3979  }
3980
3981  if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
3982    // Stream id is optional.
3983    if (getLexer().is(AsmToken::RParen)) {
3984      Parser.Lex();
3985      return false;
3986    }
3987
3988    if (getLexer().isNot(AsmToken::Comma))
3989      return true;
3990    Parser.Lex();
3991
3992    if (getLexer().isNot(AsmToken::Integer))
3993      return true;
3994    if (getParser().parseAbsoluteExpression(StreamId))
3995      return true;
3996  }
3997
3998  if (getLexer().isNot(AsmToken::RParen))
3999    return true;
4000  Parser.Lex();
4001  return false;
4002}
4003
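// Interpolation operands, e.g. (illustrative): v_interp_p1_f32 v0, v1, attr0.x
// The slot names p10/p20/p0 map to 0/1/2, and the attribute channel suffix
// .x/.y/.z/.w maps to channels 0-3.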
4004OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
4005  if (getLexer().getKind() != AsmToken::Identifier)
4006    return MatchOperand_NoMatch;
4007
4008  StringRef Str = Parser.getTok().getString();
4009  int Slot = StringSwitch<int>(Str)
4010    .Case("p10", 0)
4011    .Case("p20", 1)
4012    .Case("p0", 2)
4013    .Default(-1);
4014
4015  SMLoc S = Parser.getTok().getLoc();
4016  if (Slot == -1)
4017    return MatchOperand_ParseFail;
4018
4019  Parser.Lex();
4020  Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
4021                                              AMDGPUOperand::ImmTyInterpSlot));
4022  return MatchOperand_Success;
4023}
4024
4025OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
4026  if (getLexer().getKind() != AsmToken::Identifier)
4027    return MatchOperand_NoMatch;
4028
4029  StringRef Str = Parser.getTok().getString();
4030  if (!Str.startswith("attr"))
4031    return MatchOperand_NoMatch;
4032
4033  StringRef Chan = Str.take_back(2);
4034  int AttrChan = StringSwitch<int>(Chan)
4035    .Case(".x", 0)
4036    .Case(".y", 1)
4037    .Case(".z", 2)
4038    .Case(".w", 3)
4039    .Default(-1);
4040  if (AttrChan == -1)
4041    return MatchOperand_ParseFail;
4042
4043  Str = Str.drop_back(2).drop_front(4);
4044
4045  uint8_t Attr;
4046  if (Str.getAsInteger(10, Attr))
4047    return MatchOperand_ParseFail;
4048
4049  SMLoc S = Parser.getTok().getLoc();
4050  Parser.Lex();
4051  if (Attr > 63) {
4052    Error(S, "out of bounds attr");
4053    return MatchOperand_Success;
4054  }
4055
4056  SMLoc SChan = SMLoc::getFromPointer(Chan.data());
4057
4058  Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
4059                                              AMDGPUOperand::ImmTyInterpAttr));
4060  Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
4061                                              AMDGPUOperand::ImmTyAttrChan));
4062  return MatchOperand_Success;
4063}
4064
4065void AMDGPUAsmParser::errorExpTgt() {
4066  Error(Parser.getTok().getLoc(), "invalid exp target");
4067}
4068
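// Maps an export target name to its numeric value (derived from the code
// below): mrt0..mrt7 -> 0..7, mrtz -> 8, null -> 9, pos0..pos3 -> 12..15,
// param0..param31 -> 32..63.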
4069OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
4070                                                      uint8_t &Val) {
4071  if (Str == "null") {
4072    Val = 9;
4073    return MatchOperand_Success;
4074  }
4075
4076  if (Str.startswith("mrt")) {
4077    Str = Str.drop_front(3);
4078    if (Str == "z") { // == mrtz
4079      Val = 8;
4080      return MatchOperand_Success;
4081    }
4082
4083    if (Str.getAsInteger(10, Val))
4084      return MatchOperand_ParseFail;
4085
4086    if (Val > 7)
4087      errorExpTgt();
4088
4089    return MatchOperand_Success;
4090  }
4091
4092  if (Str.startswith("pos")) {
4093    Str = Str.drop_front(3);
4094    if (Str.getAsInteger(10, Val))
4095      return MatchOperand_ParseFail;
4096
4097    if (Val > 3)
4098      errorExpTgt();
4099
4100    Val += 12;
4101    return MatchOperand_Success;
4102  }
4103
4104  if (Str.startswith("param")) {
4105    Str = Str.drop_front(5);
4106    if (Str.getAsInteger(10, Val))
4107      return MatchOperand_ParseFail;
4108
4109    if (Val >= 32)
4110      errorExpTgt();
4111
4112    Val += 32;
4113    return MatchOperand_Success;
4114  }
4115
4116  if (Str.startswith("invalid_target_")) {
4117    Str = Str.drop_front(15);
4118    if (Str.getAsInteger(10, Val))
4119      return MatchOperand_ParseFail;
4120
4121    errorExpTgt();
4122    return MatchOperand_Success;
4123  }
4124
4125  return MatchOperand_NoMatch;
4126}
4127
4128OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
4129  uint8_t Val;
4130  StringRef Str = Parser.getTok().getString();
4131
4132  auto Res = parseExpTgtImpl(Str, Val);
4133  if (Res != MatchOperand_Success)
4134    return Res;
4135
4136  SMLoc S = Parser.getTok().getLoc();
4137  Parser.Lex();
4138
4139  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
4140                                              AMDGPUOperand::ImmTyExpTgt));
4141  return MatchOperand_Success;
4142}
4143
4144OperandMatchResultTy
4145AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
4146  using namespace llvm::AMDGPU::SendMsg;
4147
4148  int64_t Imm16Val = 0;
4149  SMLoc S = Parser.getTok().getLoc();
4150
4151  switch(getLexer().getKind()) {
4152  default:
4153    return MatchOperand_NoMatch;
4154  case AsmToken::Integer:
4155    // The operand can be an integer value.
4156    if (getParser().parseAbsoluteExpression(Imm16Val))
4157      return MatchOperand_NoMatch;
4158    if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
4159      Error(S, "invalid immediate: only 16-bit values are legal");
4160      // Do not return an error code, but create an imm operand anyway and proceed
4161      // to the next operand, if any. That avoids unnecessary error messages.
4162    }
4163    break;
4164  case AsmToken::Identifier: {
4165      OperandInfoTy Msg(ID_UNKNOWN_);
4166      OperandInfoTy Operation(OP_UNKNOWN_);
4167      int64_t StreamId = STREAM_ID_DEFAULT_;
4168      if (parseSendMsgConstruct(Msg, Operation, StreamId))
4169        return MatchOperand_ParseFail;
4170      do {
4171        // Validate and encode message ID.
4172        if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE)
4173                || Msg.Id == ID_SYSMSG)) {
4174          if (Msg.IsSymbolic)
4175            Error(S, "invalid/unsupported symbolic name of message");
4176          else
4177            Error(S, "invalid/unsupported code of message");
4178          break;
4179        }
4180        Imm16Val = (Msg.Id << ID_SHIFT_);
4181        // Validate and encode operation ID.
4182        if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) {
4183          if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) {
4184            if (Operation.IsSymbolic)
4185              Error(S, "invalid symbolic name of GS_OP");
4186            else
4187              Error(S, "invalid code of GS_OP: only 2-bit values are legal");
4188            break;
4189          }
4190          if (Operation.Id == OP_GS_NOP
4191              && Msg.Id != ID_GS_DONE) {
4192            Error(S, "invalid GS_OP: NOP is for GS_DONE only");
4193            break;
4194          }
4195          Imm16Val |= (Operation.Id << OP_SHIFT_);
4196        }
4197        if (Msg.Id == ID_SYSMSG) {
4198          if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) {
4199            if (Operation.IsSymbolic)
4200              Error(S, "invalid/unsupported symbolic name of SYSMSG_OP");
4201            else
4202              Error(S, "invalid/unsupported code of SYSMSG_OP");
4203            break;
4204          }
4205          Imm16Val |= (Operation.Id << OP_SHIFT_);
4206        }
4207        // Validate and encode stream ID.
4208        if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
4209          if (! (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) {
4210            Error(S, "invalid stream id: only 2-bit values are legal");
4211            break;
4212          }
4213          Imm16Val |= (StreamId << STREAM_ID_SHIFT_);
4214        }
4215      } while (false);
4216    }
4217    break;
4218  }
4219  Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg));
4220  return MatchOperand_Success;
4221}
4222
4223bool AMDGPUOperand::isSendMsg() const {
4224  return isImmTy(ImmTySendMsg);
4225}
4226
4227//===----------------------------------------------------------------------===//
4228// parser helpers
4229//===----------------------------------------------------------------------===//
4230
4231bool
4232AMDGPUAsmParser::trySkipId(const StringRef Id) {
4233  if (getLexer().getKind() == AsmToken::Identifier &&
4234      Parser.getTok().getString() == Id) {
4235    Parser.Lex();
4236    return true;
4237  }
4238  return false;
4239}
4240
4241bool
4242AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
4243  if (getLexer().getKind() == Kind) {
4244    Parser.Lex();
4245    return true;
4246  }
4247  return false;
4248}
4249
4250bool
4251AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
4252                           const StringRef ErrMsg) {
4253  if (!trySkipToken(Kind)) {
4254    Error(Parser.getTok().getLoc(), ErrMsg);
4255    return false;
4256  }
4257  return true;
4258}
4259
4260bool
4261AMDGPUAsmParser::parseExpr(int64_t &Imm) {
4262  return !getParser().parseAbsoluteExpression(Imm);
4263}
4264
4265bool
4266AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
4267  SMLoc S = Parser.getTok().getLoc();
4268  if (getLexer().getKind() == AsmToken::String) {
4269    Val = Parser.getTok().getStringContents();
4270    Parser.Lex();
4271    return true;
4272  } else {
4273    Error(S, ErrMsg);
4274    return false;
4275  }
4276}
4277
4278//===----------------------------------------------------------------------===//
4279// swizzle
4280//===----------------------------------------------------------------------===//
4281
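// The swizzle operand is either a raw 16-bit offset or one of the macro forms
// QUAD_PERM, BITMASK_PERM, BROADCAST, SWAP and REVERSE, e.g. (illustrative):
//   ds_swizzle_b32 v0, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v0, v1 offset:swizzle(SWAP, 2)
// BROADCAST, SWAP and REVERSE are all lowered to BITMASK_PERM encodings below.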
4282LLVM_READNONE
4283static unsigned
4284encodeBitmaskPerm(const unsigned AndMask,
4285                  const unsigned OrMask,
4286                  const unsigned XorMask) {
4287  using namespace llvm::AMDGPU::Swizzle;
4288
4289  return BITMASK_PERM_ENC |
4290         (AndMask << BITMASK_AND_SHIFT) |
4291         (OrMask  << BITMASK_OR_SHIFT)  |
4292         (XorMask << BITMASK_XOR_SHIFT);
4293}
4294
4295bool
4296AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
4297                                      const unsigned MinVal,
4298                                      const unsigned MaxVal,
4299                                      const StringRef ErrMsg) {
4300  for (unsigned i = 0; i < OpNum; ++i) {
4301    if (!skipToken(AsmToken::Comma, "expected a comma")){
4302      return false;
4303    }
4304    SMLoc ExprLoc = Parser.getTok().getLoc();
4305    if (!parseExpr(Op[i])) {
4306      return false;
4307    }
4308    if (Op[i] < MinVal || Op[i] > MaxVal) {
4309      Error(ExprLoc, ErrMsg);
4310      return false;
4311    }
4312  }
4313
4314  return true;
4315}
4316
4317bool
4318AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
4319  using namespace llvm::AMDGPU::Swizzle;
4320
4321  int64_t Lane[LANE_NUM];
4322  if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
4323                           "expected a 2-bit lane id")) {
4324    Imm = QUAD_PERM_ENC;
4325    for (auto i = 0; i < LANE_NUM; ++i) {
4326      Imm |= Lane[i] << (LANE_SHIFT * i);
4327    }
4328    return true;
4329  }
4330  return false;
4331}
4332
4333bool
4334AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
4335  using namespace llvm::AMDGPU::Swizzle;
4336
4337  SMLoc S = Parser.getTok().getLoc();
4338  int64_t GroupSize;
4339  int64_t LaneIdx;
4340
4341  if (!parseSwizzleOperands(1, &GroupSize,
4342                            2, 32,
4343                            "group size must be in the interval [2,32]")) {
4344    return false;
4345  }
4346  if (!isPowerOf2_64(GroupSize)) {
4347    Error(S, "group size must be a power of two");
4348    return false;
4349  }
4350  if (parseSwizzleOperands(1, &LaneIdx,
4351                           0, GroupSize - 1,
4352                           "lane id must be in the interval [0,group size - 1]")) {
4353    Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
4354    return true;
4355  }
4356  return false;
4357}
4358
4359bool
4360AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
4361  using namespace llvm::AMDGPU::Swizzle;
4362
4363  SMLoc S = Parser.getTok().getLoc();
4364  int64_t GroupSize;
4365
4366  if (!parseSwizzleOperands(1, &GroupSize,
4367      2, 32, "group size must be in the interval [2,32]")) {
4368    return false;
4369  }
4370  if (!isPowerOf2_64(GroupSize)) {
4371    Error(S, "group size must be a power of two");
4372    return false;
4373  }
4374
4375  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
4376  return true;
4377}
4378
4379bool
4380AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
4381  using namespace llvm::AMDGPU::Swizzle;
4382
4383  SMLoc S = Parser.getTok().getLoc();
4384  int64_t GroupSize;
4385
4386  if (!parseSwizzleOperands(1, &GroupSize,
4387      1, 16, "group size must be in the interval [1,16]")) {
4388    return false;
4389  }
4390  if (!isPowerOf2_64(GroupSize)) {
4391    Error(S, "group size must be a power of two");
4392    return false;
4393  }
4394
4395  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
4396  return true;
4397}
4398
4399bool
4400AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
4401  using namespace llvm::AMDGPU::Swizzle;
4402
4403  if (!skipToken(AsmToken::Comma, "expected a comma")) {
4404    return false;
4405  }
4406
4407  StringRef Ctl;
4408  SMLoc StrLoc = Parser.getTok().getLoc();
4409  if (!parseString(Ctl)) {
4410    return false;
4411  }
4412  if (Ctl.size() != BITMASK_WIDTH) {
4413    Error(StrLoc, "expected a 5-character mask");
4414    return false;
4415  }
4416
4417  unsigned AndMask = 0;
4418  unsigned OrMask = 0;
4419  unsigned XorMask = 0;
4420
4421  for (size_t i = 0; i < Ctl.size(); ++i) {
4422    unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
4423    switch(Ctl[i]) {
4424    default:
4425      Error(StrLoc, "invalid mask");
4426      return false;
4427    case '0':
4428      break;
4429    case '1':
4430      OrMask |= Mask;
4431      break;
4432    case 'p':
4433      AndMask |= Mask;
4434      break;
4435    case 'i':
4436      AndMask |= Mask;
4437      XorMask |= Mask;
4438      break;
4439    }
4440  }
4441
4442  Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
4443  return true;
4444}
4445
4446bool
4447AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
4448
4449  SMLoc OffsetLoc = Parser.getTok().getLoc();
4450
4451  if (!parseExpr(Imm)) {
4452    return false;
4453  }
4454  if (!isUInt<16>(Imm)) {
4455    Error(OffsetLoc, "expected a 16-bit offset");
4456    return false;
4457  }
4458  return true;
4459}
4460
4461bool
4462AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
4463  using namespace llvm::AMDGPU::Swizzle;
4464
4465  if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
4466
4467    SMLoc ModeLoc = Parser.getTok().getLoc();
4468    bool Ok = false;
4469
4470    if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
4471      Ok = parseSwizzleQuadPerm(Imm);
4472    } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
4473      Ok = parseSwizzleBitmaskPerm(Imm);
4474    } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
4475      Ok = parseSwizzleBroadcast(Imm);
4476    } else if (trySkipId(IdSymbolic[ID_SWAP])) {
4477      Ok = parseSwizzleSwap(Imm);
4478    } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
4479      Ok = parseSwizzleReverse(Imm);
4480    } else {
4481      Error(ModeLoc, "expected a swizzle mode");
4482    }
4483
4484    return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
4485  }
4486
4487  return false;
4488}
4489
4490OperandMatchResultTy
4491AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
4492  SMLoc S = Parser.getTok().getLoc();
4493  int64_t Imm = 0;
4494
4495  if (trySkipId("offset")) {
4496
4497    bool Ok = false;
4498    if (skipToken(AsmToken::Colon, "expected a colon")) {
4499      if (trySkipId("swizzle")) {
4500        Ok = parseSwizzleMacro(Imm);
4501      } else {
4502        Ok = parseSwizzleOffset(Imm);
4503      }
4504    }
4505
4506    Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
4507
4508    return Ok? MatchOperand_Success : MatchOperand_ParseFail;
4509  } else {
4510    // Swizzle "offset" operand is optional.
4511    // If it is omitted, try parsing other optional operands.
4512    return parseOptionalOpr(Operands);
4513  }
4514}
4515
4516bool
4517AMDGPUOperand::isSwizzle() const {
4518  return isImmTy(ImmTySwizzle);
4519}
4520
4521//===----------------------------------------------------------------------===//
4522// sopp branch targets
4523//===----------------------------------------------------------------------===//
4524
4525OperandMatchResultTy
4526AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
4527  SMLoc S = Parser.getTok().getLoc();
4528
4529  switch (getLexer().getKind()) {
4530    default: return MatchOperand_ParseFail;
4531    case AsmToken::Integer: {
4532      int64_t Imm;
4533      if (getParser().parseAbsoluteExpression(Imm))
4534        return MatchOperand_ParseFail;
4535      Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S));
4536      return MatchOperand_Success;
4537    }
4538
4539    case AsmToken::Identifier:
4540      Operands.push_back(AMDGPUOperand::CreateExpr(this,
4541          MCSymbolRefExpr::create(getContext().getOrCreateSymbol(
4542                                  Parser.getTok().getString()), getContext()), S));
4543      Parser.Lex();
4544      return MatchOperand_Success;
4545  }
4546}
4547
4548//===----------------------------------------------------------------------===//
4549// mubuf
4550//===----------------------------------------------------------------------===//
4551
4552AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
4553  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
4554}
4555
4556AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
4557  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
4558}
4559
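// Converter for MUBUF instructions, e.g. (illustrative):
//   buffer_load_dword v1, off, s[4:7], s1 offset:4 glc slc
// Register and hard-coded token operands are added as encountered; the
// remaining optional immediates (offset, glc, slc, tfe, as applicable) are
// filled in from the OptionalIdx map afterwards.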
4560void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
4561                               const OperandVector &Operands,
4562                               bool IsAtomic,
4563                               bool IsAtomicReturn,
4564                               bool IsLds) {
4565  bool IsLdsOpcode = IsLds;
4566  bool HasLdsModifier = false;
4567  OptionalImmIndexMap OptionalIdx;
4568  assert(IsAtomicReturn ? IsAtomic : true);
4569
4570  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4571    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4572
4573    // Add the register arguments
4574    if (Op.isReg()) {
4575      Op.addRegOperands(Inst, 1);
4576      continue;
4577    }
4578
4579    // Handle the case where soffset is an immediate
4580    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
4581      Op.addImmOperands(Inst, 1);
4582      continue;
4583    }
4584
4585    HasLdsModifier = Op.isLDS();
4586
4587    // Handle tokens like 'offen' which are sometimes hard-coded into the
4588    // asm string.  There are no MCInst operands for these.
4589    if (Op.isToken()) {
4590      continue;
4591    }
4592    assert(Op.isImm());
4593
4594    // Handle optional arguments
4595    OptionalIdx[Op.getImmTy()] = i;
4596  }
4597
4598  // This is a workaround for an llvm quirk which may result in an
4599  // incorrect instruction selection. Lds and non-lds versions of
4600  // MUBUF instructions are identical except that lds versions
4601  // have a mandatory 'lds' modifier. However, this modifier follows
4602  // the optional modifiers, and the llvm asm matcher regards the 'lds'
4603  // modifier as optional as well. As a result, an lds version
4604  // of an opcode may be selected even if it has no 'lds' modifier.
4605  if (IsLdsOpcode && !HasLdsModifier) {
4606    int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
4607    if (NoLdsOpcode != -1) { // Got lds version - correct it.
4608      Inst.setOpcode(NoLdsOpcode);
4609      IsLdsOpcode = false;
4610    }
4611  }
4612
4613  // Copy $vdata_in operand and insert as $vdata for MUBUF_Atomic RTN insns.
4614  if (IsAtomicReturn) {
4615    MCInst::iterator I = Inst.begin(); // $vdata_in is always at the beginning.
4616    Inst.insert(I, *I);
4617  }
4618
4619  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
4620  if (!IsAtomic) { // glc is hard-coded.
4621    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
4622  }
4623  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
4624
4625  if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
4626    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
4627  }
4628}
4629
4630void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
4631  OptionalImmIndexMap OptionalIdx;
4632
4633  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4634    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4635
4636    // Add the register arguments
4637    if (Op.isReg()) {
4638      Op.addRegOperands(Inst, 1);
4639      continue;
4640    }
4641
4642    // Handle the case where soffset is an immediate
4643    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
4644      Op.addImmOperands(Inst, 1);
4645      continue;
4646    }
4647
4648    // Handle tokens like 'offen' which are sometimes hard-coded into the
4649    // asm string.  There are no MCInst operands for these.
4650    if (Op.isToken()) {
4651      continue;
4652    }
4653    assert(Op.isImm());
4654
4655    // Handle optional arguments
4656    OptionalIdx[Op.getImmTy()] = i;
4657  }
4658
4659  addOptionalImmOperand(Inst, Operands, OptionalIdx,
4660                        AMDGPUOperand::ImmTyOffset);
4661  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
4662  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
4663  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
4664  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
4665}
4666
4667//===----------------------------------------------------------------------===//
4668// mimg
4669//===----------------------------------------------------------------------===//
4670
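// Converter for MIMG instructions, e.g. (illustrative):
//   image_load v[0:3], v4, s[0:7] dmask:0xf unorm glc
// For atomics the destination register is also re-added as the source data
// operand; optional modifiers are filled in from the OptionalIdx map.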
4671void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
4672                              bool IsAtomic) {
4673  unsigned I = 1;
4674  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4675  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
4676    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
4677  }
4678
4679  if (IsAtomic) {
4680    // Add src, same as dst
4681    assert(Desc.getNumDefs() == 1);
4682    ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
4683  }
4684
4685  OptionalImmIndexMap OptionalIdx;
4686
4687  for (unsigned E = Operands.size(); I != E; ++I) {
4688    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
4689
4690    // Add the register arguments
4691    if (Op.isReg()) {
4692      Op.addRegOperands(Inst, 1);
4693    } else if (Op.isImmModifier()) {
4694      OptionalIdx[Op.getImmTy()] = I;
4695    } else {
4696      llvm_unreachable("unexpected operand type");
4697    }
4698  }
4699
4700  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
4701  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
4702  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
4703  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
4704  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
4705  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
4706  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
4707  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
4708  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
4709}
4710
4711void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
4712  cvtMIMG(Inst, Operands, true);
4713}
4714
4715//===----------------------------------------------------------------------===//
4716// smrd
4717//===----------------------------------------------------------------------===//
4718
4719bool AMDGPUOperand::isSMRDOffset8() const {
4720  return isImm() && isUInt<8>(getImm());
4721}
4722
4723bool AMDGPUOperand::isSMRDOffset20() const {
4724  return isImm() && isUInt<20>(getImm());
4725}
4726
4727bool AMDGPUOperand::isSMRDLiteralOffset() const {
4728  // 32-bit literal offsets are only supported on CI, and we only want to use
4729  // them when the offset does not fit in 8 bits.
4730  return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
4731}
4732
4733AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
4734  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4735}
4736
4737AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
4738  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4739}
4740
4741AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
4742  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4743}
4744
4745AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const {
4746  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4747}
4748
4749AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const {
4750  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4751}
4752
4753//===----------------------------------------------------------------------===//
4754// vop3
4755//===----------------------------------------------------------------------===//
4756
4757static bool ConvertOmodMul(int64_t &Mul) {
4758  if (Mul != 1 && Mul != 2 && Mul != 4)
4759    return false;
4760
4761  Mul >>= 1;
4762  return true;
4763}
4764
4765static bool ConvertOmodDiv(int64_t &Div) {
4766  if (Div == 1) {
4767    Div = 0;
4768    return true;
4769  }
4770
4771  if (Div == 2) {
4772    Div = 3;
4773    return true;
4774  }
4775
4776  return false;
4777}
4778
4779static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
4780  if (BoundCtrl == 0) {
4781    BoundCtrl = 1;
4782    return true;
4783  }
4784
4785  if (BoundCtrl == -1) {
4786    BoundCtrl = 0;
4787    return true;
4788  }
4789
4790  return false;
4791}
4792
4793// Note: the order in this table matches the order of operands in AsmString.
4794static const OptionalOperand AMDGPUOptionalOperandTable[] = {
4795  {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
4796  {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
4797  {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
4798  {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
4799  {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
4800  {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
4801  {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
4802  {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
4803  {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
4804  {"dfmt",    AMDGPUOperand::ImmTyFORMAT, false, nullptr},
4805  {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
4806  {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
4807  {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
4808  {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
4809  {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
4810  {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
4811  {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
4812  {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
4813  {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
4814  {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
4815  {"a16",     AMDGPUOperand::ImmTyR128A16,  true, nullptr},
4816  {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
4817  {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
4818  {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
4819  {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
4820  {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
4821  {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
4822  {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
4823  {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
4824  {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
4825  {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
4826  {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
4827  {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
4828  {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
4829  {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
4830  {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
4831  {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}
4832};
4833
4834OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
4835  unsigned size = Operands.size();
4836  assert(size > 0);
4837
4838  OperandMatchResultTy res = parseOptionalOpr(Operands);
4839
4840  // This is a hack to enable hardcoded mandatory operands which follow
4841  // optional operands.
4842  //
4843  // The current design assumes that all operands after the first optional operand
4844  // are also optional. However, the implementation of some instructions violates
4845  // this rule (see e.g. flat/global atomics, which have hardcoded 'glc' operands).
4846  //
4847  // To alleviate this problem, we have to (implicitly) parse extra operands
4848  // to make sure the autogenerated parser of custom operands never hits a
4849  // hardcoded mandatory operand.
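  //
  // As a purely illustrative (not verified) example, something like
  //   flat_atomic_swap v0, v[1:2], v3 glc
  // ends with a hardcoded 'glc' that follows the optional offset/slc operands,
  // so we keep parsing until such mandatory operands are consumed.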
4850
4851  if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {
4852
4853    // We have parsed the first optional operand.
4854    // Parse as many operands as necessary to skip all mandatory operands.
4855
4856    for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
4857      if (res != MatchOperand_Success ||
4858          getLexer().is(AsmToken::EndOfStatement)) break;
4859      if (getLexer().is(AsmToken::Comma)) Parser.Lex();
4860      res = parseOptionalOpr(Operands);
4861    }
4862  }
4863
4864  return res;
4865}
4866
4867OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
4868  OperandMatchResultTy res;
4869  for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
4870    // try to parse any optional operand here
4871    if (Op.IsBit) {
4872      res = parseNamedBit(Op.Name, Operands, Op.Type);
4873    } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
4874      res = parseOModOperand(Operands);
4875    } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
4876               Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
4877               Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
4878      res = parseSDWASel(Operands, Op.Name, Op.Type);
4879    } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
4880      res = parseSDWADstUnused(Operands);
4881    } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
4882               Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
4883               Op.Type == AMDGPUOperand::ImmTyNegLo ||
4884               Op.Type == AMDGPUOperand::ImmTyNegHi) {
4885      res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
4886                                        Op.ConvertResult);
4887    } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT) {
4888      res = parseDfmtNfmt(Operands);
4889    } else {
4890      res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
4891    }
4892    if (res != MatchOperand_NoMatch) {
4893      return res;
4894    }
4895  }
4896  return MatchOperand_NoMatch;
4897}
4898
4899OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
4900  StringRef Name = Parser.getTok().getString();
4901  if (Name == "mul") {
4902    return parseIntWithPrefix("mul", Operands,
4903                              AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
4904  }
4905
4906  if (Name == "div") {
4907    return parseIntWithPrefix("div", Operands,
4908                              AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
4909  }
4910
4911  return MatchOperand_NoMatch;
4912}
4913
4914void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
4915  cvtVOP3P(Inst, Operands);
4916
4917  int Opc = Inst.getOpcode();
4918
4919  int SrcNum;
4920  const int Ops[] = { AMDGPU::OpName::src0,
4921                      AMDGPU::OpName::src1,
4922                      AMDGPU::OpName::src2 };
4923  for (SrcNum = 0;
4924       SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
4925       ++SrcNum);
4926  assert(SrcNum > 0);
4927
4928  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4929  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4930
4931  if ((OpSel & (1 << SrcNum)) != 0) {
4932    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
4933    uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
4934    Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
4935  }
4936}
4937
4938static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
4939      // 1. This operand is an input-modifiers operand
4940  return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
4941      // 2. This is not the last operand
4942      && Desc.NumOperands > (OpNum + 1)
4943      // 3. The next operand has a register class
4944      && Desc.OpInfo[OpNum + 1].RegClass != -1
4945      // 4. The next register operand is not tied to any other operand
4946      && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
4947}
4948
4949void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst,
4950                                    const OperandVector &Operands) {
4951  OptionalImmIndexMap OptionalIdx;
4952  unsigned Opc = Inst.getOpcode();
4953
4954  unsigned I = 1;
4955  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4956  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
4957    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
4958  }
4959
4960  for (unsigned E = Operands.size(); I != E; ++I) {
4961    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
4962    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
4963      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
4964    } else if (Op.isInterpSlot() ||
4965               Op.isInterpAttr() ||
4966               Op.isAttrChan()) {
4967      Inst.addOperand(MCOperand::createImm(Op.Imm.Val));
4968    } else if (Op.isImmModifier()) {
4969      OptionalIdx[Op.getImmTy()] = I;
4970    } else {
4971      llvm_unreachable("unhandled operand type");
4972    }
4973  }
4974
4975  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
4976    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
4977  }
4978
4979  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
4980    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
4981  }
4982
4983  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
4984    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
4985  }
4986}
4987
4988void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
4989                              OptionalImmIndexMap &OptionalIdx) {
4990  unsigned Opc = Inst.getOpcode();
4991
4992  unsigned I = 1;
4993  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4994  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
4995    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
4996  }
4997
4998  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
4999    // This instruction has src modifiers
5000    for (unsigned E = Operands.size(); I != E; ++I) {
5001      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5002      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5003        Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
5004      } else if (Op.isImmModifier()) {
5005        OptionalIdx[Op.getImmTy()] = I;
5006      } else if (Op.isRegOrImm()) {
5007        Op.addRegOrImmOperands(Inst, 1);
5008      } else {
5009        llvm_unreachable("unhandled operand type");
5010      }
5011    }
5012  } else {
5013    // No src modifiers
5014    for (unsigned E = Operands.size(); I != E; ++I) {
5015      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5016      if (Op.isMod()) {
5017        OptionalIdx[Op.getImmTy()] = I;
5018      } else {
5019        Op.addRegOrImmOperands(Inst, 1);
5020      }
5021    }
5022  }
5023
5024  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
5025    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
5026  }
5027
5028  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
5029    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
5030  }
5031
5032  // Special case for v_mac_{f16, f32} and v_fmac_f32 (gfx906):
5033  // these have a src2 register operand that is tied to the dst operand.
5034  // The assembler does not allow modifiers for this operand, so
5035  // src2_modifiers must be 0.
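  // For example, given, say, 'v_mac_f32_e64 v0, v1, v2' the parsed operand
  // list only covers dst, src0 and src1; the code below synthesizes
  // src2_modifiers = 0 and re-uses operand 0 (the dst register) as the tied src2.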
5036  if (Opc == AMDGPU::V_MAC_F32_e64_si ||
5037      Opc == AMDGPU::V_MAC_F32_e64_vi ||
5038      Opc == AMDGPU::V_MAC_F16_e64_vi ||
5039      Opc == AMDGPU::V_FMAC_F32_e64_vi) {
5040    auto it = Inst.begin();
5041    std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
5042    it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
5043    ++it;
5044    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
5045  }
5046}
5047
5048void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
5049  OptionalImmIndexMap OptionalIdx;
5050  cvtVOP3(Inst, Operands, OptionalIdx);
5051}
5052
5053void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
5054                               const OperandVector &Operands) {
5055  OptionalImmIndexMap OptIdx;
5056  const int Opc = Inst.getOpcode();
5057  const MCInstrDesc &Desc = MII.get(Opc);
5058
5059  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
5060
5061  cvtVOP3(Inst, Operands, OptIdx);
5062
5063  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
5064    assert(!IsPacked);
5065    Inst.addOperand(Inst.getOperand(0));
5066  }
5067
5068  // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
5069  // instruction, and then figure out where to actually put the modifiers
5070
5071  addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
5072
5073  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
5074  if (OpSelHiIdx != -1) {
5075    int DefaultVal = IsPacked ? -1 : 0;
5076    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
5077                          DefaultVal);
5078  }
5079
5080  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
5081  if (NegLoIdx != -1) {
5082    assert(IsPacked);
5083    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
5084    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
5085  }
5086
5087  const int Ops[] = { AMDGPU::OpName::src0,
5088                      AMDGPU::OpName::src1,
5089                      AMDGPU::OpName::src2 };
5090  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
5091                         AMDGPU::OpName::src1_modifiers,
5092                         AMDGPU::OpName::src2_modifiers };
5093
5094  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
5095
5096  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
5097  unsigned OpSelHi = 0;
5098  unsigned NegLo = 0;
5099  unsigned NegHi = 0;
5100
5101  if (OpSelHiIdx != -1) {
5102    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
5103  }
5104
5105  if (NegLoIdx != -1) {
5106    int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
5107    NegLo = Inst.getOperand(NegLoIdx).getImm();
5108    NegHi = Inst.getOperand(NegHiIdx).getImm();
5109  }
5110
5111  for (int J = 0; J < 3; ++J) {
5112    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
5113    if (OpIdx == -1)
5114      break;
5115
5116    uint32_t ModVal = 0;
5117
5118    if ((OpSel & (1 << J)) != 0)
5119      ModVal |= SISrcMods::OP_SEL_0;
5120
5121    if ((OpSelHi & (1 << J)) != 0)
5122      ModVal |= SISrcMods::OP_SEL_1;
5123
5124    if ((NegLo & (1 << J)) != 0)
5125      ModVal |= SISrcMods::NEG;
5126
5127    if ((NegHi & (1 << J)) != 0)
5128      ModVal |= SISrcMods::NEG_HI;
5129
5130    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
5131
5132    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
5133  }
5134}
5135
5136//===----------------------------------------------------------------------===//
5137// dpp
5138//===----------------------------------------------------------------------===//
5139
5140bool AMDGPUOperand::isDPPCtrl() const {
5141  using namespace AMDGPU::DPP;
5142
5143  bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
5144  if (result) {
5145    int64_t Imm = getImm();
5146    return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
5147           (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
5148           (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
5149           (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
5150           (Imm == DppCtrl::WAVE_SHL1) ||
5151           (Imm == DppCtrl::WAVE_ROL1) ||
5152           (Imm == DppCtrl::WAVE_SHR1) ||
5153           (Imm == DppCtrl::WAVE_ROR1) ||
5154           (Imm == DppCtrl::ROW_MIRROR) ||
5155           (Imm == DppCtrl::ROW_HALF_MIRROR) ||
5156           (Imm == DppCtrl::BCAST15) ||
5157           (Imm == DppCtrl::BCAST31);
5158  }
5159  return false;
5160}
5161
5162bool AMDGPUOperand::isGPRIdxMode() const {
5163  return isImm() && isUInt<4>(getImm());
5164}
5165
5166bool AMDGPUOperand::isS16Imm() const {
5167  return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
5168}
5169
5170bool AMDGPUOperand::isU16Imm() const {
5171  return isImm() && isUInt<16>(getImm());
5172}
5173
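// For reference, the dpp_ctrl forms recognized by the parser below include:
//   quad_perm:[0,1,2,3]   row_shl:1..15   row_shr:1..15   row_ror:1..15
//   wave_shl:1  wave_rol:1  wave_shr:1  wave_ror:1
//   row_mirror  row_half_mirror  row_bcast:15  row_bcast:31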
5174OperandMatchResultTy
5175AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
5176  using namespace AMDGPU::DPP;
5177
5178  SMLoc S = Parser.getTok().getLoc();
5179  StringRef Prefix;
5180  int64_t Int;
5181
5182  if (getLexer().getKind() == AsmToken::Identifier) {
5183    Prefix = Parser.getTok().getString();
5184  } else {
5185    return MatchOperand_NoMatch;
5186  }
5187
5188  if (Prefix == "row_mirror") {
5189    Int = DppCtrl::ROW_MIRROR;
5190    Parser.Lex();
5191  } else if (Prefix == "row_half_mirror") {
5192    Int = DppCtrl::ROW_HALF_MIRROR;
5193    Parser.Lex();
5194  } else {
5195    // Check to prevent parseDPPCtrl from eating invalid tokens
5196    if (Prefix != "quad_perm"
5197        && Prefix != "row_shl"
5198        && Prefix != "row_shr"
5199        && Prefix != "row_ror"
5200        && Prefix != "wave_shl"
5201        && Prefix != "wave_rol"
5202        && Prefix != "wave_shr"
5203        && Prefix != "wave_ror"
5204        && Prefix != "row_bcast") {
5205      return MatchOperand_NoMatch;
5206    }
5207
5208    Parser.Lex();
5209    if (getLexer().isNot(AsmToken::Colon))
5210      return MatchOperand_ParseFail;
5211
5212    if (Prefix == "quad_perm") {
5213      // quad_perm:[%d,%d,%d,%d]
5214      Parser.Lex();
5215      if (getLexer().isNot(AsmToken::LBrac))
5216        return MatchOperand_ParseFail;
5217      Parser.Lex();
5218
5219      if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <= 3))
5220        return MatchOperand_ParseFail;
5221
5222      for (int i = 0; i < 3; ++i) {
5223        if (getLexer().isNot(AsmToken::Comma))
5224          return MatchOperand_ParseFail;
5225        Parser.Lex();
5226
5227        int64_t Temp;
5228        if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
5229          return MatchOperand_ParseFail;
5230        const int shift = i*2 + 2;
5231        Int += (Temp << shift);
5232      }
5233
5234      if (getLexer().isNot(AsmToken::RBrac))
5235        return MatchOperand_ParseFail;
5236      Parser.Lex();
5237    } else {
5238      // sel:%d
5239      Parser.Lex();
5240      if (getParser().parseAbsoluteExpression(Int))
5241        return MatchOperand_ParseFail;
5242
5243      if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
5244        Int |= DppCtrl::ROW_SHL0;
5245      } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
5246        Int |= DppCtrl::ROW_SHR0;
5247      } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
5248        Int |= DppCtrl::ROW_ROR0;
5249      } else if (Prefix == "wave_shl" && 1 == Int) {
5250        Int = DppCtrl::WAVE_SHL1;
5251      } else if (Prefix == "wave_rol" && 1 == Int) {
5252        Int = DppCtrl::WAVE_ROL1;
5253      } else if (Prefix == "wave_shr" && 1 == Int) {
5254        Int = DppCtrl::WAVE_SHR1;
5255      } else if (Prefix == "wave_ror" && 1 == Int) {
5256        Int = DppCtrl::WAVE_ROR1;
5257      } else if (Prefix == "row_bcast") {
5258        if (Int == 15) {
5259          Int = DppCtrl::BCAST15;
5260        } else if (Int == 31) {
5261          Int = DppCtrl::BCAST31;
5262        } else {
5263          return MatchOperand_ParseFail;
5264        }
5265      } else {
5266        return MatchOperand_ParseFail;
5267      }
5268    }
5269  }
5270
5271  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
5272  return MatchOperand_Success;
5273}
5274
5275AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
5276  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
5277}
5278
5279AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
5280  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
5281}
5282
5283AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
5284  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
5285}
5286
5287void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
5288  OptionalImmIndexMap OptionalIdx;
5289
5290  unsigned I = 1;
5291  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5292  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5293    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5294  }
5295
5296  for (unsigned E = Operands.size(); I != E; ++I) {
5297    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
5298                                            MCOI::TIED_TO);
5299    if (TiedTo != -1) {
5300      assert((unsigned)TiedTo < Inst.getNumOperands());
5301      // Handle the tied 'old' or 'src2' operand for MAC instructions.
5302      Inst.addOperand(Inst.getOperand(TiedTo));
5303    }
5304    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5305    // Add the register arguments
5306    if (Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
5307      // VOP2b (v_add_u32, v_sub_u32 ...) dpp instructions use the "vcc" token.
5308      // Skip it.
5309      continue;
5310    } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5311      Op.addRegWithFPInputModsOperands(Inst, 2);
5312    } else if (Op.isDPPCtrl()) {
5313      Op.addImmOperands(Inst, 1);
5314    } else if (Op.isImm()) {
5315      // Handle optional arguments
5316      OptionalIdx[Op.getImmTy()] = I;
5317    } else {
5318      llvm_unreachable("Invalid operand type");
5319    }
5320  }
5321
5322  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
5323  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
5324  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
5325}
5326
5327//===----------------------------------------------------------------------===//
5328// sdwa
5329//===----------------------------------------------------------------------===//
5330
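// The sdwa selector operands handled here are written as, e.g.,
//   dst_sel:WORD_1  src0_sel:BYTE_0  src1_sel:DWORD  dst_unused:UNUSED_PRESERVE
// i.e. a prefix followed by one of the names accepted by the StringSwitches below.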
5331OperandMatchResultTy
5332AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
5333                              AMDGPUOperand::ImmTy Type) {
5334  using namespace llvm::AMDGPU::SDWA;
5335
5336  SMLoc S = Parser.getTok().getLoc();
5337  StringRef Value;
5338  OperandMatchResultTy res;
5339
5340  res = parseStringWithPrefix(Prefix, Value);
5341  if (res != MatchOperand_Success) {
5342    return res;
5343  }
5344
5345  int64_t Int;
5346  Int = StringSwitch<int64_t>(Value)
5347        .Case("BYTE_0", SdwaSel::BYTE_0)
5348        .Case("BYTE_1", SdwaSel::BYTE_1)
5349        .Case("BYTE_2", SdwaSel::BYTE_2)
5350        .Case("BYTE_3", SdwaSel::BYTE_3)
5351        .Case("WORD_0", SdwaSel::WORD_0)
5352        .Case("WORD_1", SdwaSel::WORD_1)
5353        .Case("DWORD", SdwaSel::DWORD)
5354        .Default(0xffffffff);
5355  Parser.Lex(); // eat last token
5356
5357  if (Int == 0xffffffff) {
5358    return MatchOperand_ParseFail;
5359  }
5360
5361  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
5362  return MatchOperand_Success;
5363}
5364
5365OperandMatchResultTy
5366AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
5367  using namespace llvm::AMDGPU::SDWA;
5368
5369  SMLoc S = Parser.getTok().getLoc();
5370  StringRef Value;
5371  OperandMatchResultTy res;
5372
5373  res = parseStringWithPrefix("dst_unused", Value);
5374  if (res != MatchOperand_Success) {
5375    return res;
5376  }
5377
5378  int64_t Int;
5379  Int = StringSwitch<int64_t>(Value)
5380        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
5381        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
5382        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
5383        .Default(0xffffffff);
5384  Parser.Lex(); // eat last token
5385
5386  if (Int == 0xffffffff) {
5387    return MatchOperand_ParseFail;
5388  }
5389
5390  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
5391  return MatchOperand_Success;
5392}
5393
5394void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
5395  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
5396}
5397
5398void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
5399  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
5400}
5401
5402void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
5403  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
5404}
5405
5406void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
5407  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
5408}
5409
5410void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
5411                              uint64_t BasicInstType, bool skipVcc) {
5412  using namespace llvm::AMDGPU::SDWA;
5413
5414  OptionalImmIndexMap OptionalIdx;
5415  bool skippedVcc = false;
5416
5417  unsigned I = 1;
5418  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5419  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5420    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5421  }
5422
5423  for (unsigned E = Operands.size(); I != E; ++I) {
5424    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5425    if (skipVcc && !skippedVcc && Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
5426      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa instructions use the "vcc" token as dst.
5427      // Skip it if it is the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
5428      // or the 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
5429      // Skip VCC only if we didn't skip it on the previous iteration.
5430      if (BasicInstType == SIInstrFlags::VOP2 &&
5431          (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
5432        skippedVcc = true;
5433        continue;
5434      } else if (BasicInstType == SIInstrFlags::VOPC &&
5435                 Inst.getNumOperands() == 0) {
5436        skippedVcc = true;
5437        continue;
5438      }
5439    }
5440    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5441      Op.addRegOrImmWithInputModsOperands(Inst, 2);
5442    } else if (Op.isImm()) {
5443      // Handle optional arguments
5444      OptionalIdx[Op.getImmTy()] = I;
5445    } else {
5446      llvm_unreachable("Invalid operand type");
5447    }
5448    skippedVcc = false;
5449  }
5450
5451  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
5452      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
5453    // v_nop_sdwa_vi/gfx9 have no optional sdwa arguments
5454    switch (BasicInstType) {
5455    case SIInstrFlags::VOP1:
5456      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
5457      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
5458        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
5459      }
5460      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
5461      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
5462      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
5463      break;
5464
5465    case SIInstrFlags::VOP2:
5466      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
5467      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
5468        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
5469      }
5470      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
5471      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
5472      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
5473      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
5474      break;
5475
5476    case SIInstrFlags::VOPC:
5477      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
5478      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
5479      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
5480      break;
5481
5482    default:
5483      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
5484    }
5485  }
5486
5487  // Special case for v_mac_{f16, f32}:
5488  // these have a src2 register operand that is tied to the dst operand.
5489  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
5490      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
5491    auto it = Inst.begin();
5492    std::advance(
5493      it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
5494    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
5495  }
5496}
5497
5498/// Force static initialization.
5499extern "C" void LLVMInitializeAMDGPUAsmParser() {
5500  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
5501  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
5502}
5503
5504#define GET_REGISTER_MATCHER
5505#define GET_MATCHER_IMPLEMENTATION
5506#define GET_MNEMONIC_SPELL_CHECKER
5507#include "AMDGPUGenAsmMatcher.inc"
5508
5509// This function should be defined after the auto-generated include so that the
5510// MatchClassKind enum is defined.
5511unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
5512                                                     unsigned Kind) {
5513  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
5514  // But MatchInstructionImpl() expects to meet a token and fails to validate
5515  // the operand. This method checks whether we were given an immediate operand
5516  // where the matcher expects the corresponding token.
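  // For instance, an operand written as 'glc' is parsed into an ImmTyGLC
  // immediate, so when the matcher asks for the MCK_glc token class we accept
  // the immediate form here instead of failing the token comparison.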
5517  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
5518  switch (Kind) {
5519  case MCK_addr64:
5520    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
5521  case MCK_gds:
5522    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
5523  case MCK_lds:
5524    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
5525  case MCK_glc:
5526    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
5527  case MCK_idxen:
5528    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
5529  case MCK_offen:
5530    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
5531  case MCK_SSrcB32:
5532    // When operands have expression values, they will return true for isToken,
5533    // because it is not possible to distinguish between a token and an
5534    // expression at parse time. MatchInstructionImpl() will always try to
5535    // match an operand as a token, when isToken returns true, and when the
5536    // name of the expression is not a valid token, the match will fail,
5537    // so we need to handle it here.
5538    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
5539  case MCK_SSrcF32:
5540    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
5541  case MCK_SoppBrTarget:
5542    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
5543  case MCK_VReg32OrOff:
5544    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
5545  case MCK_InterpSlot:
5546    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
5547  case MCK_Attr:
5548    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
5549  case MCK_AttrChan:
5550    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
5551  default:
5552    return Match_InvalidOperand;
5553  }
5554}
5555